Esempio n. 1
0
    virtual ElementIterator* allocateIterator(const ElementPath* path) const {
        WorkingSetMember* member = _ws->get(_id);
        if (!member->hasObj()) {
            // Try to look in the key.
            BSONObjIterator keyPatternIt(_keyPattern);
            BSONObjIterator keyDataIt(_key);

            while (keyPatternIt.more()) {
                BSONElement keyPatternElt = keyPatternIt.next();
                verify(keyDataIt.more());
                BSONElement keyDataElt = keyDataIt.next();

                if (path->fieldRef().equalsDottedField(keyPatternElt.fieldName())) {
                    if (Array == keyDataElt.type()) {
                        return new SimpleArrayElementIterator(keyDataElt, true);
                    } else {
                        return new SingleElementElementIterator(keyDataElt);
                    }
                }
            }
        }

        // Go to the raw document, fetching if needed.
        return new BSONElementIterator(path, getObj());
    }
Esempio n. 2
0
    PlanStage::StageState FetchStage::fetchCompleted(WorkingSetID* out) {
        WorkingSetMember* member = _ws->get(_idBeingPagedIn);

        // The DiskLoc we're waiting to page in was invalidated (forced fetch).  Test for
        // matching and maybe pass it up.
        if (member->state == WorkingSetMember::OWNED_OBJ) {
            WorkingSetID memberID = _idBeingPagedIn;
            _idBeingPagedIn = WorkingSet::INVALID_ID;
            return returnIfMatches(member, memberID, out);
        }

        // Assume that the caller has fetched appropriately.
        // TODO: Do we want to double-check the runner?  Not sure how reliable likelyInMemory is
        // on all platforms.
        verify(member->hasLoc());
        verify(!member->hasObj());

        // Make the (unowned) object.
        Record* record = member->loc.rec();
        const char* data = record->dataNoThrowing();
        member->obj = BSONObj(data);

        // Don't need index data anymore as we have an obj.
        member->keyData.clear();
        member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
        verify(!member->obj.isOwned());

        // Return the obj if it passes our filter.
        WorkingSetID memberID = _idBeingPagedIn;
        _idBeingPagedIn = WorkingSet::INVALID_ID;
        return returnIfMatches(member, memberID, out);
    }
Esempio n. 3
0
PlanStage::StageState OplogStart::workBackwardsScan(WorkingSetID* out) {
    PlanStage::StageState state = child()->work(out);

    // EOF.  Just start from the beginning, which is where we've hit.
    if (PlanStage::IS_EOF == state) {
        _done = true;
        return state;
    }

    if (PlanStage::ADVANCED != state) {
        return state;
    }

    WorkingSetMember* member = _workingSet->get(*out);
    verify(member->hasObj());
    verify(member->hasRecordId());

    if (!_filter->matchesBSON(member->obj.value())) {
        _done = true;
        // RecordId is returned in *out.
        return PlanStage::ADVANCED;
    } else {
        _workingSet->free(*out);
        return PlanStage::NEED_TIME;
    }
}
Esempio n. 4
0
    PlanStage::StageState FetchStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        if (isEOF()) { return PlanStage::IS_EOF; }

        // If we asked our parent for a page-in last time work(...) was called, finish the fetch.
        if (WorkingSet::INVALID_ID != _idBeingPagedIn) {
            cout << "fetch completed, id being paged on " << _idBeingPagedIn << endl;
            return fetchCompleted(out);
        }

        // If we're here, we're not waiting for a DiskLoc to be fetched.  Get another to-be-fetched
        // result from our child.
        WorkingSetID id;
        StageState status = _child->work(&id);

        if (PlanStage::ADVANCED == status) {
            WorkingSetMember* member = _ws->get(id);

            // If there's an obj there, there is no fetching to perform.
            if (member->hasObj()) {
                ++_specificStats.alreadyHasObj;
                return returnIfMatches(member, id, out);
            }

            // We need a valid loc to fetch from and this is the only state that has one.
            verify(WorkingSetMember::LOC_AND_IDX == member->state);
            verify(member->hasLoc());

            Record* record = member->loc.rec();
            const char* data = record->dataNoThrowing();

            if (!recordInMemory(data)) {
                // member->loc points to a record that's NOT in memory.  Pass a fetch request up.
                verify(WorkingSet::INVALID_ID == _idBeingPagedIn);
                _idBeingPagedIn = id;
                *out = id;
                ++_commonStats.needFetch;
                return PlanStage::NEED_FETCH;
            }
            else {
                // Don't need index data anymore as we have an obj.
                member->keyData.clear();
                member->obj = BSONObj(data);
                member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
                return returnIfMatches(member, id, out);
            }
        }
        else {
            if (PlanStage::NEED_FETCH == status) {
                *out = id;
                ++_commonStats.needFetch;
            }
            else if (PlanStage::NEED_TIME == status) {
                ++_commonStats.needTime;
            }
            return status;
        }
    }
Esempio n. 5
0
PlanStage::StageState ShardFilterStage::doWork(WorkingSetID* out) {
    // If we've returned as many results as we're limited to, isEOF will be true.
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    StageState status = child()->work(out);

    if (PlanStage::ADVANCED == status) {
        // If we're sharded make sure that we don't return data that is not owned by us,
        // including pending documents from in-progress migrations and orphaned documents from
        // aborted migrations
        if (_metadata->isSharded()) {
            ShardKeyPattern shardKeyPattern(_metadata->getKeyPattern());
            WorkingSetMember* member = _ws->get(*out);
            WorkingSetMatchableDocument matchable(member);
            BSONObj shardKey = shardKeyPattern.extractShardKeyFromMatchable(matchable);

            if (shardKey.isEmpty()) {
                // We can't find a shard key for this document - this should never happen with
                // a non-fetched result unless our query planning is screwed up
                if (!member->hasObj()) {
                    Status status(ErrorCodes::InternalError,
                                  "shard key not found after a covered stage, "
                                  "query planning has failed");

                    // Fail loudly and cleanly in production, fatally in debug
                    error() << redact(status);
                    dassert(false);

                    _ws->free(*out);
                    *out = WorkingSetCommon::allocateStatusMember(_ws, status);
                    return PlanStage::FAILURE;
                }

                // Skip this document with a warning - no shard key should not be possible
                // unless manually inserting data into a shard
                warning() << "no shard key found in document " << redact(member->obj.value()) << " "
                          << "for shard key pattern " << _metadata->getKeyPattern() << ", "
                          << "document may have been inserted manually into shard";
            }

            if (!_metadata->keyBelongsToMe(shardKey)) {
                _ws->free(*out);
                ++_specificStats.chunkSkips;
                return PlanStage::NEED_TIME;
            }
        }

        // If we're here either we have shard state and our doc passed, or we have no shard
        // state.  Either way, we advance.
        return status;
    }

    return status;
}
Esempio n. 6
0
    PlanStage::StageState FetchStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (isEOF()) { return PlanStage::IS_EOF; }

        // If we're here, we're not waiting for a DiskLoc to be fetched.  Get another to-be-fetched
        // result from our child.
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState status = _child->work(&id);

        if (PlanStage::ADVANCED == status) {
            WorkingSetMember* member = _ws->get(id);

            // If there's an obj there, there is no fetching to perform.
            if (member->hasObj()) {
                ++_specificStats.alreadyHasObj;
            }
            else {
                // We need a valid loc to fetch from and this is the only state that has one.
                verify(WorkingSetMember::LOC_AND_IDX == member->state);
                verify(member->hasLoc());

                // Don't need index data anymore as we have an obj.
                member->keyData.clear();
                member->obj = _collection->docFor(member->loc);
                member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
            }

            ++_specificStats.docsExamined;

            return returnIfMatches(member, id, out);
        }
        else if (PlanStage::FAILURE == status) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "fetch stage failed to read in results from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
            }
            return status;
        }
        else {
            if (PlanStage::NEED_TIME == status) {
                ++_commonStats.needTime;
            }
            return status;
        }
    }
Esempio n. 7
0
    Runner::RunnerState MultiPlanRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) {
        if (_killed) { return Runner::RUNNER_DEAD; }
        if (_failure) { return Runner::RUNNER_ERROR; }

        // If we haven't picked the best plan yet...
        if (NULL == _bestPlan) {
            if (!pickBestPlan(NULL)) {
                verify(_failure || _killed);
                if (_killed) { return Runner::RUNNER_DEAD; }
                if (_failure) { return Runner::RUNNER_ERROR; }
            }
        }

        if (!_alreadyProduced.empty()) {
            WorkingSetID id = _alreadyProduced.front();
            _alreadyProduced.pop_front();

            WorkingSetMember* member = _bestPlan->getWorkingSet()->get(id);
            // Note that this copies code from PlanExecutor.
            if (NULL != objOut) {
                if (WorkingSetMember::LOC_AND_IDX == member->state) {
                    if (1 != member->keyData.size()) {
                        _bestPlan->getWorkingSet()->free(id);
                        return Runner::RUNNER_ERROR;
                    }
                    *objOut = member->keyData[0].keyData;
                }
                else if (member->hasObj()) {
                    *objOut = member->obj;
                }
                else {
                    // TODO: Checking the WSM for covered fields goes here.
                    _bestPlan->getWorkingSet()->free(id);
                    return Runner::RUNNER_ERROR;
                }
            }

            if (NULL != dlOut) {
                if (member->hasLoc()) {
                    *dlOut = member->loc;
                }
                else {
                    _bestPlan->getWorkingSet()->free(id);
                    return Runner::RUNNER_ERROR;
                }
            }
            _bestPlan->getWorkingSet()->free(id);
            return Runner::RUNNER_ADVANCED;
        }

        return _bestPlan->getNext(objOut, dlOut);
    }
Esempio n. 8
0
    void run() {
        OldClientWriteContext ctx(&_txn, nss.ns());
        addIndex(BSON("b" << 1 << "a" << 1));
        addIndex(BSON("c" << 1 << "a" << 1));

        BSONObj query = fromjson("{a: 1, $or: [{b: 2}, {c: 3}]}");

        // Two of these documents match.
        insert(BSON("_id" << 1 << "a" << 1 << "b" << 2));
        insert(BSON("_id" << 2 << "a" << 2 << "b" << 2));
        insert(BSON("_id" << 3 << "a" << 1 << "c" << 3));
        insert(BSON("_id" << 4 << "a" << 1 << "c" << 4));

        auto qr = stdx::make_unique<QueryRequest>(nss);
        qr->setFilter(query);
        auto cq = unittest::assertGet(CanonicalQuery::canonicalize(
            txn(), std::move(qr), ExtensionsCallbackDisallowExtensions()));

        Collection* collection = ctx.getCollection();

        // Get planner params.
        QueryPlannerParams plannerParams;
        fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams);

        WorkingSet ws;
        std::unique_ptr<SubplanStage> subplan(
            new SubplanStage(&_txn, collection, &ws, plannerParams, cq.get()));

        // Plan selection should succeed due to falling back on regular planning.
        PlanYieldPolicy yieldPolicy(PlanExecutor::YIELD_MANUAL, _clock);
        ASSERT_OK(subplan->pickBestPlan(&yieldPolicy));

        // Work the stage until it produces all results.
        size_t numResults = 0;
        PlanStage::StageState stageState = PlanStage::NEED_TIME;
        while (stageState != PlanStage::IS_EOF) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            stageState = subplan->work(&id);
            ASSERT_NE(stageState, PlanStage::DEAD);
            ASSERT_NE(stageState, PlanStage::FAILURE);

            if (stageState == PlanStage::ADVANCED) {
                ++numResults;
                WorkingSetMember* member = ws.get(id);
                ASSERT(member->hasObj());
                ASSERT(member->obj.value() == BSON("_id" << 1 << "a" << 1 << "b" << 2) ||
                       member->obj.value() == BSON("_id" << 3 << "a" << 1 << "c" << 3));
            }
        }

        ASSERT_EQ(numResults, 2U);
    }
Esempio n. 9
0
    /**
     * Returns a vector of all of the documents currently in 'collection'.
     *
     * Uses a forward collection scan stage to get the docs, and populates 'out' with
     * the results.
     */
    void getCollContents(Collection* collection, vector<BSONObj>* out) {
        WorkingSet ws;

        CollectionScanParams params;
        params.direction = CollectionScanParams::FORWARD;
        params.tailable = false;

        unique_ptr<CollectionScan> scan(new CollectionScan(&_opCtx, collection, params, &ws, NULL));
        while (!scan->isEOF()) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState state = scan->work(&id);
            if (PlanStage::ADVANCED == state) {
                WorkingSetMember* member = ws.get(id);
                verify(member->hasObj());
                out->push_back(member->obj.value().getOwned());
            }
        }
    }
Esempio n. 10
0
        /**
         * Returns the projected value from the working set that would
         * be returned in the 'values' field of the distinct command result.
         * Limited to NumberInt BSON types because this is the only
         * BSON type used in this suite of tests.
         */
        static int getIntFieldDotted(const WorkingSet& ws, WorkingSetID wsid,
                                     const std::string& field) {
            // For some reason (at least under OS X clang), we cannot refer to INVALID_ID
            // inside the test assertion macro.
            WorkingSetID invalid = WorkingSet::INVALID_ID;
            ASSERT_NOT_EQUALS(invalid, wsid);

            WorkingSetMember* member = ws.get(wsid);

            // Distinct hack execution is always covered.
            // Key value is retrieved from working set key data
            // instead of RecordId.
            ASSERT_FALSE(member->hasObj());
            BSONElement keyElt;
            ASSERT_TRUE(member->getFieldDotted(field, &keyElt));
            ASSERT_TRUE(keyElt.isNumber());

            return keyElt.numberInt();
        }
Esempio n. 11
0
Status SortKeyGenerator::getSortKey(const WorkingSetMember& member, BSONObj* objOut) const {
    StatusWith<BSONObj> sortKey = BSONObj();

    if (member.hasObj()) {
        sortKey = getSortKeyFromObject(member);
    } else {
        sortKey = getSortKeyFromIndexKey(member);
    }
    if (!sortKey.isOK()) {
        return sortKey.getStatus();
    }

    if (!_sortHasMeta) {
        *objOut = sortKey.getValue();
        return Status::OK();
    }

    BSONObjBuilder mergedKeyBob;

    // Merge metadata into the key.
    BSONObjIterator it(_rawSortSpec);
    BSONObjIterator sortKeyIt(sortKey.getValue());
    while (it.more()) {
        BSONElement elt = it.next();
        if (elt.isNumber()) {
            // Merge btree key elt.
            mergedKeyBob.append(sortKeyIt.next());
        } else if (LiteParsedQuery::isTextScoreMeta(elt)) {
            // Add text score metadata
            double score = 0.0;
            if (member.hasComputed(WSM_COMPUTED_TEXT_SCORE)) {
                const TextScoreComputedData* scoreData = static_cast<const TextScoreComputedData*>(
                    member.getComputed(WSM_COMPUTED_TEXT_SCORE));
                score = scoreData->getScore();
            }
            mergedKeyBob.append("$metaTextScore", score);
        }
    }

    *objOut = mergedKeyBob.obj();
    return Status::OK();
}
Esempio n. 12
0
PlanStage::StageState SortStage::doWork(WorkingSetID* out) {
    const size_t maxBytes = static_cast<size_t>(internalQueryExecMaxBlockingSortBytes);
    if (_memUsage > maxBytes) {
        mongoutils::str::stream ss;
        ss << "Sort operation used more than the maximum " << maxBytes
           << " bytes of RAM. Add an index, or specify a smaller limit.";
        Status status(ErrorCodes::OperationFailed, ss);
        *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        return PlanStage::FAILURE;
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Still reading in results to sort.
    if (!_sorted) {
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState code = child()->work(&id);

        if (PlanStage::ADVANCED == code) {
            // Add it into the map for quick invalidation if it has a valid RecordId.
            // A RecordId may be invalidated at any time (during a yield).  We need to get into
            // the WorkingSet as quickly as possible to handle it.
            WorkingSetMember* member = _ws->get(id);

            // Planner must put a fetch before we get here.
            verify(member->hasObj());

            // We might be sorting something that was invalidated at some point.
            if (member->hasLoc()) {
                _wsidByDiskLoc[member->loc] = id;
            }

            SortableDataItem item;
            item.wsid = id;

            // We extract the sort key from the WSM's computed data. This must have been generated
            // by a SortKeyGeneratorStage descendent in the execution tree.
            auto sortKeyComputedData =
                static_cast<const SortKeyComputedData*>(member->getComputed(WSM_SORT_KEY));
            item.sortKey = sortKeyComputedData->getSortKey();

            if (member->hasLoc()) {
                // The RecordId breaks ties when sorting two WSMs with the same sort key.
                item.loc = member->loc;
            }

            addToBuffer(item);

            return PlanStage::NEED_TIME;
        } else if (PlanStage::IS_EOF == code) {
            // TODO: We don't need the lock for this.  We could ask for a yield and do this work
            // unlocked.  Also, this is performing a lot of work for one call to work(...)
            sortBuffer();
            _resultIterator = _data.begin();
            _sorted = true;
            return PlanStage::NEED_TIME;
        } else if (PlanStage::FAILURE == code || PlanStage::DEAD == code) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "sort stage failed to read in results to sort from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            }
            return code;
        } else if (PlanStage::NEED_YIELD == code) {
            *out = id;
        }

        return code;
    }

    // Returning results.
    verify(_resultIterator != _data.end());
    verify(_sorted);
    *out = _resultIterator->wsid;
    _resultIterator++;

    // If we're returning something, take it out of our DL -> WSID map so that future
    // calls to invalidate don't cause us to take action for a DL we're done with.
    WorkingSetMember* member = _ws->get(*out);
    if (member->hasLoc()) {
        _wsidByDiskLoc.erase(member->loc);
    }

    return PlanStage::ADVANCED;
}
Esempio n. 13
0
    Runner::RunnerState MultiPlanRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) {
        if (_killed) { return Runner::RUNNER_DEAD; }
        if (_failure) { return Runner::RUNNER_ERROR; }

        // If we haven't picked the best plan yet...
        if (NULL == _bestPlan) {
            if (!pickBestPlan(NULL, objOut)) {
                verify(_failure || _killed);
                if (_killed) { return Runner::RUNNER_DEAD; }
                if (_failure) { return Runner::RUNNER_ERROR; }
            }
        }

        // Look for an already produced result that provides the data the caller wants.
        while (!_alreadyProduced.empty()) {
            WorkingSetID id = _alreadyProduced.front();
            _alreadyProduced.pop_front();

            WorkingSetMember* member = _bestPlan->getWorkingSet()->get(id);

            // Note that this copies code from PlanExecutor.
            if (NULL != objOut) {
                if (WorkingSetMember::LOC_AND_IDX == member->state) {
                    if (1 != member->keyData.size()) {
                        _bestPlan->getWorkingSet()->free(id);
                        // If the caller needs the key data and the WSM doesn't have it, drop the
                        // result and carry on.
                        continue;
                    }
                    *objOut = member->keyData[0].keyData;
                }
                else if (member->hasObj()) {
                    *objOut = member->obj;
                }
                else {
                    // If the caller needs an object and the WSM doesn't have it, drop and
                    // try the next result.
                    _bestPlan->getWorkingSet()->free(id);
                    continue;
                }
            }

            if (NULL != dlOut) {
                if (member->hasLoc()) {
                    *dlOut = member->loc;
                }
                else {
                    // If the caller needs a DiskLoc and the WSM doesn't have it, drop and carry on.
                    _bestPlan->getWorkingSet()->free(id);
                    continue;
                }
            }

            // If we're here, the caller has all the data needed and we've set the out
            // parameters.  Remove the result from the WorkingSet.
            _bestPlan->getWorkingSet()->free(id);
            return Runner::RUNNER_ADVANCED;
        }

        RunnerState state = _bestPlan->getNext(objOut, dlOut);

        if (Runner::RUNNER_ERROR == state && (NULL != _backupSolution)) {
            QLOG() << "Best plan errored out switching to backup\n";
            // Uncache the bad solution if we fall back
            // on the backup solution.
            //
            // XXX: Instead of uncaching we should find a way for the
            // cached plan runner to fall back on a different solution
            // if the best solution fails. Alternatively we could try to
            // defer cache insertion to be after the first produced result.
            Database* db = cc().database();
            verify(NULL != db);
            Collection* collection = db->getCollection(_query->ns());
            verify(NULL != collection);
            PlanCache* cache = collection->infoCache()->getPlanCache();
            cache->remove(*_query);

            _bestPlan.reset(_backupPlan);
            _backupPlan = NULL;
            _bestSolution.reset(_backupSolution);
            _backupSolution = NULL;
            _alreadyProduced = _backupAlreadyProduced;
            return getNext(objOut, dlOut);
        }

        if (NULL != _backupSolution && Runner::RUNNER_ADVANCED == state) {
            QLOG() << "Best plan had a blocking sort, became unblocked, deleting backup plan\n";
            delete _backupSolution;
            delete _backupPlan;
            _backupSolution = NULL;
            _backupPlan = NULL;
            // TODO: free from WS?
            _backupAlreadyProduced.clear();
        }

        return state;
    }
Esempio n. 14
0
PlanStage::StageState FetchStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);

        // If there's an obj there, there is no fetching to perform.
        if (member->hasObj()) {
            ++_specificStats.alreadyHasObj;
        } else {
            // We need a valid loc to fetch from and this is the only state that has one.
            verify(WorkingSetMember::LOC_AND_IDX == member->getState());
            verify(member->hasLoc());

            try {
                if (!_cursor)
                    _cursor = _collection->getCursor(getOpCtx());

                if (auto fetcher = _cursor->fetcherForId(member->loc)) {
                    // There's something to fetch. Hand the fetcher off to the WSM, and pass up
                    // a fetch request.
                    _idRetrying = id;
                    member->setFetcher(fetcher.release());
                    *out = id;
                    _commonStats.needYield++;
                    return NEED_YIELD;
                }

                // The doc is already in memory, so go ahead and grab it. Now we have a RecordId
                // as well as an unowned object
                if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, _cursor)) {
                    _ws->free(id);
                    _commonStats.needTime++;
                    return NEED_TIME;
                }
            } catch (const WriteConflictException& wce) {
                _idRetrying = id;
                *out = WorkingSet::INVALID_ID;
                _commonStats.needYield++;
                return NEED_YIELD;
            }
        }

        return returnIfMatches(member, id, out);
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it
        // failed, in which case 'id' is valid.  If ID is invalid, we
        // create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            mongoutils::str::stream ss;
            ss << "fetch stage failed to read in results from child";
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        }
        return status;
    } else if (PlanStage::NEED_TIME == status) {
        ++_commonStats.needTime;
    } else if (PlanStage::NEED_YIELD == status) {
        ++_commonStats.needYield;
        *out = id;
    }

    return status;
}
Esempio n. 15
0
PlanStage::StageState TextStage::addTerm(WorkingSetID wsid, WorkingSetID* out) {
    WorkingSetMember* wsm = _ws->get(wsid);
    invariant(wsm->state == WorkingSetMember::LOC_AND_IDX);
    invariant(1 == wsm->keyData.size());
    const IndexKeyDatum newKeyData = wsm->keyData.back();  // copy to keep it around.

    TextRecordData* textRecordData = &_scores[wsm->loc];
    double* documentAggregateScore = &textRecordData->score;

    if (WorkingSet::INVALID_ID == textRecordData->wsid) {
        // We haven't seen this RecordId before. Keep the working set member around
        // (it may be force-fetched on saveState()).
        textRecordData->wsid = wsid;

        if (_filter) {
            // We have not seen this document before and need to apply a filter.
            bool shouldKeep;
            bool wasDeleted = false;
            try {
                TextMatchableDocument tdoc(
                    _txn, newKeyData.indexKeyPattern, newKeyData.keyData, wsm, _recordCursor);
                shouldKeep = _filter->matches(&tdoc);
            } catch (const WriteConflictException& wce) {
                _idRetrying = wsid;
                *out = WorkingSet::INVALID_ID;
                return NEED_YIELD;
            } catch (const TextMatchableDocument::DocumentDeletedException&) {
                // We attempted to fetch the document but decided it should be excluded from the
                // result set.
                shouldKeep = false;
                wasDeleted = true;
            }

            if (!shouldKeep) {
                if (wasDeleted || wsm->hasObj()) {
                    // We had to fetch but we're not going to return it.
                    ++_specificStats.fetches;
                }
                _ws->free(textRecordData->wsid);
                textRecordData->wsid = WorkingSet::INVALID_ID;
                *documentAggregateScore = -1;
                return NEED_TIME;
            }
        } else {
            // If we're here, we're going to return the doc, and we do a fetch later.
            ++_specificStats.fetches;
        }
    } else {
        // We already have a working set member for this RecordId. Free the new
        // WSM and retrieve the old one.
        // Note that since we don't keep all index keys, we could get a score that doesn't match
        // the document, but this has always been a problem.
        // TODO something to improve the situation.
        invariant(wsid != textRecordData->wsid);
        _ws->free(wsid);
        wsm = _ws->get(textRecordData->wsid);
    }

    ++_specificStats.keysExamined;

    if (*documentAggregateScore < 0) {
        // We have already rejected this document for not matching the filter.
        return NEED_TIME;
    }

    // Locate score within possibly compound key: {prefix,term,score,suffix}.
    BSONObjIterator keyIt(newKeyData.keyData);
    for (unsigned i = 0; i < _params.spec.numExtraBefore(); i++) {
        keyIt.next();
    }

    keyIt.next();  // Skip past 'term'.

    BSONElement scoreElement = keyIt.next();
    double documentTermScore = scoreElement.number();

    // Aggregate relevance score, term keys.
    *documentAggregateScore += documentTermScore;
    return NEED_TIME;
}
Esempio n. 16
0
    void run() {
        // Data is just a single {_id: 1, a: 1, b: 1} document.
        insert(BSON("_id" << 1 << "a" << 1 << "b" << 1));

        // Indices on 'a' and 'b'.
        addIndex(BSON("a" << 1));
        addIndex(BSON("b" << 1));

        AutoGetCollectionForRead ctx(&_txn, nss.ns());
        Collection* collection = ctx.getCollection();

        // Query for both 'a' and 'b' and sort on 'b'.
        auto statusWithCQ = CanonicalQuery::canonicalize(nss,
                                                         BSON("a" << 1 << "b" << 1),  // query
                                                         BSON("b" << 1),              // sort
                                                         BSONObj());                  // proj
        verify(statusWithCQ.isOK());
        unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());
        ASSERT(NULL != cq.get());

        // Force index intersection.
        bool forceIxisectOldValue = internalQueryForceIntersectionPlans;
        internalQueryForceIntersectionPlans = true;

        // Get planner params.
        QueryPlannerParams plannerParams;
        fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams);
        // Turn this off otherwise it pops up in some plans.
        plannerParams.options &= ~QueryPlannerParams::KEEP_MUTATIONS;

        // Plan.
        vector<QuerySolution*> solutions;
        Status status = QueryPlanner::plan(*cq, plannerParams, &solutions);
        ASSERT(status.isOK());

        // We expect a plan using index {a: 1} and plan using index {b: 1} and
        // an index intersection plan.
        ASSERT_EQUALS(solutions.size(), 3U);

        // Fill out the MultiPlanStage.
        unique_ptr<MultiPlanStage> mps(new MultiPlanStage(&_txn, collection, cq.get()));
        unique_ptr<WorkingSet> ws(new WorkingSet());
        // Put each solution from the planner into the MPR.
        for (size_t i = 0; i < solutions.size(); ++i) {
            PlanStage* root;
            ASSERT(StageBuilder::build(&_txn, collection, *solutions[i], ws.get(), &root));
            // Takes ownership of 'solutions[i]' and 'root'.
            mps->addPlan(solutions[i], root, ws.get());
        }

        // This sets a backup plan.
        PlanYieldPolicy yieldPolicy(NULL, PlanExecutor::YIELD_MANUAL);
        mps->pickBestPlan(&yieldPolicy);
        ASSERT(mps->bestPlanChosen());
        ASSERT(mps->hasBackupPlan());

        // We should have picked the index intersection plan due to forcing ixisect.
        QuerySolution* soln = mps->bestSolution();
        ASSERT(QueryPlannerTestLib::solutionMatches(
            "{sort: {pattern: {b: 1}, limit: 0, node: "
            "{fetch: {node: {andSorted: {nodes: ["
            "{ixscan: {filter: null, pattern: {a:1}}},"
            "{ixscan: {filter: null, pattern: {b:1}}}]}}}}}}",
            soln->root.get()));

        // Get the resulting document.
        PlanStage::StageState state = PlanStage::NEED_TIME;
        WorkingSetID wsid;
        while (state != PlanStage::ADVANCED) {
            state = mps->work(&wsid);
        }
        WorkingSetMember* member = ws->get(wsid);

        // Check the document returned by the query.
        ASSERT(member->hasObj());
        BSONObj expectedDoc = BSON("_id" << 1 << "a" << 1 << "b" << 1);
        ASSERT(expectedDoc.woCompare(member->obj.value()) == 0);

        // The blocking plan became unblocked, so we should no longer have a backup plan,
        // and the winning plan should still be the index intersection one.
        ASSERT(!mps->hasBackupPlan());
        soln = mps->bestSolution();
        ASSERT(QueryPlannerTestLib::solutionMatches(
            "{sort: {pattern: {b: 1}, limit: 0, node: "
            "{fetch: {node: {andSorted: {nodes: ["
            "{ixscan: {filter: null, pattern: {a:1}}},"
            "{ixscan: {filter: null, pattern: {b:1}}}]}}}}}}",
            soln->root.get()));

        // Restore index intersection force parameter.
        internalQueryForceIntersectionPlans = forceIxisectOldValue;
    }
Esempio n. 17
0
        void run() {
            Client::WriteContext ctx(&_txn, ns());
            
            Database* db = ctx.ctx().db();
            Collection* coll = db->getCollection(&_txn, ns());
            if (!coll) {
                coll = db->createCollection(&_txn, ns());
            }
            fillData();

            // The data we're going to later invalidate.
            set<DiskLoc> locs;
            getLocs(&locs, coll);

            // Build the mock scan stage which feeds the data.
            WorkingSet ws;
            auto_ptr<MockStage> ms(new MockStage(&ws));
            insertVarietyOfObjects(ms.get(), coll);

            SortStageParams params;
            params.collection = coll;
            params.pattern = BSON("foo" << 1);
            params.limit = limit();
            auto_ptr<SortStage> ss(new SortStage(&_txn, params, &ws, ms.get()));

            const int firstRead = 10;

            // Have sort read in data from the mock stage.
            for (int i = 0; i < firstRead; ++i) {
                WorkingSetID id = WorkingSet::INVALID_ID;
                PlanStage::StageState status = ss->work(&id);
                ASSERT_NOT_EQUALS(PlanStage::ADVANCED, status);
            }

            // We should have read in the first 'firstRead' locs.  Invalidate the first.
            ss->saveState();
            set<DiskLoc>::iterator it = locs.begin();
            ss->invalidate(*it++, INVALIDATION_DELETION);
            ss->restoreState(&_txn);

            // Read the rest of the data from the mock stage.
            while (!ms->isEOF()) {
                WorkingSetID id = WorkingSet::INVALID_ID;
                ss->work(&id);
            }

            // Release to prevent double-deletion.
            ms.release();

            // Let's just invalidate everything now.
            ss->saveState();
            while (it != locs.end()) {
                ss->invalidate(*it++, INVALIDATION_DELETION);
            }
            ss->restoreState(&_txn);

            // Invalidation of data in the sort stage fetches it but passes it through.
            int count = 0;
            while (!ss->isEOF()) {
                WorkingSetID id = WorkingSet::INVALID_ID;
                PlanStage::StageState status = ss->work(&id);
                if (PlanStage::ADVANCED != status) { continue; }
                WorkingSetMember* member = ws.get(id);
                ASSERT(member->hasObj());
                ASSERT(!member->hasLoc());
                ++count;
            }
            ctx.commit();

            // Returns all docs.
            ASSERT_EQUALS(limit() ? limit() : numObj(), count);
        }
Esempio n. 18
0
PlanStage::StageState UpdateStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    if (doneUpdating()) {
        // Even if we're done updating, we may have some inserting left to do.
        if (needInsert()) {
            // TODO we may want to handle WriteConflictException here. Currently we bounce it
            // out to a higher level since if this WCEs it is likely that we raced with another
            // upsert that may have matched our query, and therefore this may need to perform an
            // update rather than an insert. Bouncing to the higher level allows restarting the
            // query in this case.
            doInsert();

            invariant(isEOF());
            if (_params.request->shouldReturnNewDocs()) {
                // Want to return the document we just inserted, create it as a WorkingSetMember
                // so that we can return it.
                BSONObj newObj = _specificStats.objInserted;
                *out = _ws->allocate();
                WorkingSetMember* member = _ws->get(*out);
                member->obj = Snapshotted<BSONObj>(getOpCtx()->recoveryUnit()->getSnapshotId(),
                                                   newObj.getOwned());
                member->transitionToOwnedObj();
                return PlanStage::ADVANCED;
            }
        }

        // At this point either we're done updating and there was no insert to do,
        // or we're done updating and we're done inserting. Either way, we're EOF.
        invariant(isEOF());
        return PlanStage::IS_EOF;
    }

    // If we're here, then we still have to ask for results from the child and apply
    // updates to them. We should only get here if the collection exists.
    invariant(_collection);

    // It is possible that after an update was applied, a WriteConflictException
    // occurred and prevented us from returning ADVANCED with the requested version
    // of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.request->shouldReturnAnyDocs());

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        // Need to get these things from the result returned by the child.
        RecordId recordId;

        WorkingSetMember* member = _ws->get(id);

        // We want to free this member when we return, unless we need to retry updating or returning
        // it.
        ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

        if (!member->hasRecordId()) {
            // We expect to be here because of an invalidation causing a force-fetch.
            ++_specificStats.nInvalidateSkips;
            return PlanStage::NEED_TIME;
        }
        recordId = member->recordId;

        // Updates can't have projections. This means that covering analysis will always add
        // a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        // We fill this with the new RecordIds of moved doc so we don't double-update.
        if (_updatedRecordIds && _updatedRecordIds->count(recordId) > 0) {
            // Found a RecordId that refers to a document we had already updated. Note that
            // we can never remove from _updatedRecordIds because updates by other clients
            // could cause us to encounter a document again later.
            return PlanStage::NEED_TIME;
        }

        bool docStillMatches;
        try {
            docStillMatches = write_stage_common::ensureStillMatches(
                _collection, getOpCtx(), _ws, id, _params.canonicalQuery);
        } catch (const WriteConflictException&) {
            // There was a problem trying to detect if the document still exists, so retry.
            memberFreer.Dismiss();
            return prepareToRetryWSM(id, out);
        }

        if (!docStillMatches) {
            // Either the document has been deleted, or it has been updated such that it no longer
            // matches the predicate.
            if (shouldRestartUpdateIfNoLongerMatches(_params)) {
                throw WriteConflictException();
            }
            return PlanStage::NEED_TIME;
        }

        // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
        // is allowed to free the memory.
        member->makeObjOwnedIfNeeded();

        // Save state before making changes
        WorkingSetCommon::prepareForSnapshotChange(_ws);
        try {
            child()->saveState();
        } catch (const WriteConflictException&) {
            std::terminate();
        }

        // If we care about the pre-updated version of the doc, save it out here.
        BSONObj oldObj;
        if (_params.request->shouldReturnOldDocs()) {
            oldObj = member->obj.value().getOwned();
        }

        BSONObj newObj;
        try {
            // Do the update, get us the new version of the doc.
            newObj = transformAndUpdate(member->obj, recordId);
        } catch (const WriteConflictException&) {
            memberFreer.Dismiss();  // Keep this member around so we can retry updating it.
            return prepareToRetryWSM(id, out);
        }

        // Set member's obj to be the doc we want to return.
        if (_params.request->shouldReturnAnyDocs()) {
            if (_params.request->shouldReturnNewDocs()) {
                member->obj = Snapshotted<BSONObj>(getOpCtx()->recoveryUnit()->getSnapshotId(),
                                                   newObj.getOwned());
            } else {
                invariant(_params.request->shouldReturnOldDocs());
                member->obj.setValue(oldObj);
            }
            member->recordId = RecordId();
            member->transitionToOwnedObj();
        }

        // This should be after transformAndUpdate to make sure we actually updated this doc.
        ++_specificStats.nMatched;

        // Restore state after modification

        // As restoreState may restore (recreate) cursors, make sure to restore the
        // state outside of the WritUnitOfWork.
        try {
            child()->restoreState();
        } catch (const WriteConflictException&) {
            // Note we don't need to retry updating anything in this case since the update
            // already was committed. However, we still need to return the updated document
            // (if it was requested).
            if (_params.request->shouldReturnAnyDocs()) {
                // member->obj should refer to the document we want to return.
                invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

                _idReturning = id;
                // Keep this member around so that we can return it on the next work() call.
                memberFreer.Dismiss();
            }
            *out = WorkingSet::INVALID_ID;
            return NEED_YIELD;
        }

        if (_params.request->shouldReturnAnyDocs()) {
            // member->obj should refer to the document we want to return.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            memberFreer.Dismiss();  // Keep this member around so we can return it.
            *out = id;
            return PlanStage::ADVANCED;
        }

        return PlanStage::NEED_TIME;
    } else if (PlanStage::IS_EOF == status) {
        // The child is out of results, but we might not be done yet because we still might
        // have to do an insert.
        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which case
        // 'id' is valid.  If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "update stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
            return PlanStage::FAILURE;
        }
        return status;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
    }

    return status;
}
Esempio n. 19
0
    void run() {
        OldClientWriteContext ctx(&_txn, ns());
        Database* db = ctx.db();
        Collection* coll = db->getCollection(ns());
        if (!coll) {
            WriteUnitOfWork wuow(&_txn);
            coll = db->createCollection(&_txn, ns());
            wuow.commit();
        }

        WorkingSet ws;
        // Sort by foo:1
        MergeSortStageParams msparams;
        msparams.pattern = BSON("foo" << 1);
        auto ms = make_unique<MergeSortStage>(&_txn, msparams, &ws, coll);

        IndexScanParams params;
        params.bounds.isSimpleRange = true;
        params.bounds.startKey = objWithMinKey(1);
        params.bounds.endKey = objWithMaxKey(1);
        params.bounds.endKeyInclusive = true;
        params.direction = 1;

        // Index 'a'+i has foo equal to 'i'.

        int numIndices = 20;
        for (int i = 0; i < numIndices; ++i) {
            // 'a', 'b', ...
            string index(1, 'a' + i);
            insert(BSON(index << 1 << "foo" << i));

            BSONObj indexSpec = BSON(index << 1 << "foo" << 1);
            addIndex(indexSpec);
            params.descriptor = getIndex(indexSpec, coll);
            ms->addChild(new IndexScan(&_txn, params, &ws, NULL));
        }

        set<RecordId> recordIds;
        getRecordIds(&recordIds, coll);

        set<RecordId>::iterator it = recordIds.begin();

        // Get 10 results.  Should be getting results in order of 'recordIds'.
        int count = 0;
        while (!ms->isEOF() && count < 10) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState status = ms->work(&id);
            if (PlanStage::ADVANCED != status) {
                continue;
            }

            WorkingSetMember* member = ws.get(id);
            ASSERT_EQUALS(member->recordId, *it);
            BSONElement elt;
            string index(1, 'a' + count);
            ASSERT(member->getFieldDotted(index, &elt));
            ASSERT_EQUALS(1, elt.numberInt());
            ASSERT(member->getFieldDotted("foo", &elt));
            ASSERT_EQUALS(count, elt.numberInt());
            ++count;
            ++it;
        }

        // Invalidate recordIds[11].  Should force a fetch and return the deleted document.
        ms->saveState();
        ms->invalidate(&_txn, *it, INVALIDATION_DELETION);
        ms->restoreState();

        // Make sure recordIds[11] was fetched for us.
        {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState status;
            do {
                status = ms->work(&id);
            } while (PlanStage::ADVANCED != status);

            WorkingSetMember* member = ws.get(id);
            ASSERT(!member->hasRecordId());
            ASSERT(member->hasObj());
            string index(1, 'a' + count);
            BSONElement elt;
            ASSERT_TRUE(member->getFieldDotted(index, &elt));
            ASSERT_EQUALS(1, elt.numberInt());
            ASSERT(member->getFieldDotted("foo", &elt));
            ASSERT_EQUALS(count, elt.numberInt());

            ++it;
            ++count;
        }

        // And get the rest.
        while (!ms->isEOF()) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState status = ms->work(&id);
            if (PlanStage::ADVANCED != status) {
                continue;
            }

            WorkingSetMember* member = ws.get(id);
            ASSERT_EQUALS(member->recordId, *it);
            BSONElement elt;
            string index(1, 'a' + count);
            ASSERT_TRUE(member->getFieldDotted(index, &elt));
            ASSERT_EQUALS(1, elt.numberInt());
            ASSERT(member->getFieldDotted("foo", &elt));
            ASSERT_EQUALS(count, elt.numberInt());
            ++count;
            ++it;
        }
    }
Esempio n. 20
0
// static
Status WorkingSetCommon::getMemberStatus(const WorkingSetMember& member) {
    invariant(member.hasObj());
    return getMemberObjectStatus(member.obj.value());
}
Esempio n. 21
0
PlanStage::StageState GroupStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // On the first call to work(), call initGroupScripting().
    if (_groupState == GroupState_Initializing) {
        Status status = initGroupScripting();
        if (!status.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            return PlanStage::FAILURE;
        }
        _groupState = GroupState_ReadingFromChild;
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    // Otherwise, read from our child.
    invariant(_groupState == GroupState_ReadingFromChild);
    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState state = child()->work(&id);

    if (PlanStage::NEED_TIME == state) {
        ++_commonStats.needTime;
        return state;
    } else if (PlanStage::NEED_YIELD == state) {
        ++_commonStats.needYield;
        *out = id;
        return state;
    } else if (PlanStage::FAILURE == state) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which
        // case 'id' is valid.  If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "group stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
        }
        return state;
    } else if (PlanStage::DEAD == state) {
        return state;
    } else if (PlanStage::ADVANCED == state) {
        WorkingSetMember* member = _ws->get(id);
        // Group queries can't have projections. This means that covering analysis will always
        // add a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        Status status = processObject(member->obj.value());
        if (!status.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            return PlanStage::FAILURE;
        }

        _ws->free(id);

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    } else {
        // We're done reading from our child.
        invariant(PlanStage::IS_EOF == state);

        auto results = finalizeResults();
        if (!results.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, results.getStatus());
            return PlanStage::FAILURE;
        }

        // Transition to state "done."  Future calls to work() will return IS_EOF.
        _groupState = GroupState_Done;

        *out = _ws->allocate();
        WorkingSetMember* member = _ws->get(*out);
        member->obj = Snapshotted<BSONObj>(SnapshotId(), results.getValue());
        member->transitionToOwnedObj();

        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }
}
Esempio n. 22
0
PlanStage::StageState TextOrStage::addTerm(WorkingSetID wsid, WorkingSetID* out) {
    WorkingSetMember* wsm = _ws->get(wsid);
    invariant(wsm->getState() == WorkingSetMember::RID_AND_IDX);
    invariant(1 == wsm->keyData.size());
    const IndexKeyDatum newKeyData = wsm->keyData.back();  // copy to keep it around.
    TextRecordData* textRecordData = &_scores[wsm->recordId];

    if (textRecordData->score < 0) {
        // We have already rejected this document for not matching the filter.
        invariant(WorkingSet::INVALID_ID == textRecordData->wsid);
        _ws->free(wsid);
        return NEED_TIME;
    }

    if (WorkingSet::INVALID_ID == textRecordData->wsid) {
        // We haven't seen this RecordId before.
        invariant(textRecordData->score == 0);
        bool shouldKeep = true;
        if (_filter) {
            // We have not seen this document before and need to apply a filter.
            bool wasDeleted = false;
            try {
                TextMatchableDocument tdoc(getOpCtx(),
                                           newKeyData.indexKeyPattern,
                                           newKeyData.keyData,
                                           _ws,
                                           wsid,
                                           _recordCursor);
                shouldKeep = _filter->matches(&tdoc);
            } catch (const WriteConflictException& wce) {
                // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may
                // be freed when we yield.
                wsm->makeObjOwnedIfNeeded();
                _idRetrying = wsid;
                *out = WorkingSet::INVALID_ID;
                return NEED_YIELD;
            } catch (const TextMatchableDocument::DocumentDeletedException&) {
                // We attempted to fetch the document but decided it should be excluded from the
                // result set.
                shouldKeep = false;
                wasDeleted = true;
            }

            if (wasDeleted || wsm->hasObj()) {
                ++_specificStats.fetches;
            }
        }

        if (shouldKeep && !wsm->hasObj()) {
            // Our parent expects RID_AND_OBJ members, so we fetch the document here if we haven't
            // already.
            try {
                shouldKeep = WorkingSetCommon::fetch(getOpCtx(), _ws, wsid, _recordCursor);
                ++_specificStats.fetches;
            } catch (const WriteConflictException& wce) {
                wsm->makeObjOwnedIfNeeded();
                _idRetrying = wsid;
                *out = WorkingSet::INVALID_ID;
                return NEED_YIELD;
            }
        }

        if (!shouldKeep) {
            _ws->free(wsid);
            textRecordData->score = -1;
            return NEED_TIME;
        }

        textRecordData->wsid = wsid;

        // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
        wsm->makeObjOwnedIfNeeded();
    } else {
        // We already have a working set member for this RecordId. Free the new WSM and retrieve the
        // old one. Note that since we don't keep all index keys, we could get a score that doesn't
        // match the document, but this has always been a problem.
        // TODO something to improve the situation.
        invariant(wsid != textRecordData->wsid);
        _ws->free(wsid);
        wsm = _ws->get(textRecordData->wsid);
    }

    // Locate score within possibly compound key: {prefix,term,score,suffix}.
    BSONObjIterator keyIt(newKeyData.keyData);
    for (unsigned i = 0; i < _ftsSpec.numExtraBefore(); i++) {
        keyIt.next();
    }

    keyIt.next();  // Skip past 'term'.

    BSONElement scoreElement = keyIt.next();
    double documentTermScore = scoreElement.number();

    // Aggregate relevance score, term keys.
    textRecordData->score += documentTermScore;
    return NEED_TIME;
}
Esempio n. 23
0
    Runner::RunnerState PlanExecutor::getNext(BSONObj* objOut, DiskLoc* dlOut) {
        if (_killed) { return Runner::RUNNER_DEAD; }

        for (;;) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState code = _root->work(&id);

            if (PlanStage::ADVANCED == code) {
                // Fast count.
                if (WorkingSet::INVALID_ID == id) {
                    invariant(NULL == objOut);
                    invariant(NULL == dlOut);
                    return Runner::RUNNER_ADVANCED;
                }

                WorkingSetMember* member = _workingSet->get(id);
                bool hasRequestedData = true;

                if (NULL != objOut) {
                    if (WorkingSetMember::LOC_AND_IDX == member->state) {
                        if (1 != member->keyData.size()) {
                            _workingSet->free(id);
                            hasRequestedData = false;
                        }
                        else {
                            *objOut = member->keyData[0].keyData;
                        }
                    }
                    else if (member->hasObj()) {
                        *objOut = member->obj;
                    }
                    else {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    }
                }

                if (NULL != dlOut) {
                    if (member->hasLoc()) {
                        *dlOut = member->loc;
                    }
                    else {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    }
                }

                if (hasRequestedData) {
                    _workingSet->free(id);
                    return Runner::RUNNER_ADVANCED;
                }
                // This result didn't have the data the caller wanted, try again.
            }
            else if (PlanStage::NEED_TIME == code) {
                // Fall through to yield check at end of large conditional.
            }
            else if (PlanStage::NEED_FETCH == code) {
                // id has a loc and refers to an obj we need to fetch.
                WorkingSetMember* member = _workingSet->get(id);

                // This must be true for somebody to request a fetch and can only change when an
                // invalidation happens, which is when we give up a lock.  Don't give up the
                // lock between receiving the NEED_FETCH and actually fetching(?).
                verify(member->hasLoc());

                // XXX: remove NEED_FETCH
            }
            else if (PlanStage::IS_EOF == code) {
                return Runner::RUNNER_EOF;
            }
            else if (PlanStage::DEAD == code) {
                return Runner::RUNNER_DEAD;
            }
            else {
                verify(PlanStage::FAILURE == code);
                if (NULL != objOut) {
                    WorkingSetCommon::getStatusMemberObject(*_workingSet, id, objOut);
                }
                return Runner::RUNNER_ERROR;
            }
        }
    }
Esempio n. 24
0
    PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) {
        PlanStage::StageState state = _child->work(out);

        // All done reading from _child.
        if (PlanStage::IS_EOF == state) {
            _child.reset();

            // Adjust the annulus size depending on how many results we got.
            if (_results.empty()) {
                _radiusIncrement *= 2;
            } else if (_results.size() < 300) {
                _radiusIncrement *= 2;
            } else if (_results.size() > 600) {
                _radiusIncrement /= 2;
            }

            // Make a new ixscan next time.
            return PlanStage::NEED_TIME;
        }

        // Nothing to do unless we advance.
        if (PlanStage::ADVANCED != state) { return state; }

        // TODO Speed improvements:
        //
        // 0. Modify fetch to preserve key data and test for intersection w/annulus.
        //
        // 1. keep track of what we've seen in this scan and possibly ignore it.
        //
        // 2. keep track of results we've returned before and ignore them.

        WorkingSetMember* member = _ws->get(*out);
        // Must have an object in order to get geometry out of it.
        verify(member->hasObj());

        // Get all the fields with that name from the document.
        BSONElementSet geom;
        member->obj.getFieldsDotted(_params.nearQuery.field, geom, false);
        if (geom.empty()) {return PlanStage::NEED_TIME; }

        // Some value that any distance we can calculate will be less than.
        double minDistance = numeric_limits<double>::max();
        BSONObj minDistanceObj;
        for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) {
            if (!git->isABSONObj()) {
                mongoutils::str::stream ss;
                ss << "s2near stage read invalid geometry element " << *git << " from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
                return PlanStage::FAILURE;
            }
            BSONObj obj = git->Obj();

            double distToObj;
            if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) {
                if (distToObj < minDistance) {
                    minDistance = distToObj;
                    minDistanceObj = obj;
                }
            }
            else {
                warning() << "unknown geometry: " << obj.toString();
            }
        }

        // If the distance to the doc satisfies our distance criteria, add it to our buffered
        // results.
        if (minDistance >= _innerRadius &&
            (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) {
            _results.push(Result(*out, minDistance));
            if (_params.addDistMeta) {
                member->addComputed(new GeoDistanceComputedData(minDistance));
            }
            if (_params.addPointMeta) {
                member->addComputed(new GeoNearPointComputedData(minDistanceObj));
            }
            if (member->hasLoc()) {
                _invalidationMap[member->loc] = *out;
            }
        }

        return PlanStage::NEED_TIME;
    }
Esempio n. 25
0
PlanStage::StageState DeleteStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);

        // We want to free this member when we return, unless we need to retry it.
        ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

        if (!member->hasLoc()) {
            // We expect to be here because of an invalidation causing a force-fetch, and
            // doc-locking storage engines do not issue invalidations.
            ++_specificStats.nInvalidateSkips;
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        RecordId rloc = member->loc;
        // Deletes can't have projections. This means that covering analysis will always add
        // a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        try {
            // If the snapshot changed, then we have to make sure we have the latest copy of the
            // doc and that it still matches.
            std::unique_ptr<RecordCursor> cursor;
            if (getOpCtx()->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) {
                cursor = _collection->getCursor(getOpCtx());
                if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, cursor)) {
                    // Doc is already deleted. Nothing more to do.
                    ++_commonStats.needTime;
                    return PlanStage::NEED_TIME;
                }

                // Make sure the re-fetched doc still matches the predicate.
                if (_params.canonicalQuery &&
                    !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), NULL)) {
                    // Doesn't match.
                    ++_commonStats.needTime;
                    return PlanStage::NEED_TIME;
                }
            }

            // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
            // is allowed to free the memory.
            if (_params.returnDeleted) {
                member->makeObjOwnedIfNeeded();
            }

            // TODO: Do we want to buffer docs and delete them in a group rather than
            // saving/restoring state repeatedly?

            try {
                if (supportsDocLocking()) {
                    // Doc-locking engines require this before saveState() since they don't use
                    // invalidations.
                    WorkingSetCommon::prepareForSnapshotChange(_ws);
                }
                child()->saveState();
            } catch (const WriteConflictException& wce) {
                std::terminate();
            }

            if (_params.returnDeleted) {
                // Save a copy of the document that is about to get deleted.
                BSONObj deletedDoc = member->obj.value();
                member->obj.setValue(deletedDoc.getOwned());
                member->loc = RecordId();
                member->transitionToOwnedObj();
            }

            // Do the write, unless this is an explain.
            if (!_params.isExplain) {
                WriteUnitOfWork wunit(getOpCtx());
                _collection->deleteDocument(getOpCtx(), rloc);
                wunit.commit();
            }

            ++_specificStats.docsDeleted;
        } catch (const WriteConflictException& wce) {
            // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may be
            // freed when we yield.
            member->makeObjOwnedIfNeeded();
            _idRetrying = id;
            memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
            *out = WorkingSet::INVALID_ID;
            _commonStats.needYield++;
            return NEED_YIELD;
        }

        //  As restoreState may restore (recreate) cursors, cursors are tied to the
        //  transaction in which they are created, and a WriteUnitOfWork is a
        //  transaction, make sure to restore the state outside of the WritUnitOfWork.
        try {
            child()->restoreState();
        } catch (const WriteConflictException& wce) {
            // Note we don't need to retry anything in this case since the delete already
            // was committed. However, we still need to return the deleted document
            // (if it was requested).
            if (_params.returnDeleted) {
                // member->obj should refer to the deleted document.
                invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

                _idReturning = id;
                // Keep this member around so that we can return it on the next work() call.
                memberFreer.Dismiss();
            }
            *out = WorkingSet::INVALID_ID;
            _commonStats.needYield++;
            return NEED_YIELD;
        }

        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            memberFreer.Dismiss();  // Keep this member around so we can return it.
            *out = id;
            ++_commonStats.advanced;
            return PlanStage::ADVANCED;
        }

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which case
        // 'id' is valid.  If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "delete stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
        }
        return status;
    } else if (PlanStage::NEED_TIME == status) {
        ++_commonStats.needTime;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
        ++_commonStats.needYield;
    }

    return status;
}
Esempio n. 26
0
PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    if (_idRetrying != WorkingSet::INVALID_ID) {
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    } else {
        auto status = child()->work(&id);

        switch (status) {
            case PlanStage::ADVANCED:
                break;

            case PlanStage::FAILURE:
            case PlanStage::DEAD:
                // The stage which produces a failure is responsible for allocating a working set
                // member with error details.
                invariant(WorkingSet::INVALID_ID != id);
                *out = id;
                return status;

            case PlanStage::NEED_TIME:
                return status;

            case PlanStage::NEED_YIELD:
                *out = id;
                return status;

            case PlanStage::IS_EOF:
                return status;

            default:
                MONGO_UNREACHABLE;
        }
    }

    // We advanced, or are retrying, and id is set to the WSM to work on.
    WorkingSetMember* member = _ws->get(id);

    // We want to free this member when we return, unless we need to retry deleting or returning it.
    ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

    invariant(member->hasRecordId());
    RecordId recordId = member->recordId;
    // Deletes can't have projections. This means that covering analysis will always add
    // a fetch. We should always get fetched data, and never just key data.
    invariant(member->hasObj());

    // Ensure the document still exists and matches the predicate.
    bool docStillMatches;
    try {
        docStillMatches = write_stage_common::ensureStillMatches(
            _collection, getOpCtx(), _ws, id, _params.canonicalQuery);
    } catch (const WriteConflictException&) {
        // There was a problem trying to detect if the document still exists, so retry.
        memberFreer.Dismiss();
        return prepareToRetryWSM(id, out);
    }

    if (!docStillMatches) {
        // Either the document has already been deleted, or it has been updated such that it no
        // longer matches the predicate.
        if (shouldRestartDeleteIfNoLongerMatches(_params)) {
            throw WriteConflictException();
        }
        return PlanStage::NEED_TIME;
    }

    // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState() is
    // allowed to free the memory.
    if (_params.returnDeleted) {
        // Save a copy of the document that is about to get deleted, but keep it in the RID_AND_OBJ
        // state in case we need to retry deleting it.
        BSONObj deletedDoc = member->obj.value();
        member->obj.setValue(deletedDoc.getOwned());
    }

    // TODO: Do we want to buffer docs and delete them in a group rather than saving/restoring state
    // repeatedly?

    WorkingSetCommon::prepareForSnapshotChange(_ws);
    try {
        child()->saveState();
    } catch (const WriteConflictException&) {
        std::terminate();
    }

    // Do the write, unless this is an explain.
    if (!_params.isExplain) {
        try {
            WriteUnitOfWork wunit(getOpCtx());
            _collection->deleteDocument(getOpCtx(),
                                        _params.stmtId,
                                        recordId,
                                        _params.opDebug,
                                        _params.fromMigrate,
                                        false,
                                        _params.returnDeleted ? Collection::StoreDeletedDoc::On
                                                              : Collection::StoreDeletedDoc::Off);
            wunit.commit();
        } catch (const WriteConflictException&) {
            memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
            return prepareToRetryWSM(id, out);
        }
    }
    ++_specificStats.docsDeleted;

    if (_params.returnDeleted) {
        // After deleting the document, the RecordId associated with this member is invalid.
        // Remove the 'recordId' from the WorkingSetMember before returning it.
        member->recordId = RecordId();
        member->transitionToOwnedObj();
    }

    // As restoreState may restore (recreate) cursors, cursors are tied to the transaction in which
    // they are created, and a WriteUnitOfWork is a transaction, make sure to restore the state
    // outside of the WriteUnitOfWork.
    try {
        child()->restoreState();
    } catch (const WriteConflictException&) {
        // Note we don't need to retry anything in this case since the delete already was committed.
        // However, we still need to return the deleted document (if it was requested).
        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            _idReturning = id;
            // Keep this member around so that we can return it on the next work() call.
            memberFreer.Dismiss();
        }
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // member->obj should refer to the deleted document.
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        memberFreer.Dismiss();  // Keep this member around so we can return it.
        *out = id;
        return PlanStage::ADVANCED;
    }

    return PlanStage::NEED_TIME;
}
Esempio n. 27
0
    PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) {
        PlanStage::StageState state = _child->work(out);

        // All done reading from _child.
        if (PlanStage::IS_EOF == state) {
            _child.reset();
            _keyGeoFilter.reset();

            // Adjust the annulus size depending on how many results we got.
            if (_results.empty()) {
                _radiusIncrement *= 2;
            } else if (_results.size() < 300) {
                _radiusIncrement *= 2;
            } else if (_results.size() > 600) {
                _radiusIncrement /= 2;
            }

            // Make a new ixscan next time.
            return PlanStage::NEED_TIME;
        }

        // Nothing to do unless we advance.
        if (PlanStage::ADVANCED != state) { return state; }

        WorkingSetMember* member = _ws->get(*out);
        // Must have an object in order to get geometry out of it.
        verify(member->hasObj());

        // The scans we use don't dedup so we must dedup them ourselves.  We only put locs into here
        // if we know for sure whether or not we'll return them in this annulus.
        if (member->hasLoc()) {
            if (_seenInScan.end() != _seenInScan.find(member->loc)) {
                return PlanStage::NEED_TIME;
            }
        }

        // Get all the fields with that name from the document.
        BSONElementSet geom;
        member->obj.getFieldsDotted(_params.nearQuery.field, geom, false);
        if (geom.empty()) {
            return PlanStage::NEED_TIME;
        }

        // Some value that any distance we can calculate will be less than.
        double minDistance = numeric_limits<double>::max();
        BSONObj minDistanceObj;
        for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) {
            if (!git->isABSONObj()) {
                mongoutils::str::stream ss;
                ss << "s2near stage read invalid geometry element " << *git << " from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
                return PlanStage::FAILURE;
            }
            BSONObj obj = git->Obj();

            double distToObj;
            if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) {
                if (distToObj < minDistance) {
                    minDistance = distToObj;
                    minDistanceObj = obj;
                }
            }
            else {
                warning() << "unknown geometry: " << obj.toString();
            }
        }

        // If we're here we'll either include the doc in this annulus or reject it.  It's safe to
        // ignore it if it pops up again in this annulus.
        if (member->hasLoc()) {
            _seenInScan.insert(member->loc);
        }

        // If the distance to the doc satisfies our distance criteria, add it to our buffered
        // results.
        if (minDistance >= _innerRadius &&
            (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) {
            _results.push(Result(*out, minDistance));
            if (_params.addDistMeta) {
                // FLAT implies the output distances are in radians.  Convert to meters.
                if (FLAT == _params.nearQuery.centroid.crs) {
                    member->addComputed(new GeoDistanceComputedData(minDistance
                                                                    / kRadiusOfEarthInMeters));
                }
                else {
                    member->addComputed(new GeoDistanceComputedData(minDistance));
                }
            }
            if (_params.addPointMeta) {
                member->addComputed(new GeoNearPointComputedData(minDistanceObj));
            }
            if (member->hasLoc()) {
                _invalidationMap[member->loc] = *out;
            }
        }

        return PlanStage::NEED_TIME;
    }
Esempio n. 28
0
    PlanStage::StageState SortStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        if (_memUsage > kMaxBytes) {
            return PlanStage::FAILURE;
        }

        if (isEOF()) { return PlanStage::IS_EOF; }

        // Still reading in results to sort.
        if (!_sorted) {
            WorkingSetID id;
            StageState code = _child->work(&id);

            if (PlanStage::ADVANCED == code) {
                // Add it into the map for quick invalidation if it has a valid DiskLoc.
                // A DiskLoc may be invalidated at any time (during a yield).  We need to get into
                // the WorkingSet as quickly as possible to handle it.
                WorkingSetMember* member = _ws->get(id);
                if (member->hasLoc()) {
                    _wsidByDiskLoc[member->loc] = id;
                }

                // Do some accounting to make sure we're not using too much memory.
                if (member->hasLoc()) {
                    _memUsage += sizeof(DiskLoc);
                }

                // We are not supposed (yet) to sort over anything other than objects.  In other
                // words, the query planner wouldn't put a sort atop anything that wouldn't have a
                // collection scan as a leaf.
                verify(member->hasObj());
                _memUsage += member->obj.objsize();

                // We will sort '_data' in the same order an index over '_pattern' would
                // have. This has very nuanced implications. Consider the sort pattern {a:1}
                // and the document {a:[1,10]}. We have potentially two keys we could use to
                // sort on. Here we extract these keys. In the next step we decide which one to
                // use.
                BSONObjCmp patternCmp(_pattern);
                BSONObjSet keys(patternCmp);
                // XXX keyGen will throw on a "parallel array"
                _keyGen->getKeys(member->obj, &keys);
                // dumpKeys(keys);

                // To decide which key to use in sorting, we consider not only the sort pattern
                // but also if a given key, matches the query. Assume a query {a: {$gte: 5}} and
                // a document {a:1}. That document wouldn't match. In the same sense, the key '1'
                // in an array {a: [1,10]} should not be considered as being part of the result
                // set and thus that array should sort based on the '10' key. To find such key,
                // we use the bounds for the query.
                BSONObj sortKey;
                for (BSONObjSet::const_iterator it = keys.begin(); it != keys.end(); ++it) {
                    if (!_hasBounds) {
                        sortKey = *it;
                        break;
                    }

                    if (_boundsChecker->isValidKey(*it)) {
                        sortKey = *it;
                        break;
                    }
                }

                if (sortKey.isEmpty()) {
                    // We assume that if the document made it throught the sort stage, than it
                    // matches the query and thus should contain at least on array item that
                    // is within the query bounds.
                    cout << "can't find bounds for obj " << member->obj.toString() << endl;
                    cout << "bounds are " << _bounds.toString() << endl;
                    verify(0);
                }

                // We let the data stay in the WorkingSet and sort using the selected portion
                // of the object in that working set member.
                SortableDataItem item;
                item.wsid = id;
                item.sortKey = sortKey;
                _data.push_back(item);

                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else if (PlanStage::IS_EOF == code) {
                // TODO: We don't need the lock for this.  We could ask for a yield and do this work
                // unlocked.  Also, this is performing a lot of work for one call to work(...)
                std::sort(_data.begin(), _data.end(), *_cmp);
                _resultIterator = _data.begin();
                _sorted = true;
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else {
                if (PlanStage::NEED_FETCH == code) {
                    *out = id;
                    ++_commonStats.needFetch;
                }
                else if (PlanStage::NEED_TIME == code) {
                    ++_commonStats.needTime;
                }
                return code;
            }
        }

        // Returning results.
        verify(_resultIterator != _data.end());
        verify(_sorted);
        *out = _resultIterator->wsid;
        _resultIterator++;

        // If we're returning something, take it out of our DL -> WSID map so that future
        // calls to invalidate don't cause us to take action for a DL we're done with.
        WorkingSetMember* member = _ws->get(*out);
        if (member->hasLoc()) {
            _wsidByDiskLoc.erase(member->loc);
        }

        // If it was flagged, we just drop it on the floor, assuming the caller wants a DiskLoc.  We
        // could make this triggerable somehow.
        if (_ws->isFlagged(*out)) {
            _ws->free(*out);
            return PlanStage::NEED_TIME;
        }

        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }
PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    if (_idRetrying != WorkingSet::INVALID_ID) {
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    } else {
        auto status = child()->work(&id);

        switch (status) {
        case PlanStage::ADVANCED:
            break;

        case PlanStage::FAILURE:
        case PlanStage::DEAD:
            *out = id;

            // If a stage fails, it may create a status WSM to indicate why it failed, in which
            // case 'id' is valid.  If ID is invalid, we create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                const std::string errmsg = "delete stage failed to read in results from child";
                *out = WorkingSetCommon::allocateStatusMember(
                           _ws, Status(ErrorCodes::InternalError, errmsg));
            }
            return status;

        case PlanStage::NEED_TIME:
            return status;

        case PlanStage::NEED_YIELD:
            *out = id;
            return status;

        case PlanStage::IS_EOF:
            return status;

        default:
            MONGO_UNREACHABLE;
        }
    }

    // We advanced, or are retrying, and id is set to the WSM to work on.
    WorkingSetMember* member = _ws->get(id);

    // We want to free this member when we return, unless we need to retry it.
    ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

    if (!member->hasRecordId()) {
        // We expect to be here because of an invalidation causing a force-fetch.
        ++_specificStats.nInvalidateSkips;
        return PlanStage::NEED_TIME;
    }
    RecordId recordId = member->recordId;
    // Deletes can't have projections. This means that covering analysis will always add
    // a fetch. We should always get fetched data, and never just key data.
    invariant(member->hasObj());

    try {
        // If the snapshot changed, then we have to make sure we have the latest copy of the
        // doc and that it still matches.
        std::unique_ptr<SeekableRecordCursor> cursor;
        if (getOpCtx()->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) {
            cursor = _collection->getCursor(getOpCtx());
            if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, cursor)) {
                // Doc is already deleted. Nothing more to do.
                return PlanStage::NEED_TIME;
            }

            // Make sure the re-fetched doc still matches the predicate.
            if (_params.canonicalQuery &&
                    !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), NULL)) {
                // Doesn't match.
                return PlanStage::NEED_TIME;
            }
        }

        // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
        // is allowed to free the memory.
        if (_params.returnDeleted) {
            // Save a copy of the document that is about to get deleted, but keep it in the
            // RID_AND_OBJ state in case we need to retry deleting it.
            BSONObj deletedDoc = member->obj.value();
            member->obj.setValue(deletedDoc.getOwned());
        }

        // TODO: Do we want to buffer docs and delete them in a group rather than
        // saving/restoring state repeatedly?

        try {
            WorkingSetCommon::prepareForSnapshotChange(_ws);
            child()->saveState();
        } catch (const WriteConflictException& wce) {
            std::terminate();
        }

        // Do the write, unless this is an explain.
        if (!_params.isExplain) {
            WriteUnitOfWork wunit(getOpCtx());
            _collection->deleteDocument(getOpCtx(), recordId, _params.fromMigrate);
            wunit.commit();
        }

        ++_specificStats.docsDeleted;
    } catch (const WriteConflictException& wce) {
        // When we're doing a findAndModify with a sort, the sort will have a limit of 1, so will
        // not produce any more results even if there is another matching document. Re-throw the WCE
        // here so that these operations get another chance to find a matching document. The
        // findAndModify command should automatically retry if it gets a WCE.
        // TODO: this is not necessary if there was no sort specified.
        if (_params.returnDeleted) {
            throw;
        }
        _idRetrying = id;
        memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // After deleting the document, the RecordId associated with this member is invalid.
        // Remove the 'recordId' from the WorkingSetMember before returning it.
        member->recordId = RecordId();
        member->transitionToOwnedObj();
    }

    //  As restoreState may restore (recreate) cursors, cursors are tied to the
    //  transaction in which they are created, and a WriteUnitOfWork is a
    //  transaction, make sure to restore the state outside of the WritUnitOfWork.
    try {
        child()->restoreState();
    } catch (const WriteConflictException& wce) {
        // Note we don't need to retry anything in this case since the delete already
        // was committed. However, we still need to return the deleted document
        // (if it was requested).
        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            _idReturning = id;
            // Keep this member around so that we can return it on the next work() call.
            memberFreer.Dismiss();
        }
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // member->obj should refer to the deleted document.
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        memberFreer.Dismiss();  // Keep this member around so we can return it.
        *out = id;
        return PlanStage::ADVANCED;
    }

    return PlanStage::NEED_TIME;
}
Esempio n. 30
0
PlanStage::StageState SortStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (NULL == _sortKeyGen) {
        // This is heavy and should be done as part of work().
        _sortKeyGen.reset(new SortStageKeyGenerator(_collection, _pattern, _query));
        _sortKeyComparator.reset(new WorkingSetComparator(_sortKeyGen->getSortComparator()));
        // If limit > 1, we need to initialize _dataSet here to maintain ordered
        // set of data items while fetching from the child stage.
        if (_limit > 1) {
            const WorkingSetComparator& cmp = *_sortKeyComparator;
            _dataSet.reset(new SortableDataItemSet(cmp));
        }
        return PlanStage::NEED_TIME;
    }

    const size_t maxBytes = static_cast<size_t>(internalQueryExecMaxBlockingSortBytes);
    if (_memUsage > maxBytes) {
        mongoutils::str::stream ss;
        ss << "Sort operation used more than the maximum " << maxBytes
           << " bytes of RAM. Add an index, or specify a smaller limit.";
        Status status(ErrorCodes::OperationFailed, ss);
        *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        return PlanStage::FAILURE;
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Still reading in results to sort.
    if (!_sorted) {
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState code = child()->work(&id);

        if (PlanStage::ADVANCED == code) {
            // Add it into the map for quick invalidation if it has a valid RecordId.
            // A RecordId may be invalidated at any time (during a yield).  We need to get into
            // the WorkingSet as quickly as possible to handle it.
            WorkingSetMember* member = _ws->get(id);

            // Planner must put a fetch before we get here.
            verify(member->hasObj());

            // We might be sorting something that was invalidated at some point.
            if (member->hasLoc()) {
                _wsidByDiskLoc[member->loc] = id;
            }

            // The data remains in the WorkingSet and we wrap the WSID with the sort key.
            SortableDataItem item;
            Status sortKeyStatus = _sortKeyGen->getSortKey(*member, &item.sortKey);
            if (!_sortKeyGen->getSortKey(*member, &item.sortKey).isOK()) {
                *out = WorkingSetCommon::allocateStatusMember(_ws, sortKeyStatus);
                return PlanStage::FAILURE;
            }
            item.wsid = id;
            if (member->hasLoc()) {
                // The RecordId breaks ties when sorting two WSMs with the same sort key.
                item.loc = member->loc;
            }

            addToBuffer(item);

            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        } else if (PlanStage::IS_EOF == code) {
            // TODO: We don't need the lock for this.  We could ask for a yield and do this work
            // unlocked.  Also, this is performing a lot of work for one call to work(...)
            sortBuffer();
            _resultIterator = _data.begin();
            _sorted = true;
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        } else if (PlanStage::FAILURE == code || PlanStage::DEAD == code) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "sort stage failed to read in results to sort from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            }
            return code;
        } else if (PlanStage::NEED_TIME == code) {
            ++_commonStats.needTime;
        } else if (PlanStage::NEED_YIELD == code) {
            ++_commonStats.needYield;
            *out = id;
        }

        return code;
    }

    // Returning results.
    verify(_resultIterator != _data.end());
    verify(_sorted);
    *out = _resultIterator->wsid;
    _resultIterator++;

    // If we're returning something, take it out of our DL -> WSID map so that future
    // calls to invalidate don't cause us to take action for a DL we're done with.
    WorkingSetMember* member = _ws->get(*out);
    if (member->hasLoc()) {
        _wsidByDiskLoc.erase(member->loc);
    }

    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}