virtual ElementIterator* allocateIterator(const ElementPath* path) const { WorkingSetMember* member = _ws->get(_id); if (!member->hasObj()) { // Try to look in the key. BSONObjIterator keyPatternIt(_keyPattern); BSONObjIterator keyDataIt(_key); while (keyPatternIt.more()) { BSONElement keyPatternElt = keyPatternIt.next(); verify(keyDataIt.more()); BSONElement keyDataElt = keyDataIt.next(); if (path->fieldRef().equalsDottedField(keyPatternElt.fieldName())) { if (Array == keyDataElt.type()) { return new SimpleArrayElementIterator(keyDataElt, true); } else { return new SingleElementElementIterator(keyDataElt); } } } } // Go to the raw document, fetching if needed. return new BSONElementIterator(path, getObj()); }
PlanStage::StageState FetchStage::fetchCompleted(WorkingSetID* out) { WorkingSetMember* member = _ws->get(_idBeingPagedIn); // The DiskLoc we're waiting to page in was invalidated (forced fetch). Test for // matching and maybe pass it up. if (member->state == WorkingSetMember::OWNED_OBJ) { WorkingSetID memberID = _idBeingPagedIn; _idBeingPagedIn = WorkingSet::INVALID_ID; return returnIfMatches(member, memberID, out); } // Assume that the caller has fetched appropriately. // TODO: Do we want to double-check the runner? Not sure how reliable likelyInMemory is // on all platforms. verify(member->hasLoc()); verify(!member->hasObj()); // Make the (unowned) object. Record* record = member->loc.rec(); const char* data = record->dataNoThrowing(); member->obj = BSONObj(data); // Don't need index data anymore as we have an obj. member->keyData.clear(); member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; verify(!member->obj.isOwned()); // Return the obj if it passes our filter. WorkingSetID memberID = _idBeingPagedIn; _idBeingPagedIn = WorkingSet::INVALID_ID; return returnIfMatches(member, memberID, out); }
PlanStage::StageState OplogStart::workBackwardsScan(WorkingSetID* out) { PlanStage::StageState state = child()->work(out); // EOF. Just start from the beginning, which is where we've hit. if (PlanStage::IS_EOF == state) { _done = true; return state; } if (PlanStage::ADVANCED != state) { return state; } WorkingSetMember* member = _workingSet->get(*out); verify(member->hasObj()); verify(member->hasRecordId()); if (!_filter->matchesBSON(member->obj.value())) { _done = true; // RecordId is returned in *out. return PlanStage::ADVANCED; } else { _workingSet->free(*out); return PlanStage::NEED_TIME; } }
PlanStage::StageState FetchStage::work(WorkingSetID* out) { ++_commonStats.works; if (isEOF()) { return PlanStage::IS_EOF; } // If we asked our parent for a page-in last time work(...) was called, finish the fetch. if (WorkingSet::INVALID_ID != _idBeingPagedIn) { cout << "fetch completed, id being paged on " << _idBeingPagedIn << endl; return fetchCompleted(out); } // If we're here, we're not waiting for a DiskLoc to be fetched. Get another to-be-fetched // result from our child. WorkingSetID id; StageState status = _child->work(&id); if (PlanStage::ADVANCED == status) { WorkingSetMember* member = _ws->get(id); // If there's an obj there, there is no fetching to perform. if (member->hasObj()) { ++_specificStats.alreadyHasObj; return returnIfMatches(member, id, out); } // We need a valid loc to fetch from and this is the only state that has one. verify(WorkingSetMember::LOC_AND_IDX == member->state); verify(member->hasLoc()); Record* record = member->loc.rec(); const char* data = record->dataNoThrowing(); if (!recordInMemory(data)) { // member->loc points to a record that's NOT in memory. Pass a fetch request up. verify(WorkingSet::INVALID_ID == _idBeingPagedIn); _idBeingPagedIn = id; *out = id; ++_commonStats.needFetch; return PlanStage::NEED_FETCH; } else { // Don't need index data anymore as we have an obj. member->keyData.clear(); member->obj = BSONObj(data); member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; return returnIfMatches(member, id, out); } } else { if (PlanStage::NEED_FETCH == status) { *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == status) { ++_commonStats.needTime; } return status; } }
PlanStage::StageState ShardFilterStage::doWork(WorkingSetID* out) { // If we've returned as many results as we're limited to, isEOF will be true. if (isEOF()) { return PlanStage::IS_EOF; } StageState status = child()->work(out); if (PlanStage::ADVANCED == status) { // If we're sharded make sure that we don't return data that is not owned by us, // including pending documents from in-progress migrations and orphaned documents from // aborted migrations if (_metadata->isSharded()) { ShardKeyPattern shardKeyPattern(_metadata->getKeyPattern()); WorkingSetMember* member = _ws->get(*out); WorkingSetMatchableDocument matchable(member); BSONObj shardKey = shardKeyPattern.extractShardKeyFromMatchable(matchable); if (shardKey.isEmpty()) { // We can't find a shard key for this document - this should never happen with // a non-fetched result unless our query planning is screwed up if (!member->hasObj()) { Status status(ErrorCodes::InternalError, "shard key not found after a covered stage, " "query planning has failed"); // Fail loudly and cleanly in production, fatally in debug error() << redact(status); dassert(false); _ws->free(*out); *out = WorkingSetCommon::allocateStatusMember(_ws, status); return PlanStage::FAILURE; } // Skip this document with a warning - no shard key should not be possible // unless manually inserting data into a shard warning() << "no shard key found in document " << redact(member->obj.value()) << " " << "for shard key pattern " << _metadata->getKeyPattern() << ", " << "document may have been inserted manually into shard"; } if (!_metadata->keyBelongsToMe(shardKey)) { _ws->free(*out); ++_specificStats.chunkSkips; return PlanStage::NEED_TIME; } } // If we're here either we have shard state and our doc passed, or we have no shard // state. Either way, we advance. return status; } return status; }
PlanStage::StageState FetchStage::work(WorkingSetID* out) { ++_commonStats.works; // Adds the amount of time taken by work() to executionTimeMillis. ScopedTimer timer(&_commonStats.executionTimeMillis); if (isEOF()) { return PlanStage::IS_EOF; } // If we're here, we're not waiting for a DiskLoc to be fetched. Get another to-be-fetched // result from our child. WorkingSetID id = WorkingSet::INVALID_ID; StageState status = _child->work(&id); if (PlanStage::ADVANCED == status) { WorkingSetMember* member = _ws->get(id); // If there's an obj there, there is no fetching to perform. if (member->hasObj()) { ++_specificStats.alreadyHasObj; } else { // We need a valid loc to fetch from and this is the only state that has one. verify(WorkingSetMember::LOC_AND_IDX == member->state); verify(member->hasLoc()); // Don't need index data anymore as we have an obj. member->keyData.clear(); member->obj = _collection->docFor(member->loc); member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; } ++_specificStats.docsExamined; return returnIfMatches(member, id, out); } else if (PlanStage::FAILURE == status) { *out = id; // If a stage fails, it may create a status WSM to indicate why it // failed, in which case 'id' is valid. If ID is invalid, we // create our own error message. if (WorkingSet::INVALID_ID == id) { mongoutils::str::stream ss; ss << "fetch stage failed to read in results from child"; Status status(ErrorCodes::InternalError, ss); *out = WorkingSetCommon::allocateStatusMember( _ws, status); } return status; } else { if (PlanStage::NEED_TIME == status) { ++_commonStats.needTime; } return status; } }
Runner::RunnerState MultiPlanRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) { if (_killed) { return Runner::RUNNER_DEAD; } if (_failure) { return Runner::RUNNER_ERROR; } // If we haven't picked the best plan yet... if (NULL == _bestPlan) { if (!pickBestPlan(NULL)) { verify(_failure || _killed); if (_killed) { return Runner::RUNNER_DEAD; } if (_failure) { return Runner::RUNNER_ERROR; } } } if (!_alreadyProduced.empty()) { WorkingSetID id = _alreadyProduced.front(); _alreadyProduced.pop_front(); WorkingSetMember* member = _bestPlan->getWorkingSet()->get(id); // Note that this copies code from PlanExecutor. if (NULL != objOut) { if (WorkingSetMember::LOC_AND_IDX == member->state) { if (1 != member->keyData.size()) { _bestPlan->getWorkingSet()->free(id); return Runner::RUNNER_ERROR; } *objOut = member->keyData[0].keyData; } else if (member->hasObj()) { *objOut = member->obj; } else { // TODO: Checking the WSM for covered fields goes here. _bestPlan->getWorkingSet()->free(id); return Runner::RUNNER_ERROR; } } if (NULL != dlOut) { if (member->hasLoc()) { *dlOut = member->loc; } else { _bestPlan->getWorkingSet()->free(id); return Runner::RUNNER_ERROR; } } _bestPlan->getWorkingSet()->free(id); return Runner::RUNNER_ADVANCED; } return _bestPlan->getNext(objOut, dlOut); }
void run() { OldClientWriteContext ctx(&_txn, nss.ns()); addIndex(BSON("b" << 1 << "a" << 1)); addIndex(BSON("c" << 1 << "a" << 1)); BSONObj query = fromjson("{a: 1, $or: [{b: 2}, {c: 3}]}"); // Two of these documents match. insert(BSON("_id" << 1 << "a" << 1 << "b" << 2)); insert(BSON("_id" << 2 << "a" << 2 << "b" << 2)); insert(BSON("_id" << 3 << "a" << 1 << "c" << 3)); insert(BSON("_id" << 4 << "a" << 1 << "c" << 4)); auto qr = stdx::make_unique<QueryRequest>(nss); qr->setFilter(query); auto cq = unittest::assertGet(CanonicalQuery::canonicalize( txn(), std::move(qr), ExtensionsCallbackDisallowExtensions())); Collection* collection = ctx.getCollection(); // Get planner params. QueryPlannerParams plannerParams; fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams); WorkingSet ws; std::unique_ptr<SubplanStage> subplan( new SubplanStage(&_txn, collection, &ws, plannerParams, cq.get())); // Plan selection should succeed due to falling back on regular planning. PlanYieldPolicy yieldPolicy(PlanExecutor::YIELD_MANUAL, _clock); ASSERT_OK(subplan->pickBestPlan(&yieldPolicy)); // Work the stage until it produces all results. size_t numResults = 0; PlanStage::StageState stageState = PlanStage::NEED_TIME; while (stageState != PlanStage::IS_EOF) { WorkingSetID id = WorkingSet::INVALID_ID; stageState = subplan->work(&id); ASSERT_NE(stageState, PlanStage::DEAD); ASSERT_NE(stageState, PlanStage::FAILURE); if (stageState == PlanStage::ADVANCED) { ++numResults; WorkingSetMember* member = ws.get(id); ASSERT(member->hasObj()); ASSERT(member->obj.value() == BSON("_id" << 1 << "a" << 1 << "b" << 2) || member->obj.value() == BSON("_id" << 3 << "a" << 1 << "c" << 3)); } } ASSERT_EQ(numResults, 2U); }
/** * Returns a vector of all of the documents currently in 'collection'. * * Uses a forward collection scan stage to get the docs, and populates 'out' with * the results. */ void getCollContents(Collection* collection, vector<BSONObj>* out) { WorkingSet ws; CollectionScanParams params; params.direction = CollectionScanParams::FORWARD; params.tailable = false; unique_ptr<CollectionScan> scan(new CollectionScan(&_opCtx, collection, params, &ws, NULL)); while (!scan->isEOF()) { WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState state = scan->work(&id); if (PlanStage::ADVANCED == state) { WorkingSetMember* member = ws.get(id); verify(member->hasObj()); out->push_back(member->obj.value().getOwned()); } } }
/** * Returns the projected value from the working set that would * be returned in the 'values' field of the distinct command result. * Limited to NumberInt BSON types because this is the only * BSON type used in this suite of tests. */ static int getIntFieldDotted(const WorkingSet& ws, WorkingSetID wsid, const std::string& field) { // For some reason (at least under OS X clang), we cannot refer to INVALID_ID // inside the test assertion macro. WorkingSetID invalid = WorkingSet::INVALID_ID; ASSERT_NOT_EQUALS(invalid, wsid); WorkingSetMember* member = ws.get(wsid); // Distinct hack execution is always covered. // Key value is retrieved from working set key data // instead of RecordId. ASSERT_FALSE(member->hasObj()); BSONElement keyElt; ASSERT_TRUE(member->getFieldDotted(field, &keyElt)); ASSERT_TRUE(keyElt.isNumber()); return keyElt.numberInt(); }
Status SortKeyGenerator::getSortKey(const WorkingSetMember& member, BSONObj* objOut) const { StatusWith<BSONObj> sortKey = BSONObj(); if (member.hasObj()) { sortKey = getSortKeyFromObject(member); } else { sortKey = getSortKeyFromIndexKey(member); } if (!sortKey.isOK()) { return sortKey.getStatus(); } if (!_sortHasMeta) { *objOut = sortKey.getValue(); return Status::OK(); } BSONObjBuilder mergedKeyBob; // Merge metadata into the key. BSONObjIterator it(_rawSortSpec); BSONObjIterator sortKeyIt(sortKey.getValue()); while (it.more()) { BSONElement elt = it.next(); if (elt.isNumber()) { // Merge btree key elt. mergedKeyBob.append(sortKeyIt.next()); } else if (LiteParsedQuery::isTextScoreMeta(elt)) { // Add text score metadata double score = 0.0; if (member.hasComputed(WSM_COMPUTED_TEXT_SCORE)) { const TextScoreComputedData* scoreData = static_cast<const TextScoreComputedData*>( member.getComputed(WSM_COMPUTED_TEXT_SCORE)); score = scoreData->getScore(); } mergedKeyBob.append("$metaTextScore", score); } } *objOut = mergedKeyBob.obj(); return Status::OK(); }
PlanStage::StageState SortStage::doWork(WorkingSetID* out) { const size_t maxBytes = static_cast<size_t>(internalQueryExecMaxBlockingSortBytes); if (_memUsage > maxBytes) { mongoutils::str::stream ss; ss << "Sort operation used more than the maximum " << maxBytes << " bytes of RAM. Add an index, or specify a smaller limit."; Status status(ErrorCodes::OperationFailed, ss); *out = WorkingSetCommon::allocateStatusMember(_ws, status); return PlanStage::FAILURE; } if (isEOF()) { return PlanStage::IS_EOF; } // Still reading in results to sort. if (!_sorted) { WorkingSetID id = WorkingSet::INVALID_ID; StageState code = child()->work(&id); if (PlanStage::ADVANCED == code) { // Add it into the map for quick invalidation if it has a valid RecordId. // A RecordId may be invalidated at any time (during a yield). We need to get into // the WorkingSet as quickly as possible to handle it. WorkingSetMember* member = _ws->get(id); // Planner must put a fetch before we get here. verify(member->hasObj()); // We might be sorting something that was invalidated at some point. if (member->hasLoc()) { _wsidByDiskLoc[member->loc] = id; } SortableDataItem item; item.wsid = id; // We extract the sort key from the WSM's computed data. This must have been generated // by a SortKeyGeneratorStage descendent in the execution tree. auto sortKeyComputedData = static_cast<const SortKeyComputedData*>(member->getComputed(WSM_SORT_KEY)); item.sortKey = sortKeyComputedData->getSortKey(); if (member->hasLoc()) { // The RecordId breaks ties when sorting two WSMs with the same sort key. item.loc = member->loc; } addToBuffer(item); return PlanStage::NEED_TIME; } else if (PlanStage::IS_EOF == code) { // TODO: We don't need the lock for this. We could ask for a yield and do this work // unlocked. Also, this is performing a lot of work for one call to work(...) sortBuffer(); _resultIterator = _data.begin(); _sorted = true; return PlanStage::NEED_TIME; } else if (PlanStage::FAILURE == code || PlanStage::DEAD == code) { *out = id; // If a stage fails, it may create a status WSM to indicate why it // failed, in which case 'id' is valid. If ID is invalid, we // create our own error message. if (WorkingSet::INVALID_ID == id) { mongoutils::str::stream ss; ss << "sort stage failed to read in results to sort from child"; Status status(ErrorCodes::InternalError, ss); *out = WorkingSetCommon::allocateStatusMember(_ws, status); } return code; } else if (PlanStage::NEED_YIELD == code) { *out = id; } return code; } // Returning results. verify(_resultIterator != _data.end()); verify(_sorted); *out = _resultIterator->wsid; _resultIterator++; // If we're returning something, take it out of our DL -> WSID map so that future // calls to invalidate don't cause us to take action for a DL we're done with. WorkingSetMember* member = _ws->get(*out); if (member->hasLoc()) { _wsidByDiskLoc.erase(member->loc); } return PlanStage::ADVANCED; }
Runner::RunnerState MultiPlanRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) { if (_killed) { return Runner::RUNNER_DEAD; } if (_failure) { return Runner::RUNNER_ERROR; } // If we haven't picked the best plan yet... if (NULL == _bestPlan) { if (!pickBestPlan(NULL, objOut)) { verify(_failure || _killed); if (_killed) { return Runner::RUNNER_DEAD; } if (_failure) { return Runner::RUNNER_ERROR; } } } // Look for an already produced result that provides the data the caller wants. while (!_alreadyProduced.empty()) { WorkingSetID id = _alreadyProduced.front(); _alreadyProduced.pop_front(); WorkingSetMember* member = _bestPlan->getWorkingSet()->get(id); // Note that this copies code from PlanExecutor. if (NULL != objOut) { if (WorkingSetMember::LOC_AND_IDX == member->state) { if (1 != member->keyData.size()) { _bestPlan->getWorkingSet()->free(id); // If the caller needs the key data and the WSM doesn't have it, drop the // result and carry on. continue; } *objOut = member->keyData[0].keyData; } else if (member->hasObj()) { *objOut = member->obj; } else { // If the caller needs an object and the WSM doesn't have it, drop and // try the next result. _bestPlan->getWorkingSet()->free(id); continue; } } if (NULL != dlOut) { if (member->hasLoc()) { *dlOut = member->loc; } else { // If the caller needs a DiskLoc and the WSM doesn't have it, drop and carry on. _bestPlan->getWorkingSet()->free(id); continue; } } // If we're here, the caller has all the data needed and we've set the out // parameters. Remove the result from the WorkingSet. _bestPlan->getWorkingSet()->free(id); return Runner::RUNNER_ADVANCED; } RunnerState state = _bestPlan->getNext(objOut, dlOut); if (Runner::RUNNER_ERROR == state && (NULL != _backupSolution)) { QLOG() << "Best plan errored out switching to backup\n"; // Uncache the bad solution if we fall back // on the backup solution. // // XXX: Instead of uncaching we should find a way for the // cached plan runner to fall back on a different solution // if the best solution fails. Alternatively we could try to // defer cache insertion to be after the first produced result. Database* db = cc().database(); verify(NULL != db); Collection* collection = db->getCollection(_query->ns()); verify(NULL != collection); PlanCache* cache = collection->infoCache()->getPlanCache(); cache->remove(*_query); _bestPlan.reset(_backupPlan); _backupPlan = NULL; _bestSolution.reset(_backupSolution); _backupSolution = NULL; _alreadyProduced = _backupAlreadyProduced; return getNext(objOut, dlOut); } if (NULL != _backupSolution && Runner::RUNNER_ADVANCED == state) { QLOG() << "Best plan had a blocking sort, became unblocked, deleting backup plan\n"; delete _backupSolution; delete _backupPlan; _backupSolution = NULL; _backupPlan = NULL; // TODO: free from WS? _backupAlreadyProduced.clear(); } return state; }
PlanStage::StageState FetchStage::work(WorkingSetID* out) { ++_commonStats.works; // Adds the amount of time taken by work() to executionTimeMillis. ScopedTimer timer(&_commonStats.executionTimeMillis); if (isEOF()) { return PlanStage::IS_EOF; } // Either retry the last WSM we worked on or get a new one from our child. WorkingSetID id; StageState status; if (_idRetrying == WorkingSet::INVALID_ID) { status = child()->work(&id); } else { status = ADVANCED; id = _idRetrying; _idRetrying = WorkingSet::INVALID_ID; } if (PlanStage::ADVANCED == status) { WorkingSetMember* member = _ws->get(id); // If there's an obj there, there is no fetching to perform. if (member->hasObj()) { ++_specificStats.alreadyHasObj; } else { // We need a valid loc to fetch from and this is the only state that has one. verify(WorkingSetMember::LOC_AND_IDX == member->getState()); verify(member->hasLoc()); try { if (!_cursor) _cursor = _collection->getCursor(getOpCtx()); if (auto fetcher = _cursor->fetcherForId(member->loc)) { // There's something to fetch. Hand the fetcher off to the WSM, and pass up // a fetch request. _idRetrying = id; member->setFetcher(fetcher.release()); *out = id; _commonStats.needYield++; return NEED_YIELD; } // The doc is already in memory, so go ahead and grab it. Now we have a RecordId // as well as an unowned object if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, _cursor)) { _ws->free(id); _commonStats.needTime++; return NEED_TIME; } } catch (const WriteConflictException& wce) { _idRetrying = id; *out = WorkingSet::INVALID_ID; _commonStats.needYield++; return NEED_YIELD; } } return returnIfMatches(member, id, out); } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) { *out = id; // If a stage fails, it may create a status WSM to indicate why it // failed, in which case 'id' is valid. If ID is invalid, we // create our own error message. if (WorkingSet::INVALID_ID == id) { mongoutils::str::stream ss; ss << "fetch stage failed to read in results from child"; Status status(ErrorCodes::InternalError, ss); *out = WorkingSetCommon::allocateStatusMember(_ws, status); } return status; } else if (PlanStage::NEED_TIME == status) { ++_commonStats.needTime; } else if (PlanStage::NEED_YIELD == status) { ++_commonStats.needYield; *out = id; } return status; }
PlanStage::StageState TextStage::addTerm(WorkingSetID wsid, WorkingSetID* out) { WorkingSetMember* wsm = _ws->get(wsid); invariant(wsm->state == WorkingSetMember::LOC_AND_IDX); invariant(1 == wsm->keyData.size()); const IndexKeyDatum newKeyData = wsm->keyData.back(); // copy to keep it around. TextRecordData* textRecordData = &_scores[wsm->loc]; double* documentAggregateScore = &textRecordData->score; if (WorkingSet::INVALID_ID == textRecordData->wsid) { // We haven't seen this RecordId before. Keep the working set member around // (it may be force-fetched on saveState()). textRecordData->wsid = wsid; if (_filter) { // We have not seen this document before and need to apply a filter. bool shouldKeep; bool wasDeleted = false; try { TextMatchableDocument tdoc( _txn, newKeyData.indexKeyPattern, newKeyData.keyData, wsm, _recordCursor); shouldKeep = _filter->matches(&tdoc); } catch (const WriteConflictException& wce) { _idRetrying = wsid; *out = WorkingSet::INVALID_ID; return NEED_YIELD; } catch (const TextMatchableDocument::DocumentDeletedException&) { // We attempted to fetch the document but decided it should be excluded from the // result set. shouldKeep = false; wasDeleted = true; } if (!shouldKeep) { if (wasDeleted || wsm->hasObj()) { // We had to fetch but we're not going to return it. ++_specificStats.fetches; } _ws->free(textRecordData->wsid); textRecordData->wsid = WorkingSet::INVALID_ID; *documentAggregateScore = -1; return NEED_TIME; } } else { // If we're here, we're going to return the doc, and we do a fetch later. ++_specificStats.fetches; } } else { // We already have a working set member for this RecordId. Free the new // WSM and retrieve the old one. // Note that since we don't keep all index keys, we could get a score that doesn't match // the document, but this has always been a problem. // TODO something to improve the situation. invariant(wsid != textRecordData->wsid); _ws->free(wsid); wsm = _ws->get(textRecordData->wsid); } ++_specificStats.keysExamined; if (*documentAggregateScore < 0) { // We have already rejected this document for not matching the filter. return NEED_TIME; } // Locate score within possibly compound key: {prefix,term,score,suffix}. BSONObjIterator keyIt(newKeyData.keyData); for (unsigned i = 0; i < _params.spec.numExtraBefore(); i++) { keyIt.next(); } keyIt.next(); // Skip past 'term'. BSONElement scoreElement = keyIt.next(); double documentTermScore = scoreElement.number(); // Aggregate relevance score, term keys. *documentAggregateScore += documentTermScore; return NEED_TIME; }
void run() { // Data is just a single {_id: 1, a: 1, b: 1} document. insert(BSON("_id" << 1 << "a" << 1 << "b" << 1)); // Indices on 'a' and 'b'. addIndex(BSON("a" << 1)); addIndex(BSON("b" << 1)); AutoGetCollectionForRead ctx(&_txn, nss.ns()); Collection* collection = ctx.getCollection(); // Query for both 'a' and 'b' and sort on 'b'. auto statusWithCQ = CanonicalQuery::canonicalize(nss, BSON("a" << 1 << "b" << 1), // query BSON("b" << 1), // sort BSONObj()); // proj verify(statusWithCQ.isOK()); unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue()); ASSERT(NULL != cq.get()); // Force index intersection. bool forceIxisectOldValue = internalQueryForceIntersectionPlans; internalQueryForceIntersectionPlans = true; // Get planner params. QueryPlannerParams plannerParams; fillOutPlannerParams(&_txn, collection, cq.get(), &plannerParams); // Turn this off otherwise it pops up in some plans. plannerParams.options &= ~QueryPlannerParams::KEEP_MUTATIONS; // Plan. vector<QuerySolution*> solutions; Status status = QueryPlanner::plan(*cq, plannerParams, &solutions); ASSERT(status.isOK()); // We expect a plan using index {a: 1} and plan using index {b: 1} and // an index intersection plan. ASSERT_EQUALS(solutions.size(), 3U); // Fill out the MultiPlanStage. unique_ptr<MultiPlanStage> mps(new MultiPlanStage(&_txn, collection, cq.get())); unique_ptr<WorkingSet> ws(new WorkingSet()); // Put each solution from the planner into the MPR. for (size_t i = 0; i < solutions.size(); ++i) { PlanStage* root; ASSERT(StageBuilder::build(&_txn, collection, *solutions[i], ws.get(), &root)); // Takes ownership of 'solutions[i]' and 'root'. mps->addPlan(solutions[i], root, ws.get()); } // This sets a backup plan. PlanYieldPolicy yieldPolicy(NULL, PlanExecutor::YIELD_MANUAL); mps->pickBestPlan(&yieldPolicy); ASSERT(mps->bestPlanChosen()); ASSERT(mps->hasBackupPlan()); // We should have picked the index intersection plan due to forcing ixisect. QuerySolution* soln = mps->bestSolution(); ASSERT(QueryPlannerTestLib::solutionMatches( "{sort: {pattern: {b: 1}, limit: 0, node: " "{fetch: {node: {andSorted: {nodes: [" "{ixscan: {filter: null, pattern: {a:1}}}," "{ixscan: {filter: null, pattern: {b:1}}}]}}}}}}", soln->root.get())); // Get the resulting document. PlanStage::StageState state = PlanStage::NEED_TIME; WorkingSetID wsid; while (state != PlanStage::ADVANCED) { state = mps->work(&wsid); } WorkingSetMember* member = ws->get(wsid); // Check the document returned by the query. ASSERT(member->hasObj()); BSONObj expectedDoc = BSON("_id" << 1 << "a" << 1 << "b" << 1); ASSERT(expectedDoc.woCompare(member->obj.value()) == 0); // The blocking plan became unblocked, so we should no longer have a backup plan, // and the winning plan should still be the index intersection one. ASSERT(!mps->hasBackupPlan()); soln = mps->bestSolution(); ASSERT(QueryPlannerTestLib::solutionMatches( "{sort: {pattern: {b: 1}, limit: 0, node: " "{fetch: {node: {andSorted: {nodes: [" "{ixscan: {filter: null, pattern: {a:1}}}," "{ixscan: {filter: null, pattern: {b:1}}}]}}}}}}", soln->root.get())); // Restore index intersection force parameter. internalQueryForceIntersectionPlans = forceIxisectOldValue; }
void run() { Client::WriteContext ctx(&_txn, ns()); Database* db = ctx.ctx().db(); Collection* coll = db->getCollection(&_txn, ns()); if (!coll) { coll = db->createCollection(&_txn, ns()); } fillData(); // The data we're going to later invalidate. set<DiskLoc> locs; getLocs(&locs, coll); // Build the mock scan stage which feeds the data. WorkingSet ws; auto_ptr<MockStage> ms(new MockStage(&ws)); insertVarietyOfObjects(ms.get(), coll); SortStageParams params; params.collection = coll; params.pattern = BSON("foo" << 1); params.limit = limit(); auto_ptr<SortStage> ss(new SortStage(&_txn, params, &ws, ms.get())); const int firstRead = 10; // Have sort read in data from the mock stage. for (int i = 0; i < firstRead; ++i) { WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState status = ss->work(&id); ASSERT_NOT_EQUALS(PlanStage::ADVANCED, status); } // We should have read in the first 'firstRead' locs. Invalidate the first. ss->saveState(); set<DiskLoc>::iterator it = locs.begin(); ss->invalidate(*it++, INVALIDATION_DELETION); ss->restoreState(&_txn); // Read the rest of the data from the mock stage. while (!ms->isEOF()) { WorkingSetID id = WorkingSet::INVALID_ID; ss->work(&id); } // Release to prevent double-deletion. ms.release(); // Let's just invalidate everything now. ss->saveState(); while (it != locs.end()) { ss->invalidate(*it++, INVALIDATION_DELETION); } ss->restoreState(&_txn); // Invalidation of data in the sort stage fetches it but passes it through. int count = 0; while (!ss->isEOF()) { WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState status = ss->work(&id); if (PlanStage::ADVANCED != status) { continue; } WorkingSetMember* member = ws.get(id); ASSERT(member->hasObj()); ASSERT(!member->hasLoc()); ++count; } ctx.commit(); // Returns all docs. ASSERT_EQUALS(limit() ? limit() : numObj(), count); }
PlanStage::StageState UpdateStage::doWork(WorkingSetID* out) { if (isEOF()) { return PlanStage::IS_EOF; } if (doneUpdating()) { // Even if we're done updating, we may have some inserting left to do. if (needInsert()) { // TODO we may want to handle WriteConflictException here. Currently we bounce it // out to a higher level since if this WCEs it is likely that we raced with another // upsert that may have matched our query, and therefore this may need to perform an // update rather than an insert. Bouncing to the higher level allows restarting the // query in this case. doInsert(); invariant(isEOF()); if (_params.request->shouldReturnNewDocs()) { // Want to return the document we just inserted, create it as a WorkingSetMember // so that we can return it. BSONObj newObj = _specificStats.objInserted; *out = _ws->allocate(); WorkingSetMember* member = _ws->get(*out); member->obj = Snapshotted<BSONObj>(getOpCtx()->recoveryUnit()->getSnapshotId(), newObj.getOwned()); member->transitionToOwnedObj(); return PlanStage::ADVANCED; } } // At this point either we're done updating and there was no insert to do, // or we're done updating and we're done inserting. Either way, we're EOF. invariant(isEOF()); return PlanStage::IS_EOF; } // If we're here, then we still have to ask for results from the child and apply // updates to them. We should only get here if the collection exists. invariant(_collection); // It is possible that after an update was applied, a WriteConflictException // occurred and prevented us from returning ADVANCED with the requested version // of the document. if (_idReturning != WorkingSet::INVALID_ID) { // We should only get here if we were trying to return something before. invariant(_params.request->shouldReturnAnyDocs()); WorkingSetMember* member = _ws->get(_idReturning); invariant(member->getState() == WorkingSetMember::OWNED_OBJ); *out = _idReturning; _idReturning = WorkingSet::INVALID_ID; return PlanStage::ADVANCED; } // Either retry the last WSM we worked on or get a new one from our child. WorkingSetID id; StageState status; if (_idRetrying == WorkingSet::INVALID_ID) { status = child()->work(&id); } else { status = ADVANCED; id = _idRetrying; _idRetrying = WorkingSet::INVALID_ID; } if (PlanStage::ADVANCED == status) { // Need to get these things from the result returned by the child. RecordId recordId; WorkingSetMember* member = _ws->get(id); // We want to free this member when we return, unless we need to retry updating or returning // it. ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id); if (!member->hasRecordId()) { // We expect to be here because of an invalidation causing a force-fetch. ++_specificStats.nInvalidateSkips; return PlanStage::NEED_TIME; } recordId = member->recordId; // Updates can't have projections. This means that covering analysis will always add // a fetch. We should always get fetched data, and never just key data. invariant(member->hasObj()); // We fill this with the new RecordIds of moved doc so we don't double-update. if (_updatedRecordIds && _updatedRecordIds->count(recordId) > 0) { // Found a RecordId that refers to a document we had already updated. Note that // we can never remove from _updatedRecordIds because updates by other clients // could cause us to encounter a document again later. 
return PlanStage::NEED_TIME; } bool docStillMatches; try { docStillMatches = write_stage_common::ensureStillMatches( _collection, getOpCtx(), _ws, id, _params.canonicalQuery); } catch (const WriteConflictException&) { // There was a problem trying to detect if the document still exists, so retry. memberFreer.Dismiss(); return prepareToRetryWSM(id, out); } if (!docStillMatches) { // Either the document has been deleted, or it has been updated such that it no longer // matches the predicate. if (shouldRestartUpdateIfNoLongerMatches(_params)) { throw WriteConflictException(); } return PlanStage::NEED_TIME; } // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState() // is allowed to free the memory. member->makeObjOwnedIfNeeded(); // Save state before making changes WorkingSetCommon::prepareForSnapshotChange(_ws); try { child()->saveState(); } catch (const WriteConflictException&) { std::terminate(); } // If we care about the pre-updated version of the doc, save it out here. BSONObj oldObj; if (_params.request->shouldReturnOldDocs()) { oldObj = member->obj.value().getOwned(); } BSONObj newObj; try { // Do the update, get us the new version of the doc. newObj = transformAndUpdate(member->obj, recordId); } catch (const WriteConflictException&) { memberFreer.Dismiss(); // Keep this member around so we can retry updating it. return prepareToRetryWSM(id, out); } // Set member's obj to be the doc we want to return. if (_params.request->shouldReturnAnyDocs()) { if (_params.request->shouldReturnNewDocs()) { member->obj = Snapshotted<BSONObj>(getOpCtx()->recoveryUnit()->getSnapshotId(), newObj.getOwned()); } else { invariant(_params.request->shouldReturnOldDocs()); member->obj.setValue(oldObj); } member->recordId = RecordId(); member->transitionToOwnedObj(); } // This should be after transformAndUpdate to make sure we actually updated this doc. ++_specificStats.nMatched; // Restore state after modification // As restoreState may restore (recreate) cursors, make sure to restore the // state outside of the WritUnitOfWork. try { child()->restoreState(); } catch (const WriteConflictException&) { // Note we don't need to retry updating anything in this case since the update // already was committed. However, we still need to return the updated document // (if it was requested). if (_params.request->shouldReturnAnyDocs()) { // member->obj should refer to the document we want to return. invariant(member->getState() == WorkingSetMember::OWNED_OBJ); _idReturning = id; // Keep this member around so that we can return it on the next work() call. memberFreer.Dismiss(); } *out = WorkingSet::INVALID_ID; return NEED_YIELD; } if (_params.request->shouldReturnAnyDocs()) { // member->obj should refer to the document we want to return. invariant(member->getState() == WorkingSetMember::OWNED_OBJ); memberFreer.Dismiss(); // Keep this member around so we can return it. *out = id; return PlanStage::ADVANCED; } return PlanStage::NEED_TIME; } else if (PlanStage::IS_EOF == status) { // The child is out of results, but we might not be done yet because we still might // have to do an insert. return PlanStage::NEED_TIME; } else if (PlanStage::FAILURE == status) { *out = id; // If a stage fails, it may create a status WSM to indicate why it failed, in which case // 'id' is valid. If ID is invalid, we create our own error message. 
if (WorkingSet::INVALID_ID == id) { const std::string errmsg = "update stage failed to read in results from child"; *out = WorkingSetCommon::allocateStatusMember( _ws, Status(ErrorCodes::InternalError, errmsg)); return PlanStage::FAILURE; } return status; } else if (PlanStage::NEED_YIELD == status) { *out = id; } return status; }
void run() { OldClientWriteContext ctx(&_txn, ns()); Database* db = ctx.db(); Collection* coll = db->getCollection(ns()); if (!coll) { WriteUnitOfWork wuow(&_txn); coll = db->createCollection(&_txn, ns()); wuow.commit(); } WorkingSet ws; // Sort by foo:1 MergeSortStageParams msparams; msparams.pattern = BSON("foo" << 1); auto ms = make_unique<MergeSortStage>(&_txn, msparams, &ws, coll); IndexScanParams params; params.bounds.isSimpleRange = true; params.bounds.startKey = objWithMinKey(1); params.bounds.endKey = objWithMaxKey(1); params.bounds.endKeyInclusive = true; params.direction = 1; // Index 'a'+i has foo equal to 'i'. int numIndices = 20; for (int i = 0; i < numIndices; ++i) { // 'a', 'b', ... string index(1, 'a' + i); insert(BSON(index << 1 << "foo" << i)); BSONObj indexSpec = BSON(index << 1 << "foo" << 1); addIndex(indexSpec); params.descriptor = getIndex(indexSpec, coll); ms->addChild(new IndexScan(&_txn, params, &ws, NULL)); } set<RecordId> recordIds; getRecordIds(&recordIds, coll); set<RecordId>::iterator it = recordIds.begin(); // Get 10 results. Should be getting results in order of 'recordIds'. int count = 0; while (!ms->isEOF() && count < 10) { WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState status = ms->work(&id); if (PlanStage::ADVANCED != status) { continue; } WorkingSetMember* member = ws.get(id); ASSERT_EQUALS(member->recordId, *it); BSONElement elt; string index(1, 'a' + count); ASSERT(member->getFieldDotted(index, &elt)); ASSERT_EQUALS(1, elt.numberInt()); ASSERT(member->getFieldDotted("foo", &elt)); ASSERT_EQUALS(count, elt.numberInt()); ++count; ++it; } // Invalidate recordIds[11]. Should force a fetch and return the deleted document. ms->saveState(); ms->invalidate(&_txn, *it, INVALIDATION_DELETION); ms->restoreState(); // Make sure recordIds[11] was fetched for us. { WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState status; do { status = ms->work(&id); } while (PlanStage::ADVANCED != status); WorkingSetMember* member = ws.get(id); ASSERT(!member->hasRecordId()); ASSERT(member->hasObj()); string index(1, 'a' + count); BSONElement elt; ASSERT_TRUE(member->getFieldDotted(index, &elt)); ASSERT_EQUALS(1, elt.numberInt()); ASSERT(member->getFieldDotted("foo", &elt)); ASSERT_EQUALS(count, elt.numberInt()); ++it; ++count; } // And get the rest. while (!ms->isEOF()) { WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState status = ms->work(&id); if (PlanStage::ADVANCED != status) { continue; } WorkingSetMember* member = ws.get(id); ASSERT_EQUALS(member->recordId, *it); BSONElement elt; string index(1, 'a' + count); ASSERT_TRUE(member->getFieldDotted(index, &elt)); ASSERT_EQUALS(1, elt.numberInt()); ASSERT(member->getFieldDotted("foo", &elt)); ASSERT_EQUALS(count, elt.numberInt()); ++count; ++it; } }
// static Status WorkingSetCommon::getMemberStatus(const WorkingSetMember& member) { invariant(member.hasObj()); return getMemberObjectStatus(member.obj.value()); }
PlanStage::StageState GroupStage::work(WorkingSetID* out) { ++_commonStats.works; ScopedTimer timer(&_commonStats.executionTimeMillis); if (isEOF()) { return PlanStage::IS_EOF; } // On the first call to work(), call initGroupScripting(). if (_groupState == GroupState_Initializing) { Status status = initGroupScripting(); if (!status.isOK()) { *out = WorkingSetCommon::allocateStatusMember(_ws, status); return PlanStage::FAILURE; } _groupState = GroupState_ReadingFromChild; ++_commonStats.needTime; return PlanStage::NEED_TIME; } // Otherwise, read from our child. invariant(_groupState == GroupState_ReadingFromChild); WorkingSetID id = WorkingSet::INVALID_ID; StageState state = child()->work(&id); if (PlanStage::NEED_TIME == state) { ++_commonStats.needTime; return state; } else if (PlanStage::NEED_YIELD == state) { ++_commonStats.needYield; *out = id; return state; } else if (PlanStage::FAILURE == state) { *out = id; // If a stage fails, it may create a status WSM to indicate why it failed, in which // case 'id' is valid. If ID is invalid, we create our own error message. if (WorkingSet::INVALID_ID == id) { const std::string errmsg = "group stage failed to read in results from child"; *out = WorkingSetCommon::allocateStatusMember( _ws, Status(ErrorCodes::InternalError, errmsg)); } return state; } else if (PlanStage::DEAD == state) { return state; } else if (PlanStage::ADVANCED == state) { WorkingSetMember* member = _ws->get(id); // Group queries can't have projections. This means that covering analysis will always // add a fetch. We should always get fetched data, and never just key data. invariant(member->hasObj()); Status status = processObject(member->obj.value()); if (!status.isOK()) { *out = WorkingSetCommon::allocateStatusMember(_ws, status); return PlanStage::FAILURE; } _ws->free(id); ++_commonStats.needTime; return PlanStage::NEED_TIME; } else { // We're done reading from our child. invariant(PlanStage::IS_EOF == state); auto results = finalizeResults(); if (!results.isOK()) { *out = WorkingSetCommon::allocateStatusMember(_ws, results.getStatus()); return PlanStage::FAILURE; } // Transition to state "done." Future calls to work() will return IS_EOF. _groupState = GroupState_Done; *out = _ws->allocate(); WorkingSetMember* member = _ws->get(*out); member->obj = Snapshotted<BSONObj>(SnapshotId(), results.getValue()); member->transitionToOwnedObj(); ++_commonStats.advanced; return PlanStage::ADVANCED; } }
PlanStage::StageState TextOrStage::addTerm(WorkingSetID wsid, WorkingSetID* out) { WorkingSetMember* wsm = _ws->get(wsid); invariant(wsm->getState() == WorkingSetMember::RID_AND_IDX); invariant(1 == wsm->keyData.size()); const IndexKeyDatum newKeyData = wsm->keyData.back(); // copy to keep it around. TextRecordData* textRecordData = &_scores[wsm->recordId]; if (textRecordData->score < 0) { // We have already rejected this document for not matching the filter. invariant(WorkingSet::INVALID_ID == textRecordData->wsid); _ws->free(wsid); return NEED_TIME; } if (WorkingSet::INVALID_ID == textRecordData->wsid) { // We haven't seen this RecordId before. invariant(textRecordData->score == 0); bool shouldKeep = true; if (_filter) { // We have not seen this document before and need to apply a filter. bool wasDeleted = false; try { TextMatchableDocument tdoc(getOpCtx(), newKeyData.indexKeyPattern, newKeyData.keyData, _ws, wsid, _recordCursor); shouldKeep = _filter->matches(&tdoc); } catch (const WriteConflictException& wce) { // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may // be freed when we yield. wsm->makeObjOwnedIfNeeded(); _idRetrying = wsid; *out = WorkingSet::INVALID_ID; return NEED_YIELD; } catch (const TextMatchableDocument::DocumentDeletedException&) { // We attempted to fetch the document but decided it should be excluded from the // result set. shouldKeep = false; wasDeleted = true; } if (wasDeleted || wsm->hasObj()) { ++_specificStats.fetches; } } if (shouldKeep && !wsm->hasObj()) { // Our parent expects RID_AND_OBJ members, so we fetch the document here if we haven't // already. try { shouldKeep = WorkingSetCommon::fetch(getOpCtx(), _ws, wsid, _recordCursor); ++_specificStats.fetches; } catch (const WriteConflictException& wce) { wsm->makeObjOwnedIfNeeded(); _idRetrying = wsid; *out = WorkingSet::INVALID_ID; return NEED_YIELD; } } if (!shouldKeep) { _ws->free(wsid); textRecordData->score = -1; return NEED_TIME; } textRecordData->wsid = wsid; // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield. wsm->makeObjOwnedIfNeeded(); } else { // We already have a working set member for this RecordId. Free the new WSM and retrieve the // old one. Note that since we don't keep all index keys, we could get a score that doesn't // match the document, but this has always been a problem. // TODO something to improve the situation. invariant(wsid != textRecordData->wsid); _ws->free(wsid); wsm = _ws->get(textRecordData->wsid); } // Locate score within possibly compound key: {prefix,term,score,suffix}. BSONObjIterator keyIt(newKeyData.keyData); for (unsigned i = 0; i < _ftsSpec.numExtraBefore(); i++) { keyIt.next(); } keyIt.next(); // Skip past 'term'. BSONElement scoreElement = keyIt.next(); double documentTermScore = scoreElement.number(); // Aggregate relevance score, term keys. textRecordData->score += documentTermScore; return NEED_TIME; }
Runner::RunnerState PlanExecutor::getNext(BSONObj* objOut, DiskLoc* dlOut) { if (_killed) { return Runner::RUNNER_DEAD; } for (;;) { WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState code = _root->work(&id); if (PlanStage::ADVANCED == code) { // Fast count. if (WorkingSet::INVALID_ID == id) { invariant(NULL == objOut); invariant(NULL == dlOut); return Runner::RUNNER_ADVANCED; } WorkingSetMember* member = _workingSet->get(id); bool hasRequestedData = true; if (NULL != objOut) { if (WorkingSetMember::LOC_AND_IDX == member->state) { if (1 != member->keyData.size()) { _workingSet->free(id); hasRequestedData = false; } else { *objOut = member->keyData[0].keyData; } } else if (member->hasObj()) { *objOut = member->obj; } else { _workingSet->free(id); hasRequestedData = false; } } if (NULL != dlOut) { if (member->hasLoc()) { *dlOut = member->loc; } else { _workingSet->free(id); hasRequestedData = false; } } if (hasRequestedData) { _workingSet->free(id); return Runner::RUNNER_ADVANCED; } // This result didn't have the data the caller wanted, try again. } else if (PlanStage::NEED_TIME == code) { // Fall through to yield check at end of large conditional. } else if (PlanStage::NEED_FETCH == code) { // id has a loc and refers to an obj we need to fetch. WorkingSetMember* member = _workingSet->get(id); // This must be true for somebody to request a fetch and can only change when an // invalidation happens, which is when we give up a lock. Don't give up the // lock between receiving the NEED_FETCH and actually fetching(?). verify(member->hasLoc()); // XXX: remove NEED_FETCH } else if (PlanStage::IS_EOF == code) { return Runner::RUNNER_EOF; } else if (PlanStage::DEAD == code) { return Runner::RUNNER_DEAD; } else { verify(PlanStage::FAILURE == code); if (NULL != objOut) { WorkingSetCommon::getStatusMemberObject(*_workingSet, id, objOut); } return Runner::RUNNER_ERROR; } } }
PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) { PlanStage::StageState state = _child->work(out); // All done reading from _child. if (PlanStage::IS_EOF == state) { _child.reset(); // Adjust the annulus size depending on how many results we got. if (_results.empty()) { _radiusIncrement *= 2; } else if (_results.size() < 300) { _radiusIncrement *= 2; } else if (_results.size() > 600) { _radiusIncrement /= 2; } // Make a new ixscan next time. return PlanStage::NEED_TIME; } // Nothing to do unless we advance. if (PlanStage::ADVANCED != state) { return state; } // TODO Speed improvements: // // 0. Modify fetch to preserve key data and test for intersection w/annulus. // // 1. keep track of what we've seen in this scan and possibly ignore it. // // 2. keep track of results we've returned before and ignore them. WorkingSetMember* member = _ws->get(*out); // Must have an object in order to get geometry out of it. verify(member->hasObj()); // Get all the fields with that name from the document. BSONElementSet geom; member->obj.getFieldsDotted(_params.nearQuery.field, geom, false); if (geom.empty()) {return PlanStage::NEED_TIME; } // Some value that any distance we can calculate will be less than. double minDistance = numeric_limits<double>::max(); BSONObj minDistanceObj; for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) { if (!git->isABSONObj()) { mongoutils::str::stream ss; ss << "s2near stage read invalid geometry element " << *git << " from child"; Status status(ErrorCodes::InternalError, ss); *out = WorkingSetCommon::allocateStatusMember( _ws, status); return PlanStage::FAILURE; } BSONObj obj = git->Obj(); double distToObj; if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) { if (distToObj < minDistance) { minDistance = distToObj; minDistanceObj = obj; } } else { warning() << "unknown geometry: " << obj.toString(); } } // If the distance to the doc satisfies our distance criteria, add it to our buffered // results. if (minDistance >= _innerRadius && (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) { _results.push(Result(*out, minDistance)); if (_params.addDistMeta) { member->addComputed(new GeoDistanceComputedData(minDistance)); } if (_params.addPointMeta) { member->addComputed(new GeoNearPointComputedData(minDistanceObj)); } if (member->hasLoc()) { _invalidationMap[member->loc] = *out; } } return PlanStage::NEED_TIME; }
PlanStage::StageState DeleteStage::work(WorkingSetID* out) { ++_commonStats.works; // Adds the amount of time taken by work() to executionTimeMillis. ScopedTimer timer(&_commonStats.executionTimeMillis); if (isEOF()) { return PlanStage::IS_EOF; } invariant(_collection); // If isEOF() returns false, we must have a collection. // It is possible that after a delete was executed, a WriteConflictException occurred // and prevented us from returning ADVANCED with the old version of the document. if (_idReturning != WorkingSet::INVALID_ID) { // We should only get here if we were trying to return something before. invariant(_params.returnDeleted); WorkingSetMember* member = _ws->get(_idReturning); invariant(member->getState() == WorkingSetMember::OWNED_OBJ); *out = _idReturning; _idReturning = WorkingSet::INVALID_ID; ++_commonStats.advanced; return PlanStage::ADVANCED; } // Either retry the last WSM we worked on or get a new one from our child. WorkingSetID id; StageState status; if (_idRetrying == WorkingSet::INVALID_ID) { status = child()->work(&id); } else { status = ADVANCED; id = _idRetrying; _idRetrying = WorkingSet::INVALID_ID; } if (PlanStage::ADVANCED == status) { WorkingSetMember* member = _ws->get(id); // We want to free this member when we return, unless we need to retry it. ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id); if (!member->hasLoc()) { // We expect to be here because of an invalidation causing a force-fetch, and // doc-locking storage engines do not issue invalidations. ++_specificStats.nInvalidateSkips; ++_commonStats.needTime; return PlanStage::NEED_TIME; } RecordId rloc = member->loc; // Deletes can't have projections. This means that covering analysis will always add // a fetch. We should always get fetched data, and never just key data. invariant(member->hasObj()); try { // If the snapshot changed, then we have to make sure we have the latest copy of the // doc and that it still matches. std::unique_ptr<RecordCursor> cursor; if (getOpCtx()->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) { cursor = _collection->getCursor(getOpCtx()); if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, cursor)) { // Doc is already deleted. Nothing more to do. ++_commonStats.needTime; return PlanStage::NEED_TIME; } // Make sure the re-fetched doc still matches the predicate. if (_params.canonicalQuery && !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), NULL)) { // Doesn't match. ++_commonStats.needTime; return PlanStage::NEED_TIME; } } // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState() // is allowed to free the memory. if (_params.returnDeleted) { member->makeObjOwnedIfNeeded(); } // TODO: Do we want to buffer docs and delete them in a group rather than // saving/restoring state repeatedly? try { if (supportsDocLocking()) { // Doc-locking engines require this before saveState() since they don't use // invalidations. WorkingSetCommon::prepareForSnapshotChange(_ws); } child()->saveState(); } catch (const WriteConflictException& wce) { std::terminate(); } if (_params.returnDeleted) { // Save a copy of the document that is about to get deleted. BSONObj deletedDoc = member->obj.value(); member->obj.setValue(deletedDoc.getOwned()); member->loc = RecordId(); member->transitionToOwnedObj(); } // Do the write, unless this is an explain. 
if (!_params.isExplain) { WriteUnitOfWork wunit(getOpCtx()); _collection->deleteDocument(getOpCtx(), rloc); wunit.commit(); } ++_specificStats.docsDeleted; } catch (const WriteConflictException& wce) { // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may be // freed when we yield. member->makeObjOwnedIfNeeded(); _idRetrying = id; memberFreer.Dismiss(); // Keep this member around so we can retry deleting it. *out = WorkingSet::INVALID_ID; _commonStats.needYield++; return NEED_YIELD; } // As restoreState may restore (recreate) cursors, cursors are tied to the // transaction in which they are created, and a WriteUnitOfWork is a // transaction, make sure to restore the state outside of the WritUnitOfWork. try { child()->restoreState(); } catch (const WriteConflictException& wce) { // Note we don't need to retry anything in this case since the delete already // was committed. However, we still need to return the deleted document // (if it was requested). if (_params.returnDeleted) { // member->obj should refer to the deleted document. invariant(member->getState() == WorkingSetMember::OWNED_OBJ); _idReturning = id; // Keep this member around so that we can return it on the next work() call. memberFreer.Dismiss(); } *out = WorkingSet::INVALID_ID; _commonStats.needYield++; return NEED_YIELD; } if (_params.returnDeleted) { // member->obj should refer to the deleted document. invariant(member->getState() == WorkingSetMember::OWNED_OBJ); memberFreer.Dismiss(); // Keep this member around so we can return it. *out = id; ++_commonStats.advanced; return PlanStage::ADVANCED; } ++_commonStats.needTime; return PlanStage::NEED_TIME; } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) { *out = id; // If a stage fails, it may create a status WSM to indicate why it failed, in which case // 'id' is valid. If ID is invalid, we create our own error message. if (WorkingSet::INVALID_ID == id) { const std::string errmsg = "delete stage failed to read in results from child"; *out = WorkingSetCommon::allocateStatusMember( _ws, Status(ErrorCodes::InternalError, errmsg)); } return status; } else if (PlanStage::NEED_TIME == status) { ++_commonStats.needTime; } else if (PlanStage::NEED_YIELD == status) { *out = id; ++_commonStats.needYield; } return status; }
PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) { if (isEOF()) { return PlanStage::IS_EOF; } invariant(_collection); // If isEOF() returns false, we must have a collection. // It is possible that after a delete was executed, a WriteConflictException occurred // and prevented us from returning ADVANCED with the old version of the document. if (_idReturning != WorkingSet::INVALID_ID) { // We should only get here if we were trying to return something before. invariant(_params.returnDeleted); WorkingSetMember* member = _ws->get(_idReturning); invariant(member->getState() == WorkingSetMember::OWNED_OBJ); *out = _idReturning; _idReturning = WorkingSet::INVALID_ID; return PlanStage::ADVANCED; } // Either retry the last WSM we worked on or get a new one from our child. WorkingSetID id; if (_idRetrying != WorkingSet::INVALID_ID) { id = _idRetrying; _idRetrying = WorkingSet::INVALID_ID; } else { auto status = child()->work(&id); switch (status) { case PlanStage::ADVANCED: break; case PlanStage::FAILURE: case PlanStage::DEAD: // The stage which produces a failure is responsible for allocating a working set // member with error details. invariant(WorkingSet::INVALID_ID != id); *out = id; return status; case PlanStage::NEED_TIME: return status; case PlanStage::NEED_YIELD: *out = id; return status; case PlanStage::IS_EOF: return status; default: MONGO_UNREACHABLE; } } // We advanced, or are retrying, and id is set to the WSM to work on. WorkingSetMember* member = _ws->get(id); // We want to free this member when we return, unless we need to retry deleting or returning it. ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id); invariant(member->hasRecordId()); RecordId recordId = member->recordId; // Deletes can't have projections. This means that covering analysis will always add // a fetch. We should always get fetched data, and never just key data. invariant(member->hasObj()); // Ensure the document still exists and matches the predicate. bool docStillMatches; try { docStillMatches = write_stage_common::ensureStillMatches( _collection, getOpCtx(), _ws, id, _params.canonicalQuery); } catch (const WriteConflictException&) { // There was a problem trying to detect if the document still exists, so retry. memberFreer.Dismiss(); return prepareToRetryWSM(id, out); } if (!docStillMatches) { // Either the document has already been deleted, or it has been updated such that it no // longer matches the predicate. if (shouldRestartDeleteIfNoLongerMatches(_params)) { throw WriteConflictException(); } return PlanStage::NEED_TIME; } // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState() is // allowed to free the memory. if (_params.returnDeleted) { // Save a copy of the document that is about to get deleted, but keep it in the RID_AND_OBJ // state in case we need to retry deleting it. BSONObj deletedDoc = member->obj.value(); member->obj.setValue(deletedDoc.getOwned()); } // TODO: Do we want to buffer docs and delete them in a group rather than saving/restoring state // repeatedly? WorkingSetCommon::prepareForSnapshotChange(_ws); try { child()->saveState(); } catch (const WriteConflictException&) { std::terminate(); } // Do the write, unless this is an explain. if (!_params.isExplain) { try { WriteUnitOfWork wunit(getOpCtx()); _collection->deleteDocument(getOpCtx(), _params.stmtId, recordId, _params.opDebug, _params.fromMigrate, false, _params.returnDeleted ? 
Collection::StoreDeletedDoc::On : Collection::StoreDeletedDoc::Off); wunit.commit(); } catch (const WriteConflictException&) { memberFreer.Dismiss(); // Keep this member around so we can retry deleting it. return prepareToRetryWSM(id, out); } } ++_specificStats.docsDeleted; if (_params.returnDeleted) { // After deleting the document, the RecordId associated with this member is invalid. // Remove the 'recordId' from the WorkingSetMember before returning it. member->recordId = RecordId(); member->transitionToOwnedObj(); } // As restoreState may restore (recreate) cursors, cursors are tied to the transaction in which // they are created, and a WriteUnitOfWork is a transaction, make sure to restore the state // outside of the WriteUnitOfWork. try { child()->restoreState(); } catch (const WriteConflictException&) { // Note we don't need to retry anything in this case since the delete already was committed. // However, we still need to return the deleted document (if it was requested). if (_params.returnDeleted) { // member->obj should refer to the deleted document. invariant(member->getState() == WorkingSetMember::OWNED_OBJ); _idReturning = id; // Keep this member around so that we can return it on the next work() call. memberFreer.Dismiss(); } *out = WorkingSet::INVALID_ID; return NEED_YIELD; } if (_params.returnDeleted) { // member->obj should refer to the deleted document. invariant(member->getState() == WorkingSetMember::OWNED_OBJ); memberFreer.Dismiss(); // Keep this member around so we can return it. *out = id; return PlanStage::ADVANCED; } return PlanStage::NEED_TIME; }
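Both versions of DeleteStage::doWork lean on a scope guard (MakeGuard/Dismiss) so the working-set member is freed on every early return unless it has to survive for a retry or to return the deleted document. Below is a minimal, self-contained sketch of that idiom; the ScopeGuard and WorkingSet types here are illustrative stand-ins, not MongoDB's real utilities.

#include <functional>
#include <iostream>
#include <utility>

class ScopeGuard {
public:
    explicit ScopeGuard(std::function<void()> f) : _f(std::move(f)) {}
    ~ScopeGuard() { if (_active) _f(); }
    void dismiss() { _active = false; }
    ScopeGuard(const ScopeGuard&) = delete;
    ScopeGuard& operator=(const ScopeGuard&) = delete;

private:
    std::function<void()> _f;
    bool _active = true;
};

struct WorkingSet {
    void free(int id) { std::cout << "freed member " << id << "\n"; }
};

bool processMember(WorkingSet& ws, int id, bool keepForCaller) {
    ScopeGuard memberFreer([&] { ws.free(id); });  // default: free on any return path
    // ... delete / filtering work would happen here ...
    if (keepForCaller) {
        memberFreer.dismiss();  // caller will consume the member, so don't free it
        return true;
    }
    return false;  // guard fires here and frees the member
}

int main() {
    WorkingSet ws;
    processMember(ws, 7, /*keepForCaller=*/false);  // prints "freed member 7"
    processMember(ws, 8, /*keepForCaller=*/true);   // nothing freed
}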
PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) { PlanStage::StageState state = _child->work(out); // All done reading from _child. if (PlanStage::IS_EOF == state) { _child.reset(); _keyGeoFilter.reset(); // Adjust the annulus size depending on how many results we got. if (_results.empty()) { _radiusIncrement *= 2; } else if (_results.size() < 300) { _radiusIncrement *= 2; } else if (_results.size() > 600) { _radiusIncrement /= 2; } // Make a new ixscan next time. return PlanStage::NEED_TIME; } // Nothing to do unless we advance. if (PlanStage::ADVANCED != state) { return state; } WorkingSetMember* member = _ws->get(*out); // Must have an object in order to get geometry out of it. verify(member->hasObj()); // The scans we use don't dedup so we must dedup them ourselves. We only put locs into here // if we know for sure whether or not we'll return them in this annulus. if (member->hasLoc()) { if (_seenInScan.end() != _seenInScan.find(member->loc)) { return PlanStage::NEED_TIME; } } // Get all the fields with that name from the document. BSONElementSet geom; member->obj.getFieldsDotted(_params.nearQuery.field, geom, false); if (geom.empty()) { return PlanStage::NEED_TIME; } // Some value that any distance we can calculate will be less than. double minDistance = numeric_limits<double>::max(); BSONObj minDistanceObj; for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) { if (!git->isABSONObj()) { mongoutils::str::stream ss; ss << "s2near stage read invalid geometry element " << *git << " from child"; Status status(ErrorCodes::InternalError, ss); *out = WorkingSetCommon::allocateStatusMember( _ws, status); return PlanStage::FAILURE; } BSONObj obj = git->Obj(); double distToObj; if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) { if (distToObj < minDistance) { minDistance = distToObj; minDistanceObj = obj; } } else { warning() << "unknown geometry: " << obj.toString(); } } // If we're here we'll either include the doc in this annulus or reject it. It's safe to // ignore it if it pops up again in this annulus. if (member->hasLoc()) { _seenInScan.insert(member->loc); } // If the distance to the doc satisfies our distance criteria, add it to our buffered // results. if (minDistance >= _innerRadius && (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) { _results.push(Result(*out, minDistance)); if (_params.addDistMeta) { // FLAT implies the output distances are in radians. Convert to meters. if (FLAT == _params.nearQuery.centroid.crs) { member->addComputed(new GeoDistanceComputedData(minDistance / kRadiusOfEarthInMeters)); } else { member->addComputed(new GeoDistanceComputedData(minDistance)); } } if (_params.addPointMeta) { member->addComputed(new GeoNearPointComputedData(minDistanceObj)); } if (member->hasLoc()) { _invalidationMap[member->loc] = *out; } } return PlanStage::NEED_TIME; }
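addResultToQueue buffers candidates for one annulus and, once the child hits EOF, adapts how fast the search ring grows based on how many results the annulus produced. A compilable toy sketch of that heuristic follows; the thresholds 300/600 mirror the code above, while the struct, starting radii, and the ring-advancing step are invented purely for illustration.

#include <cstddef>
#include <iostream>

struct NearSearchState {
    double innerRadius = 0.0;
    double outerRadius = 100.0;       // meters; arbitrary starting ring for the sketch
    double radiusIncrement = 100.0;

    void advanceAnnulus(std::size_t resultsInAnnulus) {
        if (resultsInAnnulus < 300) {
            radiusIncrement *= 2;     // too sparse: widen the next ring aggressively
        } else if (resultsInAnnulus > 600) {
            radiusIncrement /= 2;     // too dense: narrow the next ring
        }
        innerRadius = outerRadius;
        outerRadius += radiusIncrement;
    }
};

int main() {
    NearSearchState s;
    for (std::size_t hits : {0u, 50u, 700u, 400u}) {
        s.advanceAnnulus(hits);
        std::cout << "next annulus: [" << s.innerRadius << ", " << s.outerRadius
                  << "), increment " << s.radiusIncrement << "\n";
    }
}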
PlanStage::StageState SortStage::work(WorkingSetID* out) { ++_commonStats.works; if (_memUsage > kMaxBytes) { return PlanStage::FAILURE; } if (isEOF()) { return PlanStage::IS_EOF; } // Still reading in results to sort. if (!_sorted) { WorkingSetID id; StageState code = _child->work(&id); if (PlanStage::ADVANCED == code) { // Add it into the map for quick invalidation if it has a valid DiskLoc. // A DiskLoc may be invalidated at any time (during a yield). We need to get into // the WorkingSet as quickly as possible to handle it. WorkingSetMember* member = _ws->get(id); if (member->hasLoc()) { _wsidByDiskLoc[member->loc] = id; } // Do some accounting to make sure we're not using too much memory. if (member->hasLoc()) { _memUsage += sizeof(DiskLoc); } // We are not supposed (yet) to sort over anything other than objects. In other // words, the query planner wouldn't put a sort atop anything that wouldn't have a // collection scan as a leaf. verify(member->hasObj()); _memUsage += member->obj.objsize(); // We will sort '_data' in the same order an index over '_pattern' would // have. This has very nuanced implications. Consider the sort pattern {a:1} // and the document {a:[1,10]}. We have potentially two keys we could use to // sort on. Here we extract these keys. In the next step we decide which one to // use. BSONObjCmp patternCmp(_pattern); BSONObjSet keys(patternCmp); // XXX keyGen will throw on a "parallel array" _keyGen->getKeys(member->obj, &keys); // dumpKeys(keys); // To decide which key to use in sorting, we consider not only the sort pattern // but also if a given key matches the query. Assume a query {a: {$gte: 5}} and // a document {a:1}. That document wouldn't match. In the same sense, the key '1' // in an array {a: [1,10]} should not be considered as being part of the result // set and thus that array should sort based on the '10' key. To find such a key, // we use the bounds for the query. BSONObj sortKey; for (BSONObjSet::const_iterator it = keys.begin(); it != keys.end(); ++it) { if (!_hasBounds) { sortKey = *it; break; } if (_boundsChecker->isValidKey(*it)) { sortKey = *it; break; } } if (sortKey.isEmpty()) { // We assume that if the document made it through the sort stage, then it // matches the query and thus should contain at least one array item that // is within the query bounds. cout << "can't find bounds for obj " << member->obj.toString() << endl; cout << "bounds are " << _bounds.toString() << endl; verify(0); } // We let the data stay in the WorkingSet and sort using the selected portion // of the object in that working set member. SortableDataItem item; item.wsid = id; item.sortKey = sortKey; _data.push_back(item); ++_commonStats.needTime; return PlanStage::NEED_TIME; } else if (PlanStage::IS_EOF == code) { // TODO: We don't need the lock for this. We could ask for a yield and do this work // unlocked. Also, this is performing a lot of work for one call to work(...) std::sort(_data.begin(), _data.end(), *_cmp); _resultIterator = _data.begin(); _sorted = true; ++_commonStats.needTime; return PlanStage::NEED_TIME; } else { if (PlanStage::NEED_FETCH == code) { *out = id; ++_commonStats.needFetch; } else if (PlanStage::NEED_TIME == code) { ++_commonStats.needTime; } return code; } } // Returning results. verify(_resultIterator != _data.end()); verify(_sorted); *out = _resultIterator->wsid; _resultIterator++; // If we're returning something, take it out of our DL -> WSID map so that future // calls to invalidate don't cause us to take action for a DL we're done with. 
WorkingSetMember* member = _ws->get(*out); if (member->hasLoc()) { _wsidByDiskLoc.erase(member->loc); } // If it was flagged, we just drop it on the floor, assuming the caller wants a DiskLoc. We // could make this triggerable somehow. if (_ws->isFlagged(*out)) { _ws->free(*out); return PlanStage::NEED_TIME; } ++_commonStats.advanced; return PlanStage::ADVANCED; }
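The older SortStage::work above generates every candidate index key for the sort pattern and then uses the query bounds to decide which key a multi-valued (array) field should sort by, e.g. {a:[1,10]} with {a:{$gte:5}} sorts by 10, not 1. Here is a standalone sketch of just that selection step, with a plain predicate standing in for the real IndexBoundsChecker and integer keys standing in for BSON keys.

#include <algorithm>
#include <functional>
#include <iostream>
#include <vector>

struct SortItem {
    int docId;
    int sortKey;
};

// Pick the first candidate key that falls inside the bounds; the real stage
// treats an empty result as a fatal inconsistency because the document matched.
bool chooseSortKey(const std::vector<int>& candidateKeys,
                   const std::function<bool(int)>& withinBounds,
                   int* out) {
    for (int key : candidateKeys) {
        if (withinBounds(key)) {
            *out = key;
            return true;
        }
    }
    return false;
}

int main() {
    auto gte5 = [](int k) { return k >= 5; };  // stands in for {a: {$gte: 5}}

    std::vector<SortItem> data;
    std::vector<std::vector<int>> docs = {{1, 10}, {7}, {3, 6, 20}};  // array values of 'a'
    for (int id = 0; id < static_cast<int>(docs.size()); ++id) {
        int key = 0;
        if (chooseSortKey(docs[id], gte5, &key)) {
            data.push_back({id, key});
        }
    }

    std::sort(data.begin(), data.end(),
              [](const SortItem& a, const SortItem& b) { return a.sortKey < b.sortKey; });
    for (const auto& item : data) {
        std::cout << "doc " << item.docId << " sorts by key " << item.sortKey << "\n";
    }
}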
PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) { if (isEOF()) { return PlanStage::IS_EOF; } invariant(_collection); // If isEOF() returns false, we must have a collection. // It is possible that after a delete was executed, a WriteConflictException occurred // and prevented us from returning ADVANCED with the old version of the document. if (_idReturning != WorkingSet::INVALID_ID) { // We should only get here if we were trying to return something before. invariant(_params.returnDeleted); WorkingSetMember* member = _ws->get(_idReturning); invariant(member->getState() == WorkingSetMember::OWNED_OBJ); *out = _idReturning; _idReturning = WorkingSet::INVALID_ID; return PlanStage::ADVANCED; } // Either retry the last WSM we worked on or get a new one from our child. WorkingSetID id; if (_idRetrying != WorkingSet::INVALID_ID) { id = _idRetrying; _idRetrying = WorkingSet::INVALID_ID; } else { auto status = child()->work(&id); switch (status) { case PlanStage::ADVANCED: break; case PlanStage::FAILURE: case PlanStage::DEAD: *out = id; // If a stage fails, it may create a status WSM to indicate why it failed, in which // case 'id' is valid. If ID is invalid, we create our own error message. if (WorkingSet::INVALID_ID == id) { const std::string errmsg = "delete stage failed to read in results from child"; *out = WorkingSetCommon::allocateStatusMember( _ws, Status(ErrorCodes::InternalError, errmsg)); } return status; case PlanStage::NEED_TIME: return status; case PlanStage::NEED_YIELD: *out = id; return status; case PlanStage::IS_EOF: return status; default: MONGO_UNREACHABLE; } } // We advanced, or are retrying, and id is set to the WSM to work on. WorkingSetMember* member = _ws->get(id); // We want to free this member when we return, unless we need to retry it. ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id); if (!member->hasRecordId()) { // We expect to be here because of an invalidation causing a force-fetch. ++_specificStats.nInvalidateSkips; return PlanStage::NEED_TIME; } RecordId recordId = member->recordId; // Deletes can't have projections. This means that covering analysis will always add // a fetch. We should always get fetched data, and never just key data. invariant(member->hasObj()); try { // If the snapshot changed, then we have to make sure we have the latest copy of the // doc and that it still matches. std::unique_ptr<SeekableRecordCursor> cursor; if (getOpCtx()->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) { cursor = _collection->getCursor(getOpCtx()); if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, cursor)) { // Doc is already deleted. Nothing more to do. return PlanStage::NEED_TIME; } // Make sure the re-fetched doc still matches the predicate. if (_params.canonicalQuery && !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), NULL)) { // Doesn't match. return PlanStage::NEED_TIME; } } // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState() // is allowed to free the memory. if (_params.returnDeleted) { // Save a copy of the document that is about to get deleted, but keep it in the // RID_AND_OBJ state in case we need to retry deleting it. BSONObj deletedDoc = member->obj.value(); member->obj.setValue(deletedDoc.getOwned()); } // TODO: Do we want to buffer docs and delete them in a group rather than // saving/restoring state repeatedly? 
try { WorkingSetCommon::prepareForSnapshotChange(_ws); child()->saveState(); } catch (const WriteConflictException& wce) { std::terminate(); } // Do the write, unless this is an explain. if (!_params.isExplain) { WriteUnitOfWork wunit(getOpCtx()); _collection->deleteDocument(getOpCtx(), recordId, _params.fromMigrate); wunit.commit(); } ++_specificStats.docsDeleted; } catch (const WriteConflictException& wce) { // When we're doing a findAndModify with a sort, the sort will have a limit of 1, so it will // not produce any more results even if there is another matching document. Re-throw the WCE // here so that these operations get another chance to find a matching document. The // findAndModify command should automatically retry if it gets a WCE. // TODO: this is not necessary if there was no sort specified. if (_params.returnDeleted) { throw; } _idRetrying = id; memberFreer.Dismiss(); // Keep this member around so we can retry deleting it. *out = WorkingSet::INVALID_ID; return NEED_YIELD; } if (_params.returnDeleted) { // After deleting the document, the RecordId associated with this member is invalid. // Remove the 'recordId' from the WorkingSetMember before returning it. member->recordId = RecordId(); member->transitionToOwnedObj(); } // As restoreState may restore (recreate) cursors, cursors are tied to the // transaction in which they are created, and a WriteUnitOfWork is a // transaction, make sure to restore the state outside of the WriteUnitOfWork. try { child()->restoreState(); } catch (const WriteConflictException& wce) { // Note we don't need to retry anything in this case since the delete already // was committed. However, we still need to return the deleted document // (if it was requested). if (_params.returnDeleted) { // member->obj should refer to the deleted document. invariant(member->getState() == WorkingSetMember::OWNED_OBJ); _idReturning = id; // Keep this member around so that we can return it on the next work() call. memberFreer.Dismiss(); } *out = WorkingSet::INVALID_ID; return NEED_YIELD; } if (_params.returnDeleted) { // member->obj should refer to the deleted document. invariant(member->getState() == WorkingSetMember::OWNED_OBJ); memberFreer.Dismiss(); // Keep this member around so we can return it. *out = id; return PlanStage::ADVANCED; } return PlanStage::NEED_TIME; }
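This variant of doWork re-checks the document when the storage snapshot has changed since the member was read: it re-fetches the record and re-applies the predicate before deleting. Below is a simplified, self-contained sketch of that check; the map-backed collection, Member struct, and snapshot ids are stand-ins for the real cursor and working-set machinery.

#include <functional>
#include <iostream>
#include <map>

struct Doc {
    int value;
};

struct Member {
    int recordId;
    Doc obj;
    long snapshotId;
};

// Returns true if the delete should proceed for this member.
bool ensureStillMatches(const std::map<int, Doc>& collection,
                        long currentSnapshotId,
                        Member& member,
                        const std::function<bool(const Doc&)>& predicate) {
    if (member.snapshotId != currentSnapshotId) {
        auto it = collection.find(member.recordId);
        if (it == collection.end()) {
            return false;  // already deleted by someone else
        }
        member.obj = it->second;              // refresh to the latest committed version
        member.snapshotId = currentSnapshotId;
        if (!predicate(member.obj)) {
            return false;  // updated so that it no longer matches the query
        }
    }
    return true;
}

int main() {
    std::map<int, Doc> collection = {{7, Doc{42}}};
    Member member{7, Doc{42}, /*snapshotId=*/1};
    auto matches = [](const Doc& d) { return d.value >= 10; };

    // Simulate a snapshot change plus a concurrent update that breaks the match.
    collection[7].value = 5;
    bool proceed = ensureStillMatches(collection, /*currentSnapshotId=*/2, member, matches);
    std::cout << (proceed ? "delete proceeds\n" : "skip: no longer matches or already gone\n");
}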
PlanStage::StageState SortStage::work(WorkingSetID* out) { ++_commonStats.works; // Adds the amount of time taken by work() to executionTimeMillis. ScopedTimer timer(&_commonStats.executionTimeMillis); if (NULL == _sortKeyGen) { // This is heavy and should be done as part of work(). _sortKeyGen.reset(new SortStageKeyGenerator(_collection, _pattern, _query)); _sortKeyComparator.reset(new WorkingSetComparator(_sortKeyGen->getSortComparator())); // If limit > 1, we need to initialize _dataSet here to maintain ordered // set of data items while fetching from the child stage. if (_limit > 1) { const WorkingSetComparator& cmp = *_sortKeyComparator; _dataSet.reset(new SortableDataItemSet(cmp)); } return PlanStage::NEED_TIME; } const size_t maxBytes = static_cast<size_t>(internalQueryExecMaxBlockingSortBytes); if (_memUsage > maxBytes) { mongoutils::str::stream ss; ss << "Sort operation used more than the maximum " << maxBytes << " bytes of RAM. Add an index, or specify a smaller limit."; Status status(ErrorCodes::OperationFailed, ss); *out = WorkingSetCommon::allocateStatusMember(_ws, status); return PlanStage::FAILURE; } if (isEOF()) { return PlanStage::IS_EOF; } // Still reading in results to sort. if (!_sorted) { WorkingSetID id = WorkingSet::INVALID_ID; StageState code = child()->work(&id); if (PlanStage::ADVANCED == code) { // Add it into the map for quick invalidation if it has a valid RecordId. // A RecordId may be invalidated at any time (during a yield). We need to get into // the WorkingSet as quickly as possible to handle it. WorkingSetMember* member = _ws->get(id); // Planner must put a fetch before we get here. verify(member->hasObj()); // We might be sorting something that was invalidated at some point. if (member->hasLoc()) { _wsidByDiskLoc[member->loc] = id; } // The data remains in the WorkingSet and we wrap the WSID with the sort key. SortableDataItem item; Status sortKeyStatus = _sortKeyGen->getSortKey(*member, &item.sortKey); if (!sortKeyStatus.isOK()) { *out = WorkingSetCommon::allocateStatusMember(_ws, sortKeyStatus); return PlanStage::FAILURE; } item.wsid = id; if (member->hasLoc()) { // The RecordId breaks ties when sorting two WSMs with the same sort key. item.loc = member->loc; } addToBuffer(item); ++_commonStats.needTime; return PlanStage::NEED_TIME; } else if (PlanStage::IS_EOF == code) { // TODO: We don't need the lock for this. We could ask for a yield and do this work // unlocked. Also, this is performing a lot of work for one call to work(...) sortBuffer(); _resultIterator = _data.begin(); _sorted = true; ++_commonStats.needTime; return PlanStage::NEED_TIME; } else if (PlanStage::FAILURE == code || PlanStage::DEAD == code) { *out = id; // If a stage fails, it may create a status WSM to indicate why it // failed, in which case 'id' is valid. If ID is invalid, we // create our own error message. if (WorkingSet::INVALID_ID == id) { mongoutils::str::stream ss; ss << "sort stage failed to read in results to sort from child"; Status status(ErrorCodes::InternalError, ss); *out = WorkingSetCommon::allocateStatusMember(_ws, status); } return code; } else if (PlanStage::NEED_TIME == code) { ++_commonStats.needTime; } else if (PlanStage::NEED_YIELD == code) { ++_commonStats.needYield; *out = id; } return code; } // Returning results. 
verify(_resultIterator != _data.end()); verify(_sorted); *out = _resultIterator->wsid; _resultIterator++; // If we're returning something, take it out of our DL -> WSID map so that future // calls to invalidate don't cause us to take action for a DL we're done with. WorkingSetMember* member = _ws->get(*out); if (member->hasLoc()) { _wsidByDiskLoc.erase(member->loc); } ++_commonStats.advanced; return PlanStage::ADVANCED; }
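The newer SortStage::work fails the query with an explanatory Status once the buffered data exceeds internalQueryExecMaxBlockingSortBytes. The following minimal sketch shows that memory-guard pattern in isolation; the BoundedSortBuffer and Status types are invented for illustration, and the tiny limit exists only so the demonstration trips it.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct Status {
    bool ok;
    std::string reason;
};

class BoundedSortBuffer {
public:
    explicit BoundedSortBuffer(std::size_t maxBytes) : _maxBytes(maxBytes) {}

    Status add(std::string doc) {
        _memUsage += doc.size();
        if (_memUsage > _maxBytes) {
            return {false,
                    "Sort operation used more than the maximum " + std::to_string(_maxBytes) +
                        " bytes of RAM. Add an index, or specify a smaller limit."};
        }
        _docs.push_back(std::move(doc));
        return {true, ""};
    }

private:
    std::size_t _maxBytes;
    std::size_t _memUsage = 0;
    std::vector<std::string> _docs;
};

int main() {
    BoundedSortBuffer buffer(32);  // deliberately tiny ceiling for the demo
    for (const char* doc : {"short", "a considerably longer document body"}) {
        Status s = buffer.add(doc);
        if (!s.ok) {
            std::cout << "sort failed: " << s.reason << "\n";
            break;
        }
    }
}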