void run() { OldClientWriteContext ctx(&_txn, ns()); Database* db = ctx.db(); Collection* coll = db->getCollection(ns()); if (!coll) { WriteUnitOfWork wuow(&_txn); coll = db->createCollection(&_txn, ns()); wuow.commit(); } WorkingSet ws; std::set<WorkingSetID> expectedResultIds; std::set<WorkingSetID> resultIds; // Create a KeepMutationsStage with an EOF child, and flag 50 objects. We expect these // objects to be returned by the KeepMutationsStage. MatchExpression* nullFilter = NULL; auto keep = make_unique<KeepMutationsStage>(&_txn, nullFilter, &ws, new EOFStage(&_txn)); for (size_t i = 0; i < 50; ++i) { WorkingSetID id = ws.allocate(); WorkingSetMember* member = ws.get(id); member->obj = Snapshotted<BSONObj>(SnapshotId(), BSON("x" << 1)); member->transitionToOwnedObj(); ws.flagForReview(id); expectedResultIds.insert(id); } // Call work() on the KeepMutationsStage. The stage should start streaming the // already-flagged objects. WorkingSetID id = getNextResult(keep.get()); resultIds.insert(id); // Flag more objects, then call work() again on the KeepMutationsStage, and expect none // of the newly-flagged objects to be returned (the KeepMutationsStage does not // incorporate objects flagged since the streaming phase started). // // This condition triggers SERVER-15580 (the new flagging causes a rehash of the // unordered_set "WorkingSet::_flagged", which invalidates all iterators, which were // previously being dereferenced in KeepMutationsStage::work()). // Note that stdx::unordered_set<>::insert() triggers a rehash if the new number of // elements is greater than or equal to max_load_factor()*bucket_count(). 
size_t rehashSize = static_cast<size_t>(ws.getFlagged().max_load_factor() * ws.getFlagged().bucket_count()); while (ws.getFlagged().size() <= rehashSize) { WorkingSetID id = ws.allocate(); WorkingSetMember* member = ws.get(id); member->obj = Snapshotted<BSONObj>(SnapshotId(), BSON("x" << 1)); member->transitionToOwnedObj(); ws.flagForReview(id); } while ((id = getNextResult(keep.get())) != WorkingSet::INVALID_ID) { resultIds.insert(id); } // Assert that only the first 50 objects were returned. ASSERT(expectedResultIds == resultIds); }
void run() { OldClientWriteContext ctx(&_opCtx, ns()); Database* db = ctx.db(); Collection* coll = db->getCollection(&_opCtx, ns()); if (!coll) { WriteUnitOfWork wuow(&_opCtx); coll = db->createCollection(&_opCtx, ns()); wuow.commit(); } auto ws = make_unique<WorkingSet>(); auto queuedDataStage = make_unique<QueuedDataStage>(&_opCtx, ws.get()); for (int i = 0; i < numObj(); ++i) { { WorkingSetID id = ws->allocate(); WorkingSetMember* member = ws->get(id); member->obj = Snapshotted<BSONObj>( SnapshotId(), fromjson("{a: [1,2,3], b:[1,2,3], c:[1,2,3], d:[1,2,3,4]}")); member->transitionToOwnedObj(); queuedDataStage->pushBack(id); } { WorkingSetID id = ws->allocate(); WorkingSetMember* member = ws->get(id); member->obj = Snapshotted<BSONObj>(SnapshotId(), fromjson("{a:1, b:1, c:1}")); member->transitionToOwnedObj(); queuedDataStage->pushBack(id); } } SortStageParams params; params.collection = coll; params.pattern = BSON("b" << -1 << "c" << 1 << "a" << 1); params.limit = 0; auto keyGenStage = make_unique<SortKeyGeneratorStage>( &_opCtx, queuedDataStage.release(), ws.get(), params.pattern, BSONObj(), nullptr); auto sortStage = make_unique<SortStage>(&_opCtx, params, ws.get(), keyGenStage.release()); auto fetchStage = make_unique<FetchStage>(&_opCtx, ws.get(), sortStage.release(), nullptr, coll); // We don't get results back since we're sorting some parallel arrays. auto statusWithPlanExecutor = PlanExecutor::make( &_opCtx, std::move(ws), std::move(fetchStage), coll, PlanExecutor::NO_YIELD); auto exec = std::move(statusWithPlanExecutor.getValue()); PlanExecutor::ExecState runnerState = exec->getNext(NULL, NULL); ASSERT_EQUALS(PlanExecutor::FAILURE, runnerState); }
/**
 * Allocates a new WorkingSetMember with data 'dataObj' in 'ws', and adds the WorkingSetMember
 * to 'qds'.
 *
 * The member is put into the owned-object state before it is queued.
 */
void addMember(QueuedDataStage* qds, WorkingSet* ws, BSONObj dataObj) {
    WorkingSetID id = ws->allocate();
    WorkingSetMember* wsm = ws->get(id);
    // Store the caller's document (previously a hard-coded {x: 1} was stored and 'dataObj' was
    // silently ignored). getOwned() is applied because the member is about to be transitioned
    // to the owned-object state.
    wsm->obj = Snapshotted<BSONObj>(SnapshotId(), dataObj.getOwned());
    wsm->transitionToOwnedObj();
    qds->pushBack(id);
}
void run() { OldClientWriteContext ctx(&_txn, ns()); Database* db = ctx.db(); Collection* coll = db->getCollection(ns()); if (!coll) { WriteUnitOfWork wuow(&_txn); coll = db->createCollection(&_txn, ns()); wuow.commit(); } WorkingSet ws; // Add 10 objects to the collection. for (size_t i = 0; i < 10; ++i) { insert(BSON("x" << 1)); } // Create 10 objects that are flagged. for (size_t i = 0; i < 10; ++i) { WorkingSetID id = ws.allocate(); WorkingSetMember* member = ws.get(id); member->obj = Snapshotted<BSONObj>(SnapshotId(), BSON("x" << 2)); member->transitionToOwnedObj(); ws.flagForReview(id); } // Create a collscan to provide the 10 objects in the collection. CollectionScanParams params; params.collection = coll; params.direction = CollectionScanParams::FORWARD; params.tailable = false; params.start = RecordId(); CollectionScan* cs = new CollectionScan(&_txn, params, &ws, NULL); // Create a KeepMutations stage to merge in the 10 flagged objects. // Takes ownership of 'cs' MatchExpression* nullFilter = NULL; auto keep = make_unique<KeepMutationsStage>(&_txn, nullFilter, &ws, cs); for (size_t i = 0; i < 10; ++i) { WorkingSetID id = getNextResult(keep.get()); WorkingSetMember* member = ws.get(id); ASSERT_FALSE(ws.isFlagged(id)); ASSERT_EQUALS(member->obj.value()["x"].numberInt(), 1); } { WorkingSetID out; ASSERT_EQ(cs->work(&out), PlanStage::IS_EOF); } // Flagged results *must* be at the end. for (size_t i = 0; i < 10; ++i) { WorkingSetID id = getNextResult(keep.get()); WorkingSetMember* member = ws.get(id); ASSERT(ws.isFlagged(id)); ASSERT_EQUALS(member->obj.value()["x"].numberInt(), 2); } }
void run() { dbtests::WriteContextForTests ctx(&_opCtx, ns()); Database* db = ctx.db(); Collection* coll = db->getCollection(&_opCtx, ns()); if (!coll) { WriteUnitOfWork wuow(&_opCtx); coll = db->createCollection(&_opCtx, ns()); wuow.commit(); } WorkingSet ws; // Add an object to the DB. insert(BSON("foo" << 5)); set<RecordId> recordIds; getRecordIds(&recordIds, coll); ASSERT_EQUALS(size_t(1), recordIds.size()); // Create a mock stage that returns the WSM. auto mockStage = make_unique<QueuedDataStage>(&_opCtx, &ws); // Mock data. { WorkingSetID id = ws.allocate(); WorkingSetMember* mockMember = ws.get(id); mockMember->recordId = *recordIds.begin(); mockMember->obj = coll->docFor(&_opCtx, mockMember->recordId); ws.transitionToRecordIdAndObj(id); // Points into our DB. mockStage->pushBack(id); } { WorkingSetID id = ws.allocate(); WorkingSetMember* mockMember = ws.get(id); mockMember->recordId = RecordId(); mockMember->obj = Snapshotted<BSONObj>(SnapshotId(), BSON("foo" << 6)); mockMember->transitionToOwnedObj(); ASSERT_TRUE(mockMember->obj.value().isOwned()); mockStage->pushBack(id); } unique_ptr<FetchStage> fetchStage( new FetchStage(&_opCtx, &ws, mockStage.release(), NULL, coll)); WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState state; // Don't bother doing any fetching if an obj exists already. state = fetchStage->work(&id); ASSERT_EQUALS(PlanStage::ADVANCED, state); state = fetchStage->work(&id); ASSERT_EQUALS(PlanStage::ADVANCED, state); // No more data to fetch, so, EOF. state = fetchStage->work(&id); ASSERT_EQUALS(PlanStage::IS_EOF, state); }
// static WorkingSetID WorkingSetCommon::allocateStatusMember(WorkingSet* ws, const Status& status) { invariant(ws); WorkingSetID wsid = ws->allocate(); WorkingSetMember* member = ws->get(wsid); member->obj = Snapshotted<BSONObj>(SnapshotId(), buildMemberStatusObject(status)); member->transitionToOwnedObj(); return wsid; }
void run() { // Various variables we'll need. OldClientWriteContext ctx(&_txn, nss.ns()); OpDebug* opDebug = &CurOp::get(_txn)->debug(); Collection* coll = ctx.getCollection(); UpdateLifecycleImpl updateLifecycle(false, nss); UpdateRequest request(nss); UpdateDriver driver((UpdateDriver::Options())); const BSONObj query = BSONObj(); const unique_ptr<WorkingSet> ws(stdx::make_unique<WorkingSet>()); const unique_ptr<CanonicalQuery> cq(canonicalize(query)); // Populate the request. request.setQuery(query); request.setUpdates(fromjson("{$set: {x: 0}}")); request.setSort(BSONObj()); request.setMulti(false); request.setReturnDocs(UpdateRequest::ReturnDocOption::RETURN_OLD); request.setLifecycle(&updateLifecycle); ASSERT_OK(driver.parse(request.getUpdates(), request.isMulti())); // Configure a QueuedDataStage to pass an OWNED_OBJ to the update stage. unique_ptr<QueuedDataStage> qds(stdx::make_unique<QueuedDataStage>(ws.get())); { WorkingSetID id = ws->allocate(); WorkingSetMember* member = ws->get(id); member->obj = Snapshotted<BSONObj>(SnapshotId(), fromjson("{x: 1}")); member->transitionToOwnedObj(); qds->pushBack(id); } // Configure the update. UpdateStageParams updateParams(&request, &driver, opDebug); updateParams.canonicalQuery = cq.get(); const unique_ptr<UpdateStage> updateStage( stdx::make_unique<UpdateStage>(&_txn, updateParams, ws.get(), coll, qds.release())); const UpdateStats* stats = static_cast<const UpdateStats*>(updateStage->getSpecificStats()); // Call work, passing the set up member to the update stage. WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState state = updateStage->work(&id); // Should return NEED_TIME, not modifying anything. ASSERT_EQUALS(PlanStage::NEED_TIME, state); ASSERT_EQUALS(stats->nModified, 0U); id = WorkingSet::INVALID_ID; state = updateStage->work(&id); ASSERT_EQUALS(PlanStage::IS_EOF, state); }
/**
 * Produces the next result of the proxied pipeline as an owned working set member.
 *
 * Stashed objects are returned first, taken from the back of '_stash'; after that the result
 * of getNextBson() is used. Returns FAILURE when 'out' is null, ADVANCED when a member was
 * written to '*out', and IS_EOF when getNextBson() yields nothing.
 */
PlanStage::StageState PipelineProxyStage::doWork(WorkingSetID* out) {
    if (!out) {
        return PlanStage::FAILURE;
    }

    // Allocates a member, publishes its id through 'out' and stores 'doc' in it. The caller
    // still has to transition the member to the owned-object state.
    const auto allocateFor = [this, out](const BSONObj& doc) -> WorkingSetMember* {
        *out = _ws->allocate();
        WorkingSetMember* allocated = _ws->get(*out);
        allocated->obj = Snapshotted<BSONObj>(SnapshotId(), doc);
        return allocated;
    };

    // Drain the stash before asking the pipeline for more.
    if (!_stash.empty()) {
        WorkingSetMember* stashed = allocateFor(_stash.back());
        _stash.pop_back();
        stashed->transitionToOwnedObj();
        return PlanStage::ADVANCED;
    }

    boost::optional<BSONObj> next = getNextBson();
    if (!next) {
        return PlanStage::IS_EOF;
    }

    allocateFor(*next)->transitionToOwnedObj();
    return PlanStage::ADVANCED;
}
void run() { // Various variables we'll need. OldClientWriteContext ctx(&_txn, nss.ns()); Collection* coll = ctx.getCollection(); const BSONObj query = BSONObj(); const auto ws = make_unique<WorkingSet>(); const unique_ptr<CanonicalQuery> cq(canonicalize(query)); // Configure a QueuedDataStage to pass an OWNED_OBJ to the delete stage. auto qds = make_unique<QueuedDataStage>(&_txn, ws.get()); { WorkingSetID id = ws->allocate(); WorkingSetMember* member = ws->get(id); member->obj = Snapshotted<BSONObj>(SnapshotId(), fromjson("{x: 1}")); member->transitionToOwnedObj(); qds->pushBack(id); } // Configure the delete. DeleteStageParams deleteParams; deleteParams.isMulti = false; deleteParams.canonicalQuery = cq.get(); const auto deleteStage = make_unique<DeleteStage>(&_txn, deleteParams, ws.get(), coll, qds.release()); const DeleteStats* stats = static_cast<const DeleteStats*>(deleteStage->getSpecificStats()); // Call work, passing the set up member to the delete stage. WorkingSetID id = WorkingSet::INVALID_ID; PlanStage::StageState state = deleteStage->work(&id); // Should return NEED_TIME, not deleting anything. ASSERT_EQUALS(PlanStage::NEED_TIME, state); ASSERT_EQUALS(stats->docsDeleted, 0U); id = WorkingSet::INVALID_ID; state = deleteStage->work(&id); ASSERT_EQUALS(PlanStage::IS_EOF, state); }
/**
 * Performs one unit of work for the delete stage.
 *
 * Obtains a working set member (a previously stored retry, or the next result of the child),
 * re-validates it if the snapshot changed, deletes the referenced document unless this is an
 * explain, and restores the child's state afterwards.
 *
 * Returns:
 *   - IS_EOF when isEOF() is true or the child is exhausted;
 *   - ADVANCED with '*out' set to the deleted document's member when '_params.returnDeleted'
 *     is set;
 *   - NEED_TIME when the member was skipped, no longer matched, or was deleted without having
 *     to be returned;
 *   - NEED_YIELD after a WriteConflictException (with '*out' set to INVALID_ID) or when the
 *     child asks to yield;
 *   - FAILURE / DEAD as reported by the child, with '*out' referring to a status member.
 *
 * A WriteConflictException raised by the delete itself is re-thrown when
 * '_params.returnDeleted' is set.
 */
PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    //
    // 'id' starts out as INVALID_ID so that the FAILURE/DEAD and NEED_YIELD paths below never
    // read an indeterminate value when the child returns without writing to it.
    WorkingSetID id = WorkingSet::INVALID_ID;
    if (_idRetrying != WorkingSet::INVALID_ID) {
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    } else {
        auto status = child()->work(&id);

        switch (status) {
            case PlanStage::ADVANCED:
                break;

            case PlanStage::FAILURE:
            case PlanStage::DEAD:
                *out = id;

                // If a stage fails, it may create a status WSM to indicate why it failed, in
                // which case 'id' is valid. If ID is invalid, we create our own error message.
                if (WorkingSet::INVALID_ID == id) {
                    const std::string errmsg = "delete stage failed to read in results from child";
                    *out = WorkingSetCommon::allocateStatusMember(
                        _ws, Status(ErrorCodes::InternalError, errmsg));
                }
                return status;

            case PlanStage::NEED_TIME:
                return status;

            case PlanStage::NEED_YIELD:
                *out = id;
                return status;

            case PlanStage::IS_EOF:
                return status;

            default:
                MONGO_UNREACHABLE;
        }
    }

    // We advanced, or are retrying, and id is set to the WSM to work on.
    WorkingSetMember* member = _ws->get(id);

    // We want to free this member when we return, unless we need to retry it.
    ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

    if (!member->hasRecordId()) {
        // We expect to be here because of an invalidation causing a force-fetch.
        ++_specificStats.nInvalidateSkips;
        return PlanStage::NEED_TIME;
    }
    RecordId recordId = member->recordId;
    // Deletes can't have projections. This means that covering analysis will always add
    // a fetch. We should always get fetched data, and never just key data.
    invariant(member->hasObj());

    try {
        // If the snapshot changed, then we have to make sure we have the latest copy of the
        // doc and that it still matches.
        std::unique_ptr<SeekableRecordCursor> cursor;
        if (getOpCtx()->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) {
            cursor = _collection->getCursor(getOpCtx());
            if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, cursor)) {
                // Doc is already deleted. Nothing more to do.
                return PlanStage::NEED_TIME;
            }

            // Make sure the re-fetched doc still matches the predicate.
            if (_params.canonicalQuery &&
                !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), nullptr)) {
                // Doesn't match.
                return PlanStage::NEED_TIME;
            }
        }

        // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
        // is allowed to free the memory.
        if (_params.returnDeleted) {
            // Save a copy of the document that is about to get deleted, but keep it in the
            // RID_AND_OBJ state in case we need to retry deleting it.
            BSONObj deletedDoc = member->obj.value();
            member->obj.setValue(deletedDoc.getOwned());
        }

        // TODO: Do we want to buffer docs and delete them in a group rather than
        // saving/restoring state repeatedly?

        try {
            WorkingSetCommon::prepareForSnapshotChange(_ws);
            child()->saveState();
        } catch (const WriteConflictException&) {
            // A write conflict while saving state is treated as fatal.
            std::terminate();
        }

        // Do the write, unless this is an explain.
        if (!_params.isExplain) {
            WriteUnitOfWork wunit(getOpCtx());
            _collection->deleteDocument(getOpCtx(), recordId, _params.fromMigrate);
            wunit.commit();
        }

        ++_specificStats.docsDeleted;
    } catch (const WriteConflictException&) {
        // When we're doing a findAndModify with a sort, the sort will have a limit of 1, so will
        // not produce any more results even if there is another matching document. Re-throw the
        // WCE here so that these operations get another chance to find a matching document. The
        // findAndModify command should automatically retry if it gets a WCE.
        // TODO: this is not necessary if there was no sort specified.
        if (_params.returnDeleted) {
            throw;
        }
        _idRetrying = id;
        memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // After deleting the document, the RecordId associated with this member is invalid.
        // Remove the 'recordId' from the WorkingSetMember before returning it.
        member->recordId = RecordId();
        member->transitionToOwnedObj();
    }

    // As restoreState may restore (recreate) cursors, cursors are tied to the
    // transaction in which they are created, and a WriteUnitOfWork is a
    // transaction, make sure to restore the state outside of the WriteUnitOfWork.
    try {
        child()->restoreState();
    } catch (const WriteConflictException&) {
        // Note we don't need to retry anything in this case since the delete already
        // was committed. However, we still need to return the deleted document
        // (if it was requested).
        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            _idReturning = id;
            // Keep this member around so that we can return it on the next work() call.
            memberFreer.Dismiss();
        }
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // member->obj should refer to the deleted document.
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        memberFreer.Dismiss();  // Keep this member around so we can return it.
        *out = id;
        return PlanStage::ADVANCED;
    }

    return PlanStage::NEED_TIME;
}
bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result) { BSONElement first = cmdObj.firstElement(); uassert(28528, str::stream() << "Argument to listIndexes must be of type String, not " << typeName(first.type()), first.type() == String); StringData collectionName = first.valueStringData(); uassert(28529, str::stream() << "Argument to listIndexes must be a collection name, " << "not the empty string", !collectionName.empty()); const NamespaceString ns(dbname, collectionName); const long long defaultBatchSize = std::numeric_limits<long long>::max(); long long batchSize; Status parseCursorStatus = parseCommandCursorOptions(cmdObj, defaultBatchSize, &batchSize); if (!parseCursorStatus.isOK()) { return appendCommandStatus(result, parseCursorStatus); } AutoGetCollectionForRead autoColl(txn, ns); if (!autoColl.getDb()) { return appendCommandStatus(result, Status(ErrorCodes::NamespaceNotFound, "no database")); } const Collection* collection = autoColl.getCollection(); if (!collection) { return appendCommandStatus(result, Status(ErrorCodes::NamespaceNotFound, "no collection")); } const CollectionCatalogEntry* cce = collection->getCatalogEntry(); invariant(cce); vector<string> indexNames; MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { indexNames.clear(); cce->getAllIndexes(txn, &indexNames); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns()); std::unique_ptr<WorkingSet> ws(new WorkingSet()); std::unique_ptr<QueuedDataStage> root(new QueuedDataStage(ws.get())); for (size_t i = 0; i < indexNames.size(); i++) { BSONObj indexSpec; MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { indexSpec = cce->getIndexSpec(txn, indexNames[i]); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns()); WorkingSetID id = ws->allocate(); WorkingSetMember* member = ws->get(id); member->keyData.clear(); member->loc = RecordId(); member->obj = Snapshotted<BSONObj>(SnapshotId(), indexSpec.getOwned()); 
member->transitionToOwnedObj(); root->pushBack(id); } std::string cursorNamespace = str::stream() << dbname << ".$cmd." << name << "." << ns.coll(); dassert(NamespaceString(cursorNamespace).isValid()); dassert(NamespaceString(cursorNamespace).isListIndexesCursorNS()); dassert(ns == NamespaceString(cursorNamespace).getTargetNSForListIndexes()); auto statusWithPlanExecutor = PlanExecutor::make( txn, std::move(ws), std::move(root), cursorNamespace, PlanExecutor::YIELD_MANUAL); if (!statusWithPlanExecutor.isOK()) { return appendCommandStatus(result, statusWithPlanExecutor.getStatus()); } std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue()); BSONArrayBuilder firstBatch; const int byteLimit = MaxBytesToReturnToClientAtOnce; for (long long objCount = 0; objCount < batchSize && firstBatch.len() < byteLimit; objCount++) { BSONObj next; PlanExecutor::ExecState state = exec->getNext(&next, NULL); if (state == PlanExecutor::IS_EOF) { break; } invariant(state == PlanExecutor::ADVANCED); firstBatch.append(next); } CursorId cursorId = 0LL; if (!exec->isEOF()) { exec->saveState(); ClientCursor* cursor = new ClientCursor( CursorManager::getGlobalCursorManager(), exec.release(), cursorNamespace); cursorId = cursor->cursorid(); } appendCursorResponseObject(cursorId, cursorNamespace, firstBatch.arr(), &result); return true; }
/**
 * Performs one unit of work for the group stage.
 *
 * The first call runs initGroupScripting(). Subsequent calls pull one result at a time from
 * the child and feed it to processObject(). Once the child reports IS_EOF, the results of
 * finalizeResults() are returned as a single owned working set member and the stage moves to
 * its "done" state.
 *
 * Returns NEED_TIME while initializing or consuming input, ADVANCED exactly once with the
 * final result in '*out', IS_EOF afterwards, NEED_YIELD when the child asks to yield, and
 * FAILURE / DEAD with '*out' referring to a status member on errors.
 */
PlanStage::StageState GroupStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the time spent in this call to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // On the first call to work(), call initGroupScripting().
    if (_groupState == GroupState_Initializing) {
        Status status = initGroupScripting();
        if (!status.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            return PlanStage::FAILURE;
        }
        _groupState = GroupState_ReadingFromChild;
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    // Otherwise, read from our child.
    invariant(_groupState == GroupState_ReadingFromChild);
    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState state = child()->work(&id);

    if (PlanStage::NEED_TIME == state) {
        ++_commonStats.needTime;
        return state;
    } else if (PlanStage::NEED_YIELD == state) {
        ++_commonStats.needYield;
        *out = id;
        return state;
    } else if (PlanStage::FAILURE == state || PlanStage::DEAD == state) {
        // DEAD is handled like FAILURE so that a status member produced by the child is
        // propagated to the caller in both cases (previously DEAD returned without touching
        // '*out', dropping the child's error details).
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which
        // case 'id' is valid. If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "group stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
        }
        return state;
    } else if (PlanStage::ADVANCED == state) {
        WorkingSetMember* member = _ws->get(id);
        // Group queries can't have projections. This means that covering analysis will always
        // add a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        Status status = processObject(member->obj.value());
        if (!status.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            return PlanStage::FAILURE;
        }

        // The input member has been consumed and is not passed on.
        _ws->free(id);

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    } else {
        // We're done reading from our child.
        invariant(PlanStage::IS_EOF == state);

        auto results = finalizeResults();
        if (!results.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, results.getStatus());
            return PlanStage::FAILURE;
        }

        // Transition to state "done." Future calls to work() will return IS_EOF.
        _groupState = GroupState_Done;

        *out = _ws->allocate();
        WorkingSetMember* member = _ws->get(*out);
        member->obj = Snapshotted<BSONObj>(SnapshotId(), results.getValue());
        member->transitionToOwnedObj();

        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }
}
bool run(OperationContext* txn, const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result) { unique_ptr<MatchExpression> matcher; BSONElement filterElt = jsobj["filter"]; if (!filterElt.eoo()) { if (filterElt.type() != mongo::Object) { return appendCommandStatus( result, Status(ErrorCodes::BadValue, "\"filter\" must be an object")); } StatusWithMatchExpression statusWithMatcher = MatchExpressionParser::parse(filterElt.Obj()); if (!statusWithMatcher.isOK()) { return appendCommandStatus(result, statusWithMatcher.getStatus()); } matcher = std::move(statusWithMatcher.getValue()); } const long long defaultBatchSize = std::numeric_limits<long long>::max(); long long batchSize; Status parseCursorStatus = parseCommandCursorOptions(jsobj, defaultBatchSize, &batchSize); if (!parseCursorStatus.isOK()) { return appendCommandStatus(result, parseCursorStatus); } ScopedTransaction scopedXact(txn, MODE_IS); AutoGetDb autoDb(txn, dbname, MODE_S); const Database* d = autoDb.getDb(); const DatabaseCatalogEntry* dbEntry = NULL; list<string> names; if (d) { dbEntry = d->getDatabaseCatalogEntry(); dbEntry->getCollectionNamespaces(&names); names.sort(); } auto ws = make_unique<WorkingSet>(); auto root = make_unique<QueuedDataStage>(txn, ws.get()); for (std::list<std::string>::const_iterator i = names.begin(); i != names.end(); ++i) { const std::string& ns = *i; StringData collection = nsToCollectionSubstring(ns); if (collection == "system.namespaces") { continue; } BSONObjBuilder b; b.append("name", collection); CollectionOptions options = dbEntry->getCollectionCatalogEntry(ns)->getCollectionOptions(txn); b.append("options", options.toBSON()); BSONObj maybe = b.obj(); if (matcher && !matcher->matchesBSON(maybe)) { continue; } WorkingSetID id = ws->allocate(); WorkingSetMember* member = ws->get(id); member->keyData.clear(); member->loc = RecordId(); member->obj = Snapshotted<BSONObj>(SnapshotId(), maybe); member->transitionToOwnedObj(); root->pushBack(id); } 
std::string cursorNamespace = str::stream() << dbname << ".$cmd." << name; dassert(NamespaceString(cursorNamespace).isValid()); dassert(NamespaceString(cursorNamespace).isListCollectionsCursorNS()); auto statusWithPlanExecutor = PlanExecutor::make( txn, std::move(ws), std::move(root), cursorNamespace, PlanExecutor::YIELD_MANUAL); if (!statusWithPlanExecutor.isOK()) { return appendCommandStatus(result, statusWithPlanExecutor.getStatus()); } unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue()); BSONArrayBuilder firstBatch; const int byteLimit = FindCommon::kMaxBytesToReturnToClientAtOnce; for (long long objCount = 0; objCount < batchSize && firstBatch.len() < byteLimit; objCount++) { BSONObj next; PlanExecutor::ExecState state = exec->getNext(&next, NULL); if (state == PlanExecutor::IS_EOF) { break; } invariant(state == PlanExecutor::ADVANCED); firstBatch.append(next); } CursorId cursorId = 0LL; if (!exec->isEOF()) { exec->saveState(); exec->detachFromOperationContext(); ClientCursor* cursor = new ClientCursor(CursorManager::getGlobalCursorManager(), exec.release(), cursorNamespace, txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot()); cursorId = cursor->cursorid(); } appendCursorResponseObject(cursorId, cursorNamespace, firstBatch.arr(), &result); return true; }
/**
 * Performs one unit of work for the delete stage and maintains the common stage statistics.
 *
 * Obtains a working set member (a previously stored retry, or the next result of the child),
 * re-validates it if the snapshot changed, deletes the referenced document unless this is an
 * explain, and restores the child's state afterwards.
 *
 * Returns:
 *   - IS_EOF when isEOF() is true or the child is exhausted;
 *   - ADVANCED with '*out' set to the deleted document's member when '_params.returnDeleted'
 *     is set;
 *   - NEED_TIME when the member was skipped, no longer matched, or was deleted without having
 *     to be returned;
 *   - NEED_YIELD after a WriteConflictException (with '*out' set to INVALID_ID) or when the
 *     child asks to yield;
 *   - FAILURE / DEAD as reported by the child, with '*out' referring to a status member.
 */
PlanStage::StageState DeleteStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    //
    // 'id' starts out as INVALID_ID so that the FAILURE/DEAD and NEED_YIELD paths below never
    // read an indeterminate value when the child returns without writing to it.
    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);

        // We want to free this member when we return, unless we need to retry it.
        ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

        if (!member->hasLoc()) {
            // We expect to be here because of an invalidation causing a force-fetch, and
            // doc-locking storage engines do not issue invalidations.
            ++_specificStats.nInvalidateSkips;
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        RecordId rloc = member->loc;
        // Deletes can't have projections. This means that covering analysis will always add
        // a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        try {
            // If the snapshot changed, then we have to make sure we have the latest copy of the
            // doc and that it still matches.
            std::unique_ptr<RecordCursor> cursor;
            if (getOpCtx()->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) {
                cursor = _collection->getCursor(getOpCtx());
                if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, cursor)) {
                    // Doc is already deleted. Nothing more to do.
                    ++_commonStats.needTime;
                    return PlanStage::NEED_TIME;
                }

                // Make sure the re-fetched doc still matches the predicate.
                if (_params.canonicalQuery &&
                    !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), nullptr)) {
                    // Doesn't match.
                    ++_commonStats.needTime;
                    return PlanStage::NEED_TIME;
                }
            }

            // Ensure that the BSONObj underlying the WorkingSetMember is owned because
            // saveState() is allowed to free the memory.
            if (_params.returnDeleted) {
                member->makeObjOwnedIfNeeded();
            }

            // TODO: Do we want to buffer docs and delete them in a group rather than
            // saving/restoring state repeatedly?

            try {
                if (supportsDocLocking()) {
                    // Doc-locking engines require this before saveState() since they don't use
                    // invalidations.
                    WorkingSetCommon::prepareForSnapshotChange(_ws);
                }
                child()->saveState();
            } catch (const WriteConflictException&) {
                // A write conflict while saving state is treated as fatal.
                std::terminate();
            }

            if (_params.returnDeleted) {
                // Save a copy of the document that is about to get deleted.
                //
                // NOTE(review): the member drops its RecordId here, before the delete runs. If
                // deleteDocument() below throws a WriteConflictException, the retry stored in
                // '_idRetrying' presumably takes the !hasLoc() skip branch above and the
                // document is never deleted -- confirm.
                BSONObj deletedDoc = member->obj.value();
                member->obj.setValue(deletedDoc.getOwned());
                member->loc = RecordId();
                member->transitionToOwnedObj();
            }

            // Do the write, unless this is an explain.
            if (!_params.isExplain) {
                WriteUnitOfWork wunit(getOpCtx());
                _collection->deleteDocument(getOpCtx(), rloc);
                wunit.commit();
            }

            ++_specificStats.docsDeleted;
        } catch (const WriteConflictException&) {
            // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may be
            // freed when we yield.
            member->makeObjOwnedIfNeeded();
            _idRetrying = id;
            memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
            *out = WorkingSet::INVALID_ID;
            _commonStats.needYield++;
            return NEED_YIELD;
        }

        // As restoreState may restore (recreate) cursors, cursors are tied to the
        // transaction in which they are created, and a WriteUnitOfWork is a
        // transaction, make sure to restore the state outside of the WriteUnitOfWork.
        try {
            child()->restoreState();
        } catch (const WriteConflictException&) {
            // Note we don't need to retry anything in this case since the delete already
            // was committed. However, we still need to return the deleted document
            // (if it was requested).
            if (_params.returnDeleted) {
                // member->obj should refer to the deleted document.
                invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

                _idReturning = id;
                // Keep this member around so that we can return it on the next work() call.
                memberFreer.Dismiss();
            }
            *out = WorkingSet::INVALID_ID;
            _commonStats.needYield++;
            return NEED_YIELD;
        }

        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            memberFreer.Dismiss();  // Keep this member around so we can return it.
            *out = id;
            ++_commonStats.advanced;
            return PlanStage::ADVANCED;
        }

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which case
        // 'id' is valid. If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "delete stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
        }
        return status;
    } else if (PlanStage::NEED_TIME == status) {
        ++_commonStats.needTime;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
        ++_commonStats.needYield;
    }

    // NEED_TIME, NEED_YIELD and IS_EOF from the child are passed through unchanged.
    return status;
}
/**
 * Looks up the working set member identified by 'id' and forwards to that member's own
 * transitionToOwnedObj().
 */
void WorkingSet::transitionToOwnedObj(WorkingSetID id) {
    get(id)->transitionToOwnedObj();
}
/**
 * Performs one unit of work for the update stage.
 *
 * In order, this function:
 *  - returns IS_EOF if the stage is already at EOF;
 *  - once updating is done, performs the pending insert (if any) and optionally returns the
 *    inserted document;
 *  - returns a document stashed in '_idReturning' by a previous call;
 *  - otherwise takes a working set member from '_idRetrying' or from the child, verifies it
 *    still matches, applies the update, and restores the child's state.
 *
 * 'out' receives the id of the working set member being returned when the result is ADVANCED,
 * the child's id on NEED_YIELD from the child, a status member on FAILURE/DEAD, and
 * WorkingSet::INVALID_ID when restoring the child's state hit a write conflict.
 *
 * May throw WriteConflictException (from doInsert(), or deliberately when the document no longer
 * matches and shouldRestartUpdateIfNoLongerMatches() says to restart).
 */
PlanStage::StageState UpdateStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    if (doneUpdating()) {
        // Even if we're done updating, we may have some inserting left to do.
        if (needInsert()) {
            // TODO we may want to handle WriteConflictException here. Currently we bounce it
            // out to a higher level since if this WCEs it is likely that we raced with another
            // upsert that may have matched our query, and therefore this may need to perform an
            // update rather than an insert. Bouncing to the higher level allows restarting the
            // query in this case.
            doInsert();

            invariant(isEOF());
            if (_params.request->shouldReturnNewDocs()) {
                // Want to return the document we just inserted, create it as a WorkingSetMember
                // so that we can return it.
                BSONObj newObj = _specificStats.objInserted;
                *out = _ws->allocate();
                WorkingSetMember* member = _ws->get(*out);
                member->obj = Snapshotted<BSONObj>(getOpCtx()->recoveryUnit()->getSnapshotId(),
                                                   newObj.getOwned());
                member->transitionToOwnedObj();
                return PlanStage::ADVANCED;
            }
        }

        // At this point either we're done updating and there was no insert to do,
        // or we're done updating and we're done inserting. Either way, we're EOF.
        invariant(isEOF());
        return PlanStage::IS_EOF;
    }

    // If we're here, then we still have to ask for results from the child and apply
    // updates to them. We should only get here if the collection exists.
    invariant(_collection);

    // It is possible that after an update was applied, a WriteConflictException
    // occurred and prevented us from returning ADVANCED with the requested version
    // of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.request->shouldReturnAnyDocs());

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    //
    // 'id' starts out as INVALID_ID so that the "did the child hand us a status member?" check
    // in the failure branch below is well-defined even if the child returns without writing
    // to it.
    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        // Need to get these things from the result returned by the child.
        RecordId recordId;

        WorkingSetMember* member = _ws->get(id);

        // We want to free this member when we return, unless we need to retry updating or
        // returning it.
        ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

        if (!member->hasRecordId()) {
            // We expect to be here because of an invalidation causing a force-fetch.
            ++_specificStats.nInvalidateSkips;
            return PlanStage::NEED_TIME;
        }
        recordId = member->recordId;

        // Updates can't have projections. This means that covering analysis will always add
        // a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        // We fill this with the new RecordIds of moved doc so we don't double-update.
        if (_updatedRecordIds && _updatedRecordIds->count(recordId) > 0) {
            // Found a RecordId that refers to a document we had already updated. Note that
            // we can never remove from _updatedRecordIds because updates by other clients
            // could cause us to encounter a document again later.
            return PlanStage::NEED_TIME;
        }

        bool docStillMatches;
        try {
            docStillMatches = write_stage_common::ensureStillMatches(
                _collection, getOpCtx(), _ws, id, _params.canonicalQuery);
        } catch (const WriteConflictException&) {
            // There was a problem trying to detect if the document still exists, so retry.
            memberFreer.Dismiss();
            return prepareToRetryWSM(id, out);
        }

        if (!docStillMatches) {
            // Either the document has been deleted, or it has been updated such that it no longer
            // matches the predicate.
            if (shouldRestartUpdateIfNoLongerMatches(_params)) {
                throw WriteConflictException();
            }
            return PlanStage::NEED_TIME;
        }

        // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
        // is allowed to free the memory.
        member->makeObjOwnedIfNeeded();

        // Save state before making changes
        WorkingSetCommon::prepareForSnapshotChange(_ws);
        try {
            child()->saveState();
        } catch (const WriteConflictException&) {
            // A write conflict while saving state is treated as unrecoverable here.
            std::terminate();
        }

        // If we care about the pre-updated version of the doc, save it out here.
        BSONObj oldObj;
        if (_params.request->shouldReturnOldDocs()) {
            oldObj = member->obj.value().getOwned();
        }

        BSONObj newObj;
        try {
            // Do the update, get us the new version of the doc.
            newObj = transformAndUpdate(member->obj, recordId);
        } catch (const WriteConflictException&) {
            memberFreer.Dismiss();  // Keep this member around so we can retry updating it.
            return prepareToRetryWSM(id, out);
        }

        // Set member's obj to be the doc we want to return.
        if (_params.request->shouldReturnAnyDocs()) {
            if (_params.request->shouldReturnNewDocs()) {
                member->obj = Snapshotted<BSONObj>(getOpCtx()->recoveryUnit()->getSnapshotId(),
                                                   newObj.getOwned());
            } else {
                invariant(_params.request->shouldReturnOldDocs());
                member->obj.setValue(oldObj);
            }
            member->recordId = RecordId();
            member->transitionToOwnedObj();
        }

        // This should be after transformAndUpdate to make sure we actually updated this doc.
        ++_specificStats.nMatched;

        // Restore state after modification

        // As restoreState may restore (recreate) cursors, make sure to restore the
        // state outside of the WriteUnitOfWork.
        try {
            child()->restoreState();
        } catch (const WriteConflictException&) {
            // Note we don't need to retry updating anything in this case since the update
            // already was committed. However, we still need to return the updated document
            // (if it was requested).
            if (_params.request->shouldReturnAnyDocs()) {
                // member->obj should refer to the document we want to return.
                invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

                _idReturning = id;
                // Keep this member around so that we can return it on the next work() call.
                memberFreer.Dismiss();
            }
            *out = WorkingSet::INVALID_ID;
            return NEED_YIELD;
        }

        if (_params.request->shouldReturnAnyDocs()) {
            // member->obj should refer to the document we want to return.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            memberFreer.Dismiss();  // Keep this member around so we can return it.
            *out = id;
            return PlanStage::ADVANCED;
        }

        return PlanStage::NEED_TIME;
    } else if (PlanStage::IS_EOF == status) {
        // The child is out of results, but we might not be done yet because we still might
        // have to do an insert.
        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        // Both terminal error states must hand a status member back through 'out'; otherwise
        // the caller has nothing to read the error from.
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which case
        // 'id' is valid. If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "update stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
        }
        return status;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
    }

    return status;
}
/**
 * Performs one unit of work for the delete stage.
 *
 * In order, this function:
 *  - returns IS_EOF if the stage is already at EOF;
 *  - returns a document stashed in '_idReturning' by a previous call;
 *  - otherwise takes a working set member from '_idRetrying' or from the child, verifies it
 *    still matches, deletes the document (unless this is an explain), and restores the child's
 *    state.
 *
 * 'out' receives the id of the deleted document's working set member when the result is ADVANCED
 * (only when '_params.returnDeleted' is set), the child's id on FAILURE/DEAD/NEED_YIELD from the
 * child, and WorkingSet::INVALID_ID when restoring the child's state hit a write conflict.
 *
 * May throw WriteConflictException when the document no longer matches and
 * shouldRestartDeleteIfNoLongerMatches() says to restart.
 */
PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    //
    // 'id' starts out as INVALID_ID so that the invariant and the '*out = id' assignments in the
    // switch below read a well-defined value even if the child returns without writing to it.
    WorkingSetID id = WorkingSet::INVALID_ID;
    if (_idRetrying != WorkingSet::INVALID_ID) {
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    } else {
        auto status = child()->work(&id);

        switch (status) {
            case PlanStage::ADVANCED:
                break;

            case PlanStage::FAILURE:
            case PlanStage::DEAD:
                // The stage which produces a failure is responsible for allocating a working set
                // member with error details.
                invariant(WorkingSet::INVALID_ID != id);
                *out = id;
                return status;

            case PlanStage::NEED_TIME:
                return status;

            case PlanStage::NEED_YIELD:
                *out = id;
                return status;

            case PlanStage::IS_EOF:
                return status;

            default:
                MONGO_UNREACHABLE;
        }
    }

    // We advanced, or are retrying, and id is set to the WSM to work on.
    WorkingSetMember* member = _ws->get(id);

    // We want to free this member when we return, unless we need to retry deleting or returning
    // it.
    ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

    invariant(member->hasRecordId());
    RecordId recordId = member->recordId;
    // Deletes can't have projections. This means that covering analysis will always add
    // a fetch. We should always get fetched data, and never just key data.
    invariant(member->hasObj());

    // Ensure the document still exists and matches the predicate.
    bool docStillMatches;
    try {
        docStillMatches = write_stage_common::ensureStillMatches(
            _collection, getOpCtx(), _ws, id, _params.canonicalQuery);
    } catch (const WriteConflictException&) {
        // There was a problem trying to detect if the document still exists, so retry.
        memberFreer.Dismiss();
        return prepareToRetryWSM(id, out);
    }

    if (!docStillMatches) {
        // Either the document has already been deleted, or it has been updated such that it no
        // longer matches the predicate.
        if (shouldRestartDeleteIfNoLongerMatches(_params)) {
            throw WriteConflictException();
        }
        return PlanStage::NEED_TIME;
    }

    // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState() is
    // allowed to free the memory.
    if (_params.returnDeleted) {
        // Save a copy of the document that is about to get deleted, but keep it in the RID_AND_OBJ
        // state in case we need to retry deleting it.
        BSONObj deletedDoc = member->obj.value();
        member->obj.setValue(deletedDoc.getOwned());
    }

    // TODO: Do we want to buffer docs and delete them in a group rather than saving/restoring
    // state repeatedly?

    WorkingSetCommon::prepareForSnapshotChange(_ws);
    try {
        child()->saveState();
    } catch (const WriteConflictException&) {
        // A write conflict while saving state is treated as unrecoverable here.
        std::terminate();
    }

    // Do the write, unless this is an explain.
    if (!_params.isExplain) {
        try {
            WriteUnitOfWork wunit(getOpCtx());
            _collection->deleteDocument(getOpCtx(),
                                        _params.stmtId,
                                        recordId,
                                        _params.opDebug,
                                        _params.fromMigrate,
                                        false,
                                        _params.returnDeleted ? Collection::StoreDeletedDoc::On
                                                              : Collection::StoreDeletedDoc::Off);
            wunit.commit();
        } catch (const WriteConflictException&) {
            memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
            return prepareToRetryWSM(id, out);
        }
    }
    ++_specificStats.docsDeleted;

    if (_params.returnDeleted) {
        // After deleting the document, the RecordId associated with this member is invalid.
        // Remove the 'recordId' from the WorkingSetMember before returning it.
        member->recordId = RecordId();
        member->transitionToOwnedObj();
    }

    // As restoreState may restore (recreate) cursors, cursors are tied to the transaction in which
    // they are created, and a WriteUnitOfWork is a transaction, make sure to restore the state
    // outside of the WriteUnitOfWork.
    try {
        child()->restoreState();
    } catch (const WriteConflictException&) {
        // Note we don't need to retry anything in this case since the delete already was
        // committed. However, we still need to return the deleted document (if it was requested).
        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            _idReturning = id;
            // Keep this member around so that we can return it on the next work() call.
            memberFreer.Dismiss();
        }
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // member->obj should refer to the deleted document.
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        memberFreer.Dismiss();  // Keep this member around so we can return it.
        *out = id;
        return PlanStage::ADVANCED;
    }

    return PlanStage::NEED_TIME;
}