/**
 * Sharding hook invoked for a document inserted into this collection.
 *
 * On a shard server:
 *   - if this is the server configuration namespace and the document's _id equals
 *     ShardIdentityType::IdName, the document is parsed and validated as a shard identity
 *     (via uassertStatusOK) and a ShardIdentityLogOpHandler is registered with the recovery unit;
 *   - if sharding state is enabled, the insert is reported to _incrementChunkOnInsertOrUpdate
 *     together with the size of the inserted document.
 *
 * In every case the shard version is then checked and, when _sourceMgr is set, its cloner is
 * told about the insert.
 *
 * The collection must be locked in MODE_IX (checked with dassert).
 */
void CollectionShardingState::onInsertOp(OperationContext* opCtx, const BSONObj& insertedDoc) {
    dassert(opCtx->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_IX));

    if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
        if (_nss == NamespaceString::kServerConfigurationNamespace) {
            const auto idElem = insertedDoc["_id"];
            if (idElem && idElem.str() == ShardIdentityType::IdName) {
                auto identity = uassertStatusOK(ShardIdentityType::fromBSON(insertedDoc));
                uassertStatusOK(identity.validate());

                // The handler is handed to the recovery unit as a raw pointer.
                opCtx->recoveryUnit()->registerChange(
                    new ShardIdentityLogOpHandler(opCtx, std::move(identity)));
            }
        }

        if (ShardingState::get(opCtx)->enabled()) {
            _incrementChunkOnInsertOrUpdate(opCtx, insertedDoc, insertedDoc.objsize());
        }
    }

    checkShardVersionOrThrow(opCtx);

    if (!_sourceMgr) {
        return;
    }
    _sourceMgr->getCloner()->onInsertOp(opCtx, insertedDoc);
}
/**
 * Sharding hook invoked for a document deleted from this collection.
 *
 * - When writes are replicated, the node is a shard server and this is
 *   NamespaceString::kConfigCollectionNamespace, deleting the document whose _id equals
 *   ShardIdentityType::IdName is rejected with error 40070.
 * - On a config server, for the ShardType::ConfigNS collection, a RemoveShardLogOpHandler is
 *   registered with the recovery unit for the deleted shard id.
 *
 * Afterwards the shard version is checked and, when _sourceMgr is set and the deleted document
 * is flagged as migrating, the cloner is told about the delete.
 *
 * The collection must be locked in MODE_IX (checked with dassert).
 */
void CollectionShardingState::onDeleteOp(OperationContext* txn,
                                         const CollectionShardingState::DeleteState& deleteState) {
    dassert(txn->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_IX));

    const bool mayTouchShardIdentity = txn->writesAreReplicated() &&
        serverGlobalParams.clusterRole == ClusterRole::ShardServer &&
        _nss == NamespaceString::kConfigCollectionNamespace;
    if (mayTouchShardIdentity) {
        if (auto idElem = deleteState.idDoc["_id"]) {
            uassert(40070,
                    "cannot delete shardIdentity document while in --shardsvr mode",
                    idElem.str() != ShardIdentityType::IdName);
        }
    }

    // For backwards compatibility, cancel a pending asynchronous addShard task created on the
    // primary config as a result of a 3.2 mongos doing addShard for the shard with id
    // deletedDocId.
    const bool isConfigShardsCollection =
        serverGlobalParams.clusterRole == ClusterRole::ConfigServer && _nss == ShardType::ConfigNS;
    if (isConfigShardsCollection) {
        const BSONElement shardIdElem = deleteState.idDoc["_id"];
        invariant(!shardIdElem.eoo());

        const auto deletedShardId = shardIdElem.valuestrsafe();
        txn->recoveryUnit()->registerChange(
            new RemoveShardLogOpHandler(txn, ShardId(deletedShardId)));
    }

    checkShardVersionOrThrow(txn);

    if (_sourceMgr && deleteState.isMigrating) {
        _sourceMgr->getCloner()->onDeleteOp(txn, deleteState.idDoc);
    }
}
/**
 * Acquires the collection for reading in MODE_IS, fills in CurOp for the operation, makes sure
 * the majority-committed snapshot is valid, and finally checks the shard version.
 *
 * The shard version check is done last on purpose: it needs the DB and collection locks, but it
 * should run against the snapshot that the read will actually use.
 */
AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* txn,
                                                   const NamespaceString& nss)
    : _txn(txn), _transaction(txn, MODE_IS) {
    {
        _autoColl.emplace(txn, nss, MODE_IS);

        auto curOp = CurOp::get(_txn);

        // Held only for this inner scope, which covers the *_inlock CurOp calls below.
        stdx::lock_guard<Client> clientLock(*_txn->getClient());

        // TODO: OldClientContext legacy, needs to be removed
        curOp->ensureStarted();
        curOp->setNS_inlock(nss.ns());

        // At this point, we are locked in shared mode for the database by the DB lock in the
        // constructor, so it is safe to load the DB pointer.
        if (auto db = _autoColl->getDb()) {
            // TODO: OldClientContext legacy, needs to be removed
            curOp->enter_inlock(nss.ns().c_str(), db->getProfilingLevel());
        }
    }

    // Note: this can yield.
    _ensureMajorityCommittedSnapshotIsValid(nss);

    // We have both the DB and collection locked, which is the prerequisite to do a stable shard
    // version check, but we'd like to do the check after we have a satisfactory snapshot.
    CollectionShardingState::get(txn, nss)->checkShardVersionOrThrow(txn);
}
/**
 * Sharding hook invoked for a document inserted into this collection.
 *
 * - On a shard server, for NamespaceString::kConfigCollectionNamespace, a document whose _id
 *   equals ShardIdentityType::IdName is parsed and validated as a shard identity (via
 *   uassertStatusOK) and a ShardIdentityLogOpHandler is registered with the recovery unit.
 * - On a config server, for the ShardType::ConfigNS collection, a document that lacks the
 *   ShardType "state" field is parsed as a ShardType and a LegacyAddShardLogOpHandler is
 *   registered with the recovery unit.
 *
 * Afterwards the shard version is checked and, when _sourceMgr is set, its cloner is told about
 * the insert.
 *
 * The collection must be locked in MODE_IX (checked with dassert).
 */
void CollectionShardingState::onInsertOp(OperationContext* txn, const BSONObj& insertedDoc) {
    dassert(txn->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_IX));

    if (serverGlobalParams.clusterRole == ClusterRole::ShardServer &&
        _nss == NamespaceString::kConfigCollectionNamespace) {
        if (auto idElem = insertedDoc["_id"]) {
            if (idElem.str() == ShardIdentityType::IdName) {
                auto shardIdentityDoc = uassertStatusOK(ShardIdentityType::fromBSON(insertedDoc));
                uassertStatusOK(shardIdentityDoc.validate());
                txn->recoveryUnit()->registerChange(
                    new ShardIdentityLogOpHandler(txn, std::move(shardIdentityDoc)));
            }
        }
    }

    // For backwards compatibility with 3.2 mongos, perform shard aware initialization on a newly
    // added shard on inserts to config.shards missing the "state" field. (On addShard, a 3.2
    // mongos performs the insert into config.shards without a "state" field.)
    if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer &&
        _nss == ShardType::ConfigNS) {
        if (insertedDoc[ShardType::state.name()].eoo()) {
            // Deliberately non-const: std::move on a const object would silently fall back to a
            // copy instead of moving the parsed ShardType into the handler.
            auto shardType = uassertStatusOK(ShardType::fromBSON(insertedDoc));
            txn->recoveryUnit()->registerChange(
                new LegacyAddShardLogOpHandler(txn, std::move(shardType)));
        }
    }

    checkShardVersionOrThrow(txn);

    if (_sourceMgr) {
        _sourceMgr->getCloner()->onInsertOp(txn, insertedDoc);
    }
}
/**
 * Sharding hook invoked for a document updated in this collection.
 *
 * Checks the shard version and, when _sourceMgr is set, tells its cloner about the update.
 * The collection must be locked in MODE_IX (checked with dassert).
 */
void CollectionShardingState::onUpdateOp(OperationContext* txn, const BSONObj& updatedDoc) {
    dassert(txn->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_IX));

    checkShardVersionOrThrow(txn);

    if (!_sourceMgr) {
        return;
    }

    auto cloner = _sourceMgr->getCloner();
    cloner->onUpdateOp(txn, updatedDoc);
}
void OldClientContext::_checkNotStale() const { switch (CurOp::get(_txn)->getNetworkOp()) { case dbGetMore: // getMore is special and should be handled elsewhere. case dbUpdate: // update & delete check shard version in instance.cpp, so don't check case dbDelete: // here as well. break; default: auto css = CollectionShardingState::get(_txn, _ns); css->checkShardVersionOrThrow(_txn); } }
/**
 * Acquires the collection for a read command, starts the stats tracker for the operation with a
 * read-locked lock type, and then checks the shard version.
 *
 * When the database does not exist, the stats tracker is given a profiling level of 0
 * ("do not change").
 */
AutoGetCollectionForReadCommand::AutoGetCollectionForReadCommand(
    OperationContext* opCtx, const NamespaceString& nss, AutoGetCollection::ViewMode viewMode) {

    _autoCollForRead.emplace(opCtx, nss, viewMode);

    const int doNotChangeProfilingLevel = 0;
    const auto db = _autoCollForRead->getDb();
    const auto profilingLevel = db ? db->getProfilingLevel() : doNotChangeProfilingLevel;

    _statsTracker.emplace(opCtx, nss, Top::LockType::ReadLocked, profilingLevel);

    // We have both the DB and collection locked, which is the prerequisite to do a stable shard
    // version check, but we'd like to do the check after we have a satisfactory snapshot.
    CollectionShardingState::get(opCtx, nss)->checkShardVersionOrThrow(opCtx);
}
/**
 * Sharding hook invoked for a document deleted from this collection.
 *
 * On a shard server:
 *   - deletes from NamespaceString::kShardConfigCollectionsCollectionName are forwarded to
 *     _onConfigDeleteInvalidateCachedMetadataAndNotify;
 *   - deleting the document whose _id equals ShardIdentityType::IdName from the server
 *     configuration namespace fails with error 40070, unless the member is in rollback, in which
 *     case a warning is logged and the rollback is recorded with ShardIdentityRollbackNotifier.
 *
 * On a config server, deleting from VersionType::ConfigNS fails with error 40302, unless the
 * member is in rollback, in which case cached cluster-id related state is discarded.
 *
 * Afterwards the shard version is checked and, when _sourceMgr is set and the deleted document
 * is flagged as migrating, the cloner is told about the delete.
 *
 * The collection must be locked in MODE_IX (checked with dassert).
 */
void CollectionShardingState::onDeleteOp(OperationContext* opCtx,
                                         const CollectionShardingState::DeleteState& deleteState) {
    dassert(opCtx->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_IX));

    if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
        if (_nss == NamespaceString::kShardConfigCollectionsCollectionName) {
            _onConfigDeleteInvalidateCachedMetadataAndNotify(opCtx, deleteState.documentKey);
        }

        if (_nss == NamespaceString::kServerConfigurationNamespace) {
            const auto idElem = deleteState.documentKey["_id"];
            if (idElem && idElem.str() == ShardIdentityType::IdName) {
                const bool inRollback =
                    repl::ReplicationCoordinator::get(opCtx)->getMemberState().rollback();
                if (inRollback) {
                    warning() << "Shard identity document rolled back.  Will shut down after "
                                 "finishing rollback.";
                    ShardIdentityRollbackNotifier::get(opCtx)->recordThatRollbackHappened();
                } else {
                    uasserted(40070,
                              "cannot delete shardIdentity document while in --shardsvr mode");
                }
            }
        }
    }

    if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer &&
        _nss == VersionType::ConfigNS) {
        const bool inRollback =
            repl::ReplicationCoordinator::get(opCtx)->getMemberState().rollback();
        if (inRollback) {
            // Throw out any cached information related to the cluster ID.
            ShardingCatalogManager::get(opCtx)->discardCachedConfigDatabaseInitializationState();
            ClusterIdentityLoader::get(opCtx)->discardCachedClusterId();
        } else {
            uasserted(40302, "cannot delete config.version document while in --configsvr mode");
        }
    }

    checkShardVersionOrThrow(opCtx);

    if (_sourceMgr && deleteState.isMigrating) {
        _sourceMgr->getCloner()->onDeleteOp(opCtx, deleteState.documentKey);
    }
}
/**
 * Sharding hook invoked for a document deleted from this collection.
 *
 * When writes are replicated, the node is a shard server and this is
 * NamespaceString::kConfigCollectionNamespace, deleting the document whose _id equals
 * ShardIdentityType::IdName is rejected with error 40070.
 *
 * Afterwards the shard version is checked and, when _sourceMgr is set, its cloner is told about
 * the delete.
 *
 * The collection must be locked in MODE_IX (checked with dassert).
 */
void CollectionShardingState::onDeleteOp(OperationContext* txn, const BSONObj& deletedDocId) {
    dassert(txn->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_IX));

    const bool mayTouchShardIdentity = txn->writesAreReplicated() &&
        serverGlobalParams.clusterRole == ClusterRole::ShardServer &&
        _nss == NamespaceString::kConfigCollectionNamespace;
    if (mayTouchShardIdentity) {
        if (auto idElem = deletedDocId["_id"]) {
            uassert(40070,
                    "cannot delete shardIdentity document while in --shardsvr mode",
                    idElem.str() != ShardIdentityType::IdName);
        }
    }

    checkShardVersionOrThrow(txn);

    if (!_sourceMgr) {
        return;
    }
    _sourceMgr->getCloner()->onDeleteOp(txn, deletedDocId);
}
/**
 * Sharding hook invoked for a document inserted into this collection.
 *
 * On a shard server, for NamespaceString::kConfigCollectionNamespace, a document whose _id
 * equals ShardIdentityType::IdName is parsed and validated as a shard identity (via
 * uassertStatusOK) and a ShardIdentityLogOpHandler is registered with the recovery unit.
 *
 * Afterwards the shard version is checked and, when _sourceMgr is set, its cloner is told about
 * the insert.
 *
 * The collection must be locked in MODE_IX (checked with dassert).
 */
void CollectionShardingState::onInsertOp(OperationContext* txn, const BSONObj& insertedDoc) {
    dassert(txn->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_IX));

    const bool isShardConfigCollection =
        serverGlobalParams.clusterRole == ClusterRole::ShardServer &&
        _nss == NamespaceString::kConfigCollectionNamespace;
    if (isShardConfigCollection) {
        const auto idElem = insertedDoc["_id"];
        if (idElem && idElem.str() == ShardIdentityType::IdName) {
            auto identity = uassertStatusOK(ShardIdentityType::fromBSON(insertedDoc));
            uassertStatusOK(identity.validate());

            // The handler is handed to the recovery unit as a raw pointer.
            txn->recoveryUnit()->registerChange(
                new ShardIdentityLogOpHandler(txn, std::move(identity)));
        }
    }

    checkShardVersionOrThrow(txn);

    if (!_sourceMgr) {
        return;
    }
    _sourceMgr->getCloner()->onInsertOp(txn, insertedDoc);
}
/**
 * Sharding hook invoked for a document updated in this collection.
 *
 * On a shard server:
 *   - updates to NamespaceString::kShardConfigCollectionsCollectionName are forwarded to
 *     _onConfigRefreshCompleteInvalidateCachedMetadataAndNotify;
 *   - if sharding state is enabled, the update is reported to _incrementChunkOnInsertOrUpdate
 *     using the size of the 'update' object (not of the resulting document).
 *
 * Afterwards the shard version is checked and, when _sourceMgr is set, its cloner is told about
 * the update.
 *
 * The collection must be locked in MODE_IX (checked with dassert).
 */
void CollectionShardingState::onUpdateOp(OperationContext* opCtx,
                                         const BSONObj& query,
                                         const BSONObj& update,
                                         const BSONObj& updatedDoc) {
    dassert(opCtx->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_IX));

    if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
        const bool isShardCollectionsCache =
            _nss == NamespaceString::kShardConfigCollectionsCollectionName;
        if (isShardCollectionsCache) {
            _onConfigRefreshCompleteInvalidateCachedMetadataAndNotify(
                opCtx, query, update, updatedDoc);
        }

        if (ShardingState::get(opCtx)->enabled()) {
            _incrementChunkOnInsertOrUpdate(opCtx, updatedDoc, update.objsize());
        }
    }

    checkShardVersionOrThrow(opCtx);

    if (!_sourceMgr) {
        return;
    }
    _sourceMgr->getCloner()->onUpdateOp(opCtx, updatedDoc);
}
/** * Runs a query using the following steps: * --Parsing. * --Acquire locks. * --Plan query, obtaining an executor that can run it. * --Generate the first batch. * --Save state for getMore, transferring ownership of the executor to a ClientCursor. * --Generate response to send to the client. */ bool run(OperationContext* txn, const std::string& dbname, BSONObj& cmdObj, int options, std::string& errmsg, BSONObjBuilder& result) override { const NamespaceString nss(parseNs(dbname, cmdObj)); if (!nss.isValid() || nss.isCommand() || nss.isSpecialCommand()) { return appendCommandStatus(result, {ErrorCodes::InvalidNamespace, str::stream() << "Invalid collection name: " << nss.ns()}); } // Although it is a command, a find command gets counted as a query. globalOpCounters.gotQuery(); if (txn->getClient()->isInDirectClient()) { return appendCommandStatus( result, Status(ErrorCodes::IllegalOperation, "Cannot run find command from eval()")); } // Parse the command BSON to a QueryRequest. const bool isExplain = false; auto qrStatus = QueryRequest::makeFromFindCommand(nss, cmdObj, isExplain); if (!qrStatus.isOK()) { return appendCommandStatus(result, qrStatus.getStatus()); } auto& qr = qrStatus.getValue(); // Validate term before acquiring locks, if provided. if (auto term = qr->getReplicationTerm()) { auto replCoord = repl::ReplicationCoordinator::get(txn); Status status = replCoord->updateTerm(txn, *term); // Note: updateTerm returns ok if term stayed the same. if (!status.isOK()) { return appendCommandStatus(result, status); } } // Fill out curop information. // // We pass negative values for 'ntoreturn' and 'ntoskip' to indicate that these values // should be omitted from the log line. Limit and skip information is already present in the // find command parameters, so these fields are redundant. 
const int ntoreturn = -1; const int ntoskip = -1; beginQueryOp(txn, nss, cmdObj, ntoreturn, ntoskip); // Finish the parsing step by using the QueryRequest to create a CanonicalQuery. ExtensionsCallbackReal extensionsCallback(txn, &nss); auto statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback); if (!statusWithCQ.isOK()) { return appendCommandStatus(result, statusWithCQ.getStatus()); } std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue()); // Acquire locks. AutoGetCollectionForRead ctx(txn, nss); Collection* collection = ctx.getCollection(); // Get the execution plan for the query. auto statusWithPlanExecutor = getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO); if (!statusWithPlanExecutor.isOK()) { return appendCommandStatus(result, statusWithPlanExecutor.getStatus()); } std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue()); { stdx::lock_guard<Client>(*txn->getClient()); CurOp::get(txn)->setPlanSummary_inlock(Explain::getPlanSummary(exec.get())); } if (!collection) { // No collection. Just fill out curop indicating that there were zero results and // there is no ClientCursor id, and then return. const long long numResults = 0; const CursorId cursorId = 0; endQueryOp(txn, collection, *exec, numResults, cursorId); appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result); return true; } const QueryRequest& originalQR = exec->getCanonicalQuery()->getQueryRequest(); // Stream query results, adding them to a BSONArray as we go. CursorResponseBuilder firstBatch(/*isInitialResponse*/ true, &result); BSONObj obj; PlanExecutor::ExecState state = PlanExecutor::ADVANCED; long long numResults = 0; while (!FindCommon::enoughForFirstBatch(originalQR, numResults) && PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { // If we can't fit this result inside the current batch, then we stash it for later. 
if (!FindCommon::haveSpaceForNext(obj, numResults, firstBatch.bytesUsed())) { exec->enqueue(obj); break; } // Add result to output buffer. firstBatch.append(obj); numResults++; } // Throw an assertion if query execution fails for any reason. if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { firstBatch.abandon(); error() << "Plan executor error during find command: " << PlanExecutor::statestr(state) << ", stats: " << Explain::getWinningPlanStats(exec.get()); return appendCommandStatus(result, Status(ErrorCodes::OperationFailed, str::stream() << "Executor error during find command: " << WorkingSetCommon::toStatusString(obj))); } // Before saving the cursor, ensure that whatever plan we established happened with the // expected collection version auto css = CollectionShardingState::get(txn, nss); css->checkShardVersionOrThrow(txn); // Set up the cursor for getMore. CursorId cursorId = 0; if (shouldSaveCursor(txn, collection, state, exec.get())) { // Register the execution plan inside a ClientCursor. Ownership of the PlanExecutor is // transferred to the ClientCursor. // // First unregister the PlanExecutor so it can be re-registered with ClientCursor. exec->deregisterExec(); // Create a ClientCursor containing this plan executor. We don't have to worry about // leaking it as it's inserted into a global map by its ctor. ClientCursor* cursor = new ClientCursor(collection->getCursorManager(), exec.release(), nss.ns(), txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(), originalQR.getOptions(), cmdObj.getOwned()); cursorId = cursor->cursorid(); invariant(!exec); PlanExecutor* cursorExec = cursor->getExecutor(); // State will be restored on getMore. cursorExec->saveState(); cursorExec->detachFromOperationContext(); cursor->setLeftoverMaxTimeMicros(txn->getRemainingMaxTimeMicros()); cursor->setPos(numResults); // Fill out curop based on the results. 
endQueryOp(txn, collection, *cursorExec, numResults, cursorId); } else { endQueryOp(txn, collection, *exec, numResults, cursorId); } // Generate the response object to send to the client. firstBatch.done(cursorId, nss.ns()); return true; }