/**
 * Decides whether this node may serve reads for namespace 'ns'.
 *
 * Reads are always allowed for god (internal) operations and whenever this node can
 * accept writes for the database. Otherwise eligibility depends on the replication
 * mode, the SimpleSlave setting, the caller's slaveOk flag, and the current member
 * state.
 *
 * Returns Status::OK() when reads are allowed; NotMasterOrSecondaryCode or
 * NotMasterNoSlaveOkCode otherwise.
 */
Status LegacyReplicationCoordinator::canServeReadsFor(OperationContext* txn,
                                                      const NamespaceString& ns,
                                                      bool slaveOk) {
    // Internal (god) operations bypass all read restrictions.
    if (txn->getClient()->isGod()) {
        return Status::OK();
    }
    // A node that can accept writes for the database can trivially serve reads for it.
    if (canAcceptWritesForDatabase(ns.db())) {
        return Status::OK();
    }
    // Snapshot the mode once instead of re-querying it for every check below; this
    // mirrors ReplicationCoordinatorImpl::canServeReadsFor, which takes one snapshot
    // under its mutex, and avoids redundant accessor calls.
    const Mode replMode = getReplicationMode();
    if (replMode == modeMasterSlave && _settings.slave == SimpleSlave) {
        return Status::OK();
    }
    if (slaveOk) {
        // Master/slave and standalone deployments always permit slaveOk reads.
        if (replMode == modeMasterSlave || replMode == modeNone) {
            return Status::OK();
        }
        if (getCurrentMemberState().secondary()) {
            return Status::OK();
        }
        return Status(ErrorCodes::NotMasterOrSecondaryCode,
                      "not master or secondary; cannot currently read from this replSet member");
    }
    return Status(ErrorCodes::NotMasterNoSlaveOkCode, "not master and slaveOk=false");
}
int reap(OperationContext* opCtx) override {
    // Reap expired transaction-session records. Returns the handler's final count, or 0
    // when initialization fails or this node is not currently the primary.
    auto const replCoord = mongo::repl::ReplicationCoordinator::get(opCtx);

    Handler handler(opCtx, *_collection);
    if (!handler.initialize()) {
        return 0;
    }

    AutoGetCollection autoColl(
        opCtx, NamespaceString::kSessionTransactionsTableNamespace, MODE_IS);

    // Only start reaping if the shard or config server node is currently the primary
    if (!replCoord->canAcceptWritesForDatabase(
            opCtx, NamespaceString::kSessionTransactionsTableNamespace.db())) {
        return 0;
    }

    DBDirectClient client(opCtx);
    auto expiredQuery = makeQuery(opCtx->getServiceContext()->getFastClockSource()->now());
    auto idCursor = client.query(
        NamespaceString::kSessionTransactionsTableNamespace, expiredQuery, 0, 0, &kIdProjection);

    // Feed every expired session id to the handler, which batches the actual deletes.
    while (idCursor->more()) {
        auto record = SessionsCollectionFetchResultIndividualResult::parse(
            "TransactionSession"_sd, idCursor->next());
        handler.handleLsid(record.get_id());
    }

    // Before the handler goes out of scope, flush its last batch to disk and collect stats.
    return handler.finalize();
}
/**
 * Decides whether this node may serve reads for namespace 'ns'.
 *
 * God (internal) operations and writable primaries may always read. All other checks
 * are evaluated against a single snapshot of the replication mode and member state
 * taken under _mutex.
 */
Status ReplicationCoordinatorImpl::canServeReadsFor(OperationContext* txn,
                                                    const NamespaceString& ns,
                                                    bool slaveOk) {
    if (txn->isGod()) {
        return Status::OK();
    }
    if (canAcceptWritesForDatabase(ns.db())) {
        return Status::OK();
    }

    // Snapshot the replication state under the mutex so the checks below all observe a
    // consistent view.
    boost::lock_guard<boost::mutex> lk(_mutex);
    const Mode mode = _getReplicationMode_inlock();
    if (mode == modeMasterSlave && _settings.slave == SimpleSlave) {
        return Status::OK();
    }

    if (!slaveOk) {
        return Status(ErrorCodes::NotMasterNoSlaveOkCode, "not master and slaveOk=false");
    }

    // slaveOk reads are permitted outside of replica-set mode, or when we are a secondary.
    if (mode == modeMasterSlave || mode == modeNone) {
        return Status::OK();
    }
    if (_getCurrentMemberState_inlock().secondary()) {
        return Status::OK();
    }
    return Status(ErrorCodes::NotMasterOrSecondaryCode,
                  "not master or secondary; cannot currently read from this replSet member");
}
/**
 * Applies the CRUD operations in 'doTxnCmd' inside the caller's already-open
 * multi-document transaction, committing on success and aborting on any failure.
 *
 * Preconditions (uassert/invariant-checked below): the operation must carry a
 * transaction number, run within a multi-document transaction with an open
 * WriteUnitOfWork, and the command may contain only CRUD operations.
 *
 * On success appends the per-op results to 'result' and returns OK. On failure appends
 * "applied"/"code"/"codeName"/"errmsg"/"results" diagnostics to 'result' and returns
 * UnknownError carrying the original exception message.
 */
Status doTxn(OperationContext* opCtx,
             const std::string& dbName,
             const BSONObj& doTxnCmd,
             BSONObjBuilder* result) {
    auto txnNumber = opCtx->getTxnNumber();
    uassert(ErrorCodes::InvalidOptions, "doTxn can only be run with a transaction ID.", txnNumber);
    auto txnParticipant = TransactionParticipant::get(opCtx);
    uassert(ErrorCodes::InvalidOptions, "doTxn must be run within a transaction", txnParticipant);
    invariant(txnParticipant->inMultiDocumentTransaction());
    invariant(opCtx->getWriteUnitOfWork());
    uassert(
        ErrorCodes::InvalidOptions, "doTxn supports only CRUD opts.", _areOpsCrudOnly(doTxnCmd));
    auto hasPrecondition = _hasPrecondition(doTxnCmd);

    // Acquire global lock in IX mode so that the replication state check will remain valid.
    Lock::GlobalLock globalLock(opCtx, MODE_IX);

    auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    // Replicated (user-initiated) writes are only legal on the primary.
    bool userInitiatedWritesAndNotPrimary =
        opCtx->writesAreReplicated() && !replCoord->canAcceptWritesForDatabase(opCtx, dbName);
    if (userInitiatedWritesAndNotPrimary)
        return Status(ErrorCodes::NotMaster,
                      str::stream() << "Not primary while applying ops to database " << dbName);

    int numApplied = 0;
    try {
        BSONObjBuilder intermediateResult;

        // The transaction takes place in a global unit of work, so the precondition check
        // and the writes will share the same snapshot.
        if (hasPrecondition) {
            uassertStatusOK(_checkPrecondition(opCtx, doTxnCmd, result));
        }
        numApplied = 0;
        uassertStatusOK(_doTxn(opCtx, dbName, doTxnCmd, &intermediateResult, &numApplied));
        txnParticipant->commitUnpreparedTransaction(opCtx);
        result->appendElements(intermediateResult.obj());
    } catch (const DBException& ex) {
        // Roll back everything applied so far; the transaction is all-or-nothing.
        txnParticipant->abortActiveUnpreparedOrStashPreparedTransaction(opCtx);
        BSONArrayBuilder ab;
        // NOTE(review): the failing op is included in the count, so "results" carries one
        // 'false' entry per attempted op — presumably intentional; confirm against callers.
        ++numApplied;
        for (int j = 0; j < numApplied; j++)
            ab.append(false);
        result->append("applied", numApplied);
        result->append("code", ex.code());
        result->append("codeName", ErrorCodes::errorString(ex.code()));
        result->append("errmsg", ex.what());
        result->append("results", ab.arr());
        return Status(ErrorCodes::UnknownError, ex.what());
    }

    return Status::OK();
}
// Writes a single no-op message entry to the oplog when no other write has happened
// since the previous pass. Best-effort: silently skips when the global lock is
// unavailable, when this node is not primary, or when writes have occurred in the
// meantime.
void NoopWriter::_writeNoop(OperationContext* opCtx) {
    // Use GlobalLock + lockMMAPV1Flush instead of DBLock to allow return when the lock is not
    // available. It may happen when the primary steps down and a shared global lock is acquired.
    Lock::GlobalLock lock(
        opCtx, MODE_IX, Date_t::now() + Milliseconds(1), Lock::InterruptBehavior::kLeaveUnlocked);
    if (!lock.isLocked()) {
        LOG(1) << "Global lock is not available skipping noopWrite";
        return;
    }
    opCtx->lockState()->lockMMAPV1Flush();

    auto replCoord = ReplicationCoordinator::get(opCtx);
    // Its a proxy for being a primary
    if (!replCoord->canAcceptWritesForDatabase(opCtx, "admin")) {
        LOG(1) << "Not a primary, skipping the noop write";
        return;
    }

    auto lastAppliedOpTime = replCoord->getMyLastAppliedOpTime();

    // _lastKnownOpTime is not protected by lock as its used only by one thread.
    if (lastAppliedOpTime != _lastKnownOpTime) {
        // Something was written since the last pass, so no noop is needed this interval.
        LOG(1) << "Not scheduling a noop write. Last known OpTime: " << _lastKnownOpTime
               << " != last primary OpTime: " << lastAppliedOpTime;
    } else {
        if (writePeriodicNoops.load()) {
            const auto logLevel = getTestCommandsEnabled() ? 0 : 1;
            LOG(logLevel)
                << "Writing noop to oplog as there has been no writes to this replica set in over "
                << _writeInterval;
            // Retry on write conflicts; the noop is an oplog-only message entry (kMsgObj).
            writeConflictRetry(
                opCtx, "writeNoop", NamespaceString::kRsOplogNamespace.ns(), [&opCtx] {
                    WriteUnitOfWork uow(opCtx);
                    opCtx->getClient()->getServiceContext()->getOpObserver()->onOpMessage(opCtx,
                                                                                          kMsgObj);
                    uow.commit();
                });
        }
    }

    // Record the optime observed (or just advanced by our own noop) so the next pass can
    // detect whether any writes happened in between.
    _lastKnownOpTime = replCoord->getMyLastAppliedOpTime();
    LOG(1) << "Set last known op time to " << _lastKnownOpTime;
}
/**
 * Drops database 'dbName' in three phases:
 *   1. Under the global X lock, drop every collection (two-phase drop-pending
 *      collections are skipped and their drop optimes recorded).
 *   2. With locks temporarily released, wait for the collection drops to replicate.
 *   3. Re-acquire the global X lock and finish dropping the database itself.
 *
 * Returns NotMaster when user-initiated writes are attempted off the primary,
 * NamespaceNotFound when the database does not exist, IllegalOperation in read-only
 * mode, or the replication-wait failure; OK on success.
 */
Status dropDatabase(OperationContext* opCtx, const std::string& dbName) {
    uassert(ErrorCodes::IllegalOperation,
            "Cannot drop a database in read-only mode",
            !storageGlobalParams.readOnly);

    // TODO (Kal): OldClientContext legacy, needs to be removed
    {
        CurOp::get(opCtx)->ensureStarted();
        stdx::lock_guard<Client> lk(*opCtx->getClient());
        CurOp::get(opCtx)->setNS_inlock(dbName);
    }

    auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    std::size_t numCollectionsToDrop = 0;

    // We have to wait for the last drop-pending collection to be removed if there are no
    // collections to drop.
    repl::OpTime latestDropPendingOpTime;

    using Result = boost::optional<Status>;
    // Get an optional result--if it's there, early return; otherwise, wait for collections to
    // drop.
    auto result = writeConflictRetry(opCtx, "dropDatabase_collection", dbName, [&] {
        Lock::GlobalWrite lk(opCtx);
        AutoGetDb autoDB(opCtx, dbName, MODE_X);
        Database* const db = autoDB.getDb();
        if (!db) {
            return Result(Status(ErrorCodes::NamespaceNotFound,
                                 str::stream() << "Could not drop database " << dbName
                                               << " because it does not exist"));
        }

        bool userInitiatedWritesAndNotPrimary =
            opCtx->writesAreReplicated() && !replCoord->canAcceptWritesForDatabase(opCtx, dbName);
        if (userInitiatedWritesAndNotPrimary) {
            return Result(
                Status(ErrorCodes::NotMaster,
                       str::stream() << "Not primary while dropping database " << dbName));
        }

        log() << "dropDatabase " << dbName << " - starting";
        db->setDropPending(opCtx, true);

        // If Database::dropCollectionEventIfSystem() fails, we should reset the drop-pending state
        // on Database.
        auto dropPendingGuard = MakeGuard([&db, opCtx] { db->setDropPending(opCtx, false); });

        for (auto collection : *db) {
            const auto& nss = collection->ns();
            // Collections already in the two-phase drop-pending state only need their drop
            // replicated; track the most recent drop optime so we can wait on it below.
            if (nss.isDropPendingNamespace() && replCoord->isReplEnabled() &&
                opCtx->writesAreReplicated()) {
                log() << "dropDatabase " << dbName << " - found drop-pending collection: " << nss;
                latestDropPendingOpTime = std::max(
                    latestDropPendingOpTime, uassertStatusOK(nss.getDropPendingNamespaceOpTime()));
                continue;
            }
            if (replCoord->isOplogDisabledFor(opCtx, nss) || nss.isSystemDotIndexes()) {
                continue;
            }
            log() << "dropDatabase " << dbName << " - dropping collection: " << nss;
            WriteUnitOfWork wunit(opCtx);
            fassertStatusOK(40476, db->dropCollectionEvenIfSystem(opCtx, nss));
            wunit.commit();
            numCollectionsToDrop++;
        }
        dropPendingGuard.Dismiss();

        // If there are no collection drops to wait for, we complete the drop database operation.
        if (numCollectionsToDrop == 0U && latestDropPendingOpTime.isNull()) {
            return Result(_finishDropDatabase(opCtx, dbName, db));
        }

        return Result(boost::none);
    });

    if (result) {
        return *result;
    }

    // If waitForWriteConcern() returns an error or throws an exception, we should reset the
    // drop-pending state on Database.
    auto dropPendingGuardWhileAwaitingReplication = MakeGuard([dbName, opCtx] {
        Lock::GlobalWrite lk(opCtx);
        AutoGetDb autoDB(opCtx, dbName, MODE_X);
        if (auto db = autoDB.getDb()) {
            db->setDropPending(opCtx, false);
        }
    });

    {
        // Holding of any locks is disallowed while awaiting replication because this can
        // potentially block for long time while doing network activity.
        //
        // Even though dropDatabase() does not explicitly acquire any locks before awaiting
        // replication, it is possible that the caller of this function may already have acquired
        // a lock. The applyOps command is an example of a dropDatabase() caller that does this.
        // Therefore, we have to release any locks using a TempRelease RAII object.
        //
        // TODO: Remove the use of this TempRelease object when SERVER-29802 is completed.
        // The work in SERVER-29802 will adjust the locking rules around applyOps operations and
        // dropDatabase is expected to be one of the operations where we expect to no longer acquire
        // the global lock.
        Lock::TempRelease release(opCtx->lockState());

        if (numCollectionsToDrop > 0U) {
            // We dropped collections ourselves; wait for our last op to replicate.
            auto status =
                replCoord->awaitReplicationOfLastOpForClient(opCtx, kDropDatabaseWriteConcern)
                    .status;
            if (!status.isOK()) {
                return Status(status.code(),
                              str::stream() << "dropDatabase " << dbName << " failed waiting for "
                                            << numCollectionsToDrop
                                            << " collection drops to replicate: "
                                            << status.reason());
            }

            log() << "dropDatabase " << dbName << " - successfully dropped " << numCollectionsToDrop
                  << " collections. dropping database";
        } else {
            // Nothing dropped here, so there must be pending drops to wait for.
            invariant(!latestDropPendingOpTime.isNull());
            auto status =
                replCoord
                    ->awaitReplication(opCtx, latestDropPendingOpTime, kDropDatabaseWriteConcern)
                    .status;
            if (!status.isOK()) {
                return Status(
                    status.code(),
                    str::stream()
                        << "dropDatabase " << dbName << " failed waiting for pending collection drops (most recent drop optime: "
                        << latestDropPendingOpTime.toString() << ") to replicate: "
                        << status.reason());
            }

            log() << "dropDatabase " << dbName
                  << " - pending collection drops completed. dropping database";
        }
    }
    dropPendingGuardWhileAwaitingReplication.Dismiss();

    // Phase 3: re-acquire the global X lock and drop the (now collection-free) database.
    return writeConflictRetry(opCtx, "dropDatabase_database", dbName, [&] {
        Lock::GlobalWrite lk(opCtx);
        AutoGetDb autoDB(opCtx, dbName, MODE_X);
        if (auto db = autoDB.getDb()) {
            return _finishDropDatabase(opCtx, dbName, db);
        }
        return Status(ErrorCodes::NamespaceNotFound,
                      str::stream() << "Could not drop database " << dbName
                                    << " because it does not exist after dropping "
                                    << numCollectionsToDrop << " collection(s).");
    });
}
bool ReplicationCoordinatorMock::canAcceptWritesFor(OperationContext* opCtx,
                                                    const NamespaceString& ns) {
    // TODO: the mock does not model per-namespace state, so forward to the
    // database-level check for now.
    const auto dbName = ns.db();
    return canAcceptWritesForDatabase(opCtx, dbName);
}
bool ReplicationCoordinatorMock::canAcceptWritesForDatabase_UNSAFE(OperationContext* opCtx,
                                                                   StringData dbName) {
    // NOTE(review): the _UNSAFE suffix presumably marks the variant callers use without
    // holding locks (confirm against the real coordinator); the mock has no locking
    // requirements, so it simply forwards to the regular implementation.
    return canAcceptWritesForDatabase(opCtx, dbName);
}