/**
 * Looks up which of the ids in 'sessionIdsToRemove' the given sessions collection reports via
 * findRemovedSessions(), deletes the transaction records for exactly those ids, and returns how
 * many ids that was. Returns 0 without touching anything when the input set is empty.
 *
 * While the sessions collection is being consulted, the lock state of 'opCtx' is saved and
 * released; it is restored when this function exits, whether normally or through an exception.
 * A non-OK status from either sessions collection call is surfaced through uassertStatusOK.
 */
int removeSessionsRecords(OperationContext* opCtx,
                          SessionsCollection& sessionsCollection,
                          const LogicalSessionIdSet& sessionIdsToRemove) {
    // Empty input: no lookup, no yield, no delete.
    if (sessionIdsToRemove.empty()) {
        return 0;
    }

    // Save the current lock state into 'snapshot' and release it; this must succeed.
    Locker::LockSnapshot snapshot;
    Locker* const locker = opCtx->lockState();
    invariant(locker->saveLockStateAndUnlock(&snapshot));

    // On every exit path, put the saved locks back. The restore itself runs under an
    // UninterruptibleLockGuard.
    const auto restoreLocksOnExit = MakeGuard([opCtx, locker, &snapshot] {
        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
        locker->restoreLockState(opCtx, snapshot);
    });

    // With the top-level locks gone, also let go of any lower-level (storage engine-specific)
    // resources. A yield point is a safe place to do that.
    opCtx->recoveryUnit()->abandonSnapshot();

    // Account for this yield in CurOp.
    CurOp::get(opCtx)->yielded();

    auto removedSessionIds =
        uassertStatusOK(sessionsCollection.findRemovedSessions(opCtx, sessionIdsToRemove));
    uassertStatusOK(sessionsCollection.removeTransactionRecords(opCtx, removedSessionIds));

    return static_cast<int>(removedSessionIds.size());
}
/**
 * Performs one step of range deletion for the collection 'nss': picks the range at the front of
 * the orphans queue, runs a single _doDeletion() call on it, and, once that call reports an error
 * or zero documents written, waits for majority write concern and pops the range.
 *
 * Parameters:
 *   opCtx        operation context used for locking, writes and the write concern wait.
 *   nss          namespace of the collection being cleaned.
 *   epoch        passed to _checkCollectionMetadataStillValid() and recorded in the
 *                'startRangeDeletion' document.
 *   maxToDelete  limit handed to _doDeletion(). If <= 0, rangeDeleterBatchSize is used; if that
 *                is also <= 0, max(internalQueryExecYieldIterations, 1) is used.
 *   forTestOnly  if non-null, used as the range deleter instead of the metadata manager's
 *                _rangesToClean.
 *
 * Returns:
 *   boost::none  if _checkCollectionMetadataStillValid() fails, if no deletions are scheduled,
 *                or if writing the 'startRangeDeletion' document throws a DBException.
 *   a Date_t     otherwise: the 'whenToDelete' of the front delayed range when it is not yet due,
 *                or now + rangeDeleterBatchDelayMS after a deletion step.
 */
boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
    OperationContext* opCtx,
    NamespaceString const& nss,
    OID const& epoch,
    int maxToDelete,
    CollectionRangeDeleter* forTestOnly) {
    // Resolve the effective batch limit; the final fallback is clamped to at least 1.
    if (maxToDelete <= 0) {
        maxToDelete = rangeDeleterBatchSize.load();
        if (maxToDelete <= 0) {
            maxToDelete = std::max(int(internalQueryExecYieldIterations.load()), 1);
        }
    }

    // Outcome of the deletion step: a document count on success, or the error status.
    StatusWith<int> wrote = 0;

    // Copies of the chosen range and its notification. They are declared out here because they
    // are still needed after the locked scope below has ended.
    auto range = boost::optional<ChunkRange>(boost::none);
    auto notification = DeleteNotification();

    {
        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
        AutoGetCollection autoColl(opCtx, nss, MODE_IX);

        auto* const collection = autoColl.getCollection();
        auto* const csr = CollectionShardingRuntime::get(opCtx, nss);
        auto& metadataManager = csr->_metadataManager;

        if (!_checkCollectionMetadataStillValid(
                opCtx, nss, epoch, forTestOnly, collection, metadataManager)) {
            return boost::none;
        }

        // The deleter to operate on: the test-supplied one if given, else the manager's own.
        auto* const self = forTestOnly ? forTestOnly : &metadataManager->_rangesToClean;

        // Set only when a range has just been moved from _delayedOrphans to _orphans below.
        bool writeOpLog = false;

        {
            // The queues are inspected and modified only while holding the manager lock.
            stdx::lock_guard<stdx::mutex> scopedLock(csr->_metadataManager->_managerLock);
            if (self->isEmpty()) {
                LOG(1) << "No further range deletions scheduled on " << nss.ns();
                return boost::none;
            }

            auto& orphans = self->_orphans;
            if (orphans.empty()) {
                // We have delayed deletions; see if any are ready.
                auto& df = self->_delayedOrphans.front();
                if (df.whenToDelete > Date_t::now()) {
                    // Not due yet: report the time at which it becomes due.
                    LOG(0) << "Deferring deletion of " << nss.ns() << " range "
                           << redact(df.range.toString()) << " until " << df.whenToDelete;
                    return df.whenToDelete;
                }

                // Move a single range from _delayedOrphans to _orphans
                orphans.splice(orphans.end(), self->_delayedOrphans, self->_delayedOrphans.begin());
                LOG(1) << "Proceeding with deferred deletion of " << nss.ns() << " range "
                       << redact(orphans.front().range.toString());

                writeOpLog = true;
            }

            invariant(!orphans.empty());
            // Copy the front range's bounds (via getOwned()) and its notification into the
            // locals declared above, which outlive this manager-lock scope.
            const auto& frontRange = orphans.front().range;
            range.emplace(frontRange.getMin().getOwned(), frontRange.getMax().getOwned());
            notification = orphans.front().notification;
        }

        invariant(range);

        if (writeOpLog) {
            // Secondaries will watch for this update, and kill any queries that may depend on
            // documents in the range -- excepting any queries with a read-concern option
            // 'ignoreChunkMigration'
            try {
                AutoGetCollection autoAdmin(
                    opCtx, NamespaceString::kServerConfigurationNamespace, MODE_IX);

                Helpers::upsert(opCtx,
                                NamespaceString::kServerConfigurationNamespace.ns(),
                                BSON("_id"
                                     << "startRangeDeletion"
                                     << "ns"
                                     << nss.ns()
                                     << "epoch"
                                     << epoch
                                     << "min"
                                     << range->getMin()
                                     << "max"
                                     << range->getMax()));
            } catch (const DBException& e) {
                // The record could not be written: clear all scheduled cleanups with the error
                // (under the manager lock) and give up.
                stdx::lock_guard<stdx::mutex> scopedLock(csr->_metadataManager->_managerLock);
                csr->_metadataManager->_clearAllCleanups(
                    scopedLock,
                    e.toStatus("cannot push startRangeDeletion record to Op Log,"
                               " abandoning scheduled range deletions"));
                return boost::none;
            }
        }

        const auto scopedCollectionMetadata =
            metadataManager->getActiveMetadata(metadataManager, boost::none);
        const auto& metadata = *scopedCollectionMetadata;

        // Run one deletion step. A DBException is not propagated; it is recorded in 'wrote' and
        // logged as a warning so the common post-processing below still runs.
        try {
            wrote = self->_doDeletion(
                opCtx, collection, metadata->getKeyPattern(), *range, maxToDelete);
        } catch (const DBException& e) {
            wrote = e.toStatus();
            warning() << e.what();
        }
    }  // drop autoColl

    // The range is finished with when the step failed or reported zero documents written.
    if (!wrote.isOK() || wrote.getValue() == 0) {
        if (wrote.isOK()) {
            LOG(0) << "No documents remain to delete in " << nss << " range "
                   << redact(range->toString());
        }

        // Wait for majority replication even when wrote isn't OK or == 0, because it might have
        // been OK and/or > 0 previously, and the deletions must be persistent before notifying
        // clients in _pop().

        LOG(0) << "Waiting for majority replication of local deletions in " << nss.ns() << " range "
               << redact(range->toString());

        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        const auto clientOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();

        // Wait for replication outside the lock
        // A DBException thrown by the wait is converted into the returned status.
        const auto replicationStatus = [&] {
            try {
                WriteConcernResult unusedWCResult;
                return waitForWriteConcern(
                    opCtx, clientOpTime, kMajorityWriteConcern, &unusedWCResult);
            } catch (const DBException& e) {
                return e.toStatus();
            }
        }();

        // Get the lock again to finish off this range (including notifying, if necessary).
        // Don't allow lock interrupts while cleaning up.
        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
        AutoGetCollection autoColl(opCtx, nss, MODE_IX);

        // Everything is looked up and validated again here; the values obtained in the earlier
        // locked scope are not reused.
        auto* const collection = autoColl.getCollection();
        auto* const csr = CollectionShardingRuntime::get(opCtx, nss);
        auto& metadataManager = csr->_metadataManager;

        if (!_checkCollectionMetadataStillValid(
                opCtx, nss, epoch, forTestOnly, collection, metadataManager)) {
            return boost::none;
        }

        auto* const self = forTestOnly ? forTestOnly : &metadataManager->_rangesToClean;

        stdx::lock_guard<stdx::mutex> scopedLock(csr->_metadataManager->_managerLock);

        if (!replicationStatus.isOK()) {
            LOG(0) << "Error when waiting for write concern after removing " << nss << " range "
                   << redact(range->toString()) << " : " << redact(replicationStatus.reason());

            // If range were already popped (e.g. by dropping nss during the waitForWriteConcern
            // above) its notification would have been triggered, so this check suffices to ensure
            // that it is safe to pop the range here
            if (!notification.ready()) {
                invariant(!self->isEmpty() && self->_orphans.front().notification == notification);
                LOG(0) << "Abandoning deletion of latest range in " << nss.ns() << " after local "
                       << "deletions because of replication failure";
                self->_pop(replicationStatus);
            }
        } else {
            // Replication succeeded: pop the range with the deletion step's own status.
            LOG(0) << "Finished deleting documents in " << nss.ns() << " range "
                   << redact(range->toString());

            self->_pop(wrote.getStatus());
        }

        if (!self->_orphans.empty()) {
            LOG(1) << "Deleting " << nss.ns() << " range "
                   << redact(self->_orphans.front().range.toString()) << " next.";
        }

        return Date_t::now() + Milliseconds(rangeDeleterBatchDelayMS.load());
    }

    // Reached only when the step succeeded with a positive count (checked by the invariants
    // below). Nothing is popped on this path.
    invariant(range);
    invariant(wrote.getStatus());
    invariant(wrote.getValue() > 0);

    // NOTE(review): abandon() acts on this function's local copy of the notification;
    // presumably it keeps that copy from being treated as a completed wait. Confirm against
    // DeleteNotification.
    notification.abandon();
    return Date_t::now() + Milliseconds(rangeDeleterBatchDelayMS.load());
}