OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationContext* txn,
                                                                      bool isV1ElectionProtocol) {
    invariant(txn->lockState()->isW());

    // Clear the appliedThrough marker so on startup we'll use the top of the oplog. This must be
    // done before we add anything to our oplog.
    invariant(_storageInterface->getOplogDeleteFromPoint(txn).isNull());
    _storageInterface->setAppliedThrough(txn, {});

    if (isV1ElectionProtocol) {
        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            ScopedTransaction scopedXact(txn, MODE_X);

            WriteUnitOfWork wuow(txn);
            txn->getClient()->getServiceContext()->getOpObserver()->onOpMessage(
                txn,
                BSON("msg"
                     << "new primary"));
            wuow.commit();
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
            txn, "logging transition to primary to oplog", "local.oplog.rs");
    }
    const auto opTimeToReturn = fassertStatusOK(28665, loadLastOpTime(txn));

    _shardingOnTransitionToPrimaryHook(txn);
    _dropAllTempCollections(txn);

    return opTimeToReturn;
}
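
// For reference, the "new primary" marker written by onOpMessage() above is an
// ordinary no-op oplog entry. A representative document in local.oplog.rs
// (field values here are illustrative, not captured from a real deployment)
// looks roughly like:
//
//   { ts: Timestamp(1500000000, 1), t: NumberLong(5), h: NumberLong(0),
//     v: 2, op: "n", ns: "", o: { msg: "new primary" } }
//
// Writing it gives the new primary at least one oplog entry in its own term,
// which is what allows entries from earlier terms to become majority-committed.

// The definition below is a later revision of the same function: the
// OperationContext parameter is renamed from 'txn' to 'opCtx', the
// MONGO_WRITE_CONFLICT_RETRY_LOOP macros are replaced by the writeConflictRetry
// helper, and the minValid bookkeeping moves behind
// ReplicationProcess::getConsistencyMarkers().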
OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationContext* opCtx,
                                                                      bool isV1ElectionProtocol) {
    invariant(opCtx->lockState()->isW());

    // Clear the appliedThrough marker so on startup we'll use the top of the oplog. This must be
    // done before we add anything to our oplog.
    // We record this update at the 'lastAppliedOpTime'. If there are any outstanding
    // checkpoints being taken, they should only reflect this write if they see all writes up
    // to our 'lastAppliedOpTime'.
    invariant(
        _replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull());
    auto lastAppliedOpTime = repl::ReplicationCoordinator::get(opCtx)->getMyLastAppliedOpTime();
    _replicationProcess->getConsistencyMarkers()->clearAppliedThrough(
        opCtx, lastAppliedOpTime.getTimestamp());

    if (isV1ElectionProtocol) {
        writeConflictRetry(opCtx, "logging transition to primary to oplog", "local.oplog.rs", [&] {
            WriteUnitOfWork wuow(opCtx);
            opCtx->getClient()->getServiceContext()->getOpObserver()->onOpMessage(
                opCtx,
                BSON("msg"
                     << "new primary"));
            wuow.commit();
        });
    }
    const auto opTimeToReturn = fassertStatusOK(28665, loadLastOpTime(opCtx));

    _shardingOnTransitionToPrimaryHook(opCtx);
    _dropAllTempCollections(opCtx);

    serverGlobalParams.validateFeaturesAsMaster.store(true);

    return opTimeToReturn;
}
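
// Both revisions above retry the no-op write when the storage engine detects a
// write conflict. A minimal sketch of the retry pattern, assuming MongoDB's
// WriteConflictException and RecoveryUnit interfaces; this is an illustration
// of what the macro pair / writeConflictRetry helper do, not their actual
// implementation:
template <typename Work>
auto retryOnWriteConflictSketch(OperationContext* opCtx,
                                StringData opStr,
                                StringData ns,
                                Work&& work) {
    int attempts = 0;
    while (true) {
        try {
            // Run the whole unit of work; WriteUnitOfWork::commit() happens
            // inside 'work'. Returning here ends the loop on success.
            return work();
        } catch (const WriteConflictException&) {
            // Log with backoff, discard the storage snapshot, and retry the
            // unit of work from the top.
            WriteConflictException::logAndBackoff(attempts++, opStr, ns);
            opCtx->recoveryUnit()->abandonSnapshot();
        }
    }
}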
void ReplicationCoordinatorExternalStateImpl::shutdown(OperationContext* txn) {
    UniqueLock lk(_threadMutex);
    if (_startedThreads) {
        _stopDataReplication_inlock(txn, &lk);

        if (_snapshotThread) {
            log() << "Stopping replication snapshot thread";
            _snapshotThread->shutdown();
        }

        if (_storageInterface->getOplogDeleteFromPoint(txn).isNull() &&
            loadLastOpTime(txn) == _storageInterface->getAppliedThrough(txn)) {
            // Clear the appliedThrough marker to indicate we are consistent with the top of the
            // oplog.
            _storageInterface->setAppliedThrough(txn, {});
        }

        if (_noopWriter) {
            LOG(1) << "Stopping noop writer";
            _noopWriter->stopWritingPeriodicNoops();
        }

        log() << "Stopping replication storage threads";
        _taskExecutor->shutdown();
        _taskExecutor->join();
        _storageInterface->shutdown();
    }
}
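
// The definition below is the later revision of shutdown(). Besides the
// txn -> opCtx rename, it differs in two ways: it records _inShutdown while
// still holding _threadMutex, and it releases the mutex before calling
// loadLastOpTime() and writing the consistency markers, since those steps must
// happen outside _threadMutex (see the comment in the body).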
void ReplicationCoordinatorExternalStateImpl::shutdown(OperationContext* opCtx) {
    UniqueLock lk(_threadMutex);
    if (!_startedThreads) {
        return;
    }

    _inShutdown = true;
    _stopDataReplication_inlock(opCtx, &lk);

    if (_noopWriter) {
        LOG(1) << "Stopping noop writer";
        _noopWriter->stopWritingPeriodicNoops();
    }

    log() << "Stopping replication storage threads";
    _taskExecutor->shutdown();
    _taskExecutor->join();
    lk.unlock();

    // Perform additional shutdown steps below that must be done outside _threadMutex.

    if (_replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull() &&
        loadLastOpTime(opCtx) ==
            _replicationProcess->getConsistencyMarkers()->getAppliedThrough(opCtx)) {
        // Clear the appliedThrough marker to indicate we are consistent with the top of the
        // oplog. We record this update at the 'lastAppliedOpTime'. If there are any outstanding
        // checkpoints being taken, they should only reflect this write if they see all writes up
        // to our 'lastAppliedOpTime'.
        auto lastAppliedOpTime = repl::ReplicationCoordinator::get(opCtx)->getMyLastAppliedOpTime();
        _replicationProcess->getConsistencyMarkers()->clearAppliedThrough(
            opCtx, lastAppliedOpTime.getTimestamp());
    }
}
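
// For reference, the consistency markers consulted above are small documents
// in the 'local' database. Representative shapes, with illustrative values
// (exact field layout varies by server version, so treat this as a sketch):
//
//   // local.replset.minvalid -- 'begin' holds the appliedThrough marker
//   { _id: ObjectId("..."), ts: Timestamp(...), t: NumberLong(...),
//     begin: { ts: Timestamp(...), t: NumberLong(...) } }
//
//   // local.replset.oplogTruncateAfterPoint
//   { _id: "oplogTruncateAfterPoint", oplogTruncateAfterPoint: Timestamp(...) }
//
// Clearing appliedThrough on a clean shutdown marks the data files as
// consistent with the top of the oplog, so the next startup can skip replay.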
void ReplicationCoordinatorExternalStateImpl::cleanUpLastApplyBatch(OperationContext* txn) {
    if (_storageInterface->getInitialSyncFlag(txn)) {
        return;  // Initial Sync will take over so no cleanup is needed.
    }

    const auto deleteFromPoint = _storageInterface->getOplogDeleteFromPoint(txn);
    const auto appliedThrough = _storageInterface->getAppliedThrough(txn);

    const bool needToDeleteEndOfOplog = !deleteFromPoint.isNull() &&
        // This version should never have a non-null deleteFromPoint with a null appliedThrough.
        // This scenario means that we downgraded after unclean shutdown, then the downgraded node
        // deleted the ragged end of our oplog, then did a clean shutdown.
        !appliedThrough.isNull() &&
        // Similarly we should never have an appliedThrough higher than the deleteFromPoint. This
        // means that the downgraded node deleted our ragged end then applied ahead of our
        // deleteFromPoint and then had an unclean shutdown before upgrading. We are ok with
        // applying these ops because older versions wrote to the oplog from a single thread so we
        // know they are in order.
        !(appliedThrough.getTimestamp() >= deleteFromPoint);
    if (needToDeleteEndOfOplog) {
        log() << "Removing unapplied entries starting at: " << deleteFromPoint;
        truncateOplogTo(txn, deleteFromPoint);
    }
    _storageInterface->setOplogDeleteFromPoint(txn, {});  // clear the deleteFromPoint

    if (appliedThrough.isNull()) {
        // No follow-up work to do.
        return;
    }

    // Check if we have any unapplied ops in our oplog. It is important that this is done after
    // deleting the ragged end of the oplog.
    const auto topOfOplog = fassertStatusOK(40290, loadLastOpTime(txn));
    if (appliedThrough >= topOfOplog) {
        return;  // We've applied all the valid oplog we have.
    }

    log() << "Replaying stored operations from " << appliedThrough << " (exclusive) to "
          << topOfOplog << " (inclusive).";

    DBDirectClient db(txn);
    auto cursor = db.query(rsOplogName,
                           QUERY("ts" << BSON("$gte" << appliedThrough.getTimestamp())),
                           /*batchSize*/ 0,
                           /*skip*/ 0,
                           /*projection*/ nullptr,
                           QueryOption_OplogReplay);

    // Check that the first document matches our appliedThrough point, then skip it since it has
    // already been applied.
    if (!cursor->more()) {
        // This should really be impossible because we check above that the top of the oplog is
        // strictly > appliedThrough. If this fails it represents a serious bug in either the
        // storage engine or query's implementation of OplogReplay.
        severe() << "Couldn't find any entries in the oplog >= " << appliedThrough
                 << " which should be impossible.";
        fassertFailedNoTrace(40293);
    }
    auto firstOpTimeFound = fassertStatusOK(40291, OpTime::parseFromOplogEntry(cursor->nextSafe()));
    if (firstOpTimeFound != appliedThrough) {
        severe() << "Oplog entry at " << appliedThrough << " is missing; actual entry found is "
                 << firstOpTimeFound;
        fassertFailedNoTrace(40292);
    }

    // Apply the remaining ops one at a time, but don't log them because they are
    // already in the oplog.
    const bool wereWritesReplicated = txn->writesAreReplicated();
    ON_BLOCK_EXIT([&] { txn->setReplicatedWrites(wereWritesReplicated); });
    txn->setReplicatedWrites(false);

    while (cursor->more()) {
        auto entry = cursor->nextSafe();
        fassertStatusOK(40294, SyncTail::syncApply(txn, entry, true));
        _storageInterface->setAppliedThrough(
            txn, fassertStatusOK(40295, OpTime::parseFromOplogEntry(entry)));
    }
}
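
// To summarize the recovery protocol implemented by cleanUpLastApplyBatch():
//
//   1. If the initial sync flag is set, do nothing; initial sync will restart
//      and resync the node anyway.
//   2. If deleteFromPoint is set, a batch's oplog writes may have stopped
//      partway through, so truncate the ragged end of the oplog from that
//      point, then clear the marker.
//   3. If appliedThrough is set and behind the new top of the oplog, re-apply
//      the entries in (appliedThrough, topOfOplog] with replicated writes
//      disabled, advancing appliedThrough after each op so that a crash during
//      replay resumes from the last applied entry rather than starting over.
//
// Step 3 relies on oplog entries being safe to re-apply, which is also why the
// first (already-applied) entry at appliedThrough is checked and skipped
// rather than applied again.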