void BackgroundSync::_rollback(OperationContext* txn,
                               const HostAndPort& source,
                               stdx::function<DBClientBase*()> getConnection) {
    // Abort only when syncRollback detects we are in an unrecoverable state.
    // In other cases, we log the message contained in the error status and retry later.
    auto status = syncRollback(txn,
                               OplogInterfaceLocal(txn, rsOplogName),
                               RollbackSourceImpl(getConnection, source, rsOplogName),
                               _replCoord);
    if (status.isOK()) {
        // Tell the applier thread (syncTail) that there is no new data by adding a sentinel
        // marker to the buffer.
        _signalNoNewDataForApplier(txn);
        // Wait until the buffer is empty.
        // This is an indication that syncTail has removed the sentinel marker from the buffer
        // and reset its local lastAppliedOpTime via the replCoord.
        while (!_oplogBuffer->isEmpty()) {
            sleepmillis(10);
            if (inShutdown()) {
                return;
            }
        }

        // At this point we are about to leave rollback. Before we do, wait for any writes done
        // as part of rollback to be durable, and then do any necessary checks that we didn't
        // wind up rolling back something illegal. We must wait for the rollback to be durable
        // so that if we wind up shutting down uncleanly in response to something we rolled back
        // we know that we won't wind up right back in the same situation when we start back up
        // because the rollback wasn't durable.
        txn->recoveryUnit()->waitUntilDurable();

        // If we detected that we rolled back the shardIdentity document as part of this rollback
        // then we must shut down to clear the in-memory ShardingState associated with the
        // shardIdentity document.
        if (ShardIdentityRollbackNotifier::get(txn)->didRollbackHappen()) {
            severe() << "shardIdentity document rollback detected. Shutting down to clear "
                        "in-memory sharding state. Restarting this process should safely return it "
                        "to a healthy state";
            fassertFailedNoTrace(40276);
        }

        // It is now safe to clear the ROLLBACK state, which may result in the applier thread
        // transitioning to SECONDARY. This is safe because the applier thread has now reloaded
        // the new rollback minValid from the database.
        if (!_replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
            warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
                      << "; expected to be in state " << MemberState(MemberState::RS_ROLLBACK)
                      << " but found self in " << _replCoord->getMemberState();
        }
        return;
    }
    if (ErrorCodes::UnrecoverableRollbackError == status.code()) {
        fassertNoTrace(28723, status);
    }
    warning() << "rollback cannot proceed at this time (retrying later): " << redact(status);
}
void BackgroundSync::_rollback(OperationContext* txn,
                               const HostAndPort& source,
                               stdx::function<DBClientBase*()> getConnection) {
    // Abort only when syncRollback detects we are in an unrecoverable state.
    // In other cases, we log the message contained in the error status and retry later.
    auto status = syncRollback(txn,
                               _replCoord->getMyLastOptime(),
                               OplogInterfaceLocal(txn, rsOplogName),
                               RollbackSourceImpl(getConnection, source, rsOplogName),
                               _replCoord);
    if (status.isOK()) {
        return;
    }
    if (ErrorCodes::UnrecoverableRollbackError == status.code()) {
        fassertNoTrace(28723, status);
    }
    warning() << "rollback cannot proceed at this time (retrying later): " << status;
}
void BackgroundSync::_rollback(OperationContext* txn,
                               const HostAndPort& source,
                               stdx::function<DBClientBase*()> getConnection) {
    // Abort only when syncRollback detects we are in an unrecoverable state.
    // In other cases, we log the message contained in the error status and retry later.
    auto status = syncRollback(txn,
                               OplogInterfaceLocal(txn, rsOplogName),
                               RollbackSourceImpl(getConnection, source, rsOplogName),
                               _replCoord);
    if (status.isOK()) {
        // Tell the applier thread (syncTail) that there is no new data by adding a sentinel
        // marker to the buffer.
        _signalNoNewDataForApplier();
        // Wait until the buffer is empty.
        // This is an indication that syncTail has removed the sentinel marker from the buffer
        // and reset its local lastAppliedOpTime via the replCoord.
        while (!_buffer.empty()) {
            sleepmillis(10);
            if (inShutdown()) {
                return;
            }
        }

        // It is now safe to clear the ROLLBACK state, which may result in the applier thread
        // transitioning to SECONDARY. This is safe because the applier thread has now reloaded
        // the new rollback minValid from the database.
        if (!_replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
            warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
                      << "; expected to be in state " << MemberState(MemberState::RS_ROLLBACK)
                      << " but found self in " << _replCoord->getMemberState();
        }
        return;
    }
    if (ErrorCodes::UnrecoverableRollbackError == status.code()) {
        fassertNoTrace(28723, status);
    }
    warning() << "rollback cannot proceed at this time (retrying later): " << status;
}
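// The _rollback() variants above hand control back to the applier by pushing a "no new data"
// sentinel into the oplog buffer and then spinning until the buffer drains. The standalone
// sketch below is NOT MongoDB code; it is a minimal illustration of that producer/consumer
// handshake under assumed names (OpBuffer, kSentinel, applierLoop, producerFinishRollback are
// all hypothetical and exist only for this example).
#include <chrono>
#include <condition_variable>
#include <deque>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>

namespace example {

const std::string kSentinel = "";  // empty entry marks "no new data"

class OpBuffer {
public:
    void push(std::string op) {
        std::lock_guard<std::mutex> lk(_mutex);
        _ops.push_back(std::move(op));
        _cv.notify_one();
    }
    // Blocks until an entry is available, then removes and returns it.
    std::string pop() {
        std::unique_lock<std::mutex> lk(_mutex);
        _cv.wait(lk, [&] { return !_ops.empty(); });
        std::string op = _ops.front();
        _ops.pop_front();
        return op;
    }
    bool empty() const {
        std::lock_guard<std::mutex> lk(_mutex);
        return _ops.empty();
    }

private:
    mutable std::mutex _mutex;
    std::condition_variable _cv;
    std::deque<std::string> _ops;
};

// Consumer: drains entries until it sees the sentinel, mirroring how syncTail removes the
// sentinel marker and resets its notion of the last applied op.
void applierLoop(OpBuffer& buffer) {
    for (;;) {
        std::string op = buffer.pop();
        if (op == kSentinel) {
            return;  // sentinel consumed; the buffer is now drained
        }
        std::cout << "applying: " << op << '\n';
    }
}

// Producer: signals "no new data" and polls until the consumer has drained the buffer, like the
// `while (!_oplogBuffer->isEmpty()) sleepmillis(10);` loop in _rollback().
void producerFinishRollback(OpBuffer& buffer) {
    buffer.push(kSentinel);
    while (!buffer.empty()) {
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
}

}  // namespace example

int main() {
    example::OpBuffer buffer;
    buffer.push("op1");
    buffer.push("op2");
    std::thread applier(example::applierLoop, std::ref(buffer));
    example::producerFinishRollback(buffer);
    applier.join();
    return 0;
}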
void BackgroundSync::_produce(OperationContext* txn) {
    while (MONGO_FAIL_POINT(pauseRsBgSyncProducer)) {
        sleepmillis(0);
    }

    // this oplog reader does not do a handshake because we don't want the server it's syncing
    // from to track how far it has synced
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        if (_lastOpTimeFetched.isNull()) {
            // then we're initial syncing and we're still waiting for this to be set
            lock.unlock();
            sleepsecs(1);
            // if there is no one to sync from
            return;
        }

        if (!_replCoord->isCatchingUp() &&
            (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary())) {
            return;
        }

        if (_inShutdown_inlock()) {
            return;
        }
    }

    // find a target to sync from the last optime fetched
    OpTime lastOpTimeFetched;
    HostAndPort source;
    SyncSourceResolverResponse syncSourceResp;
    SyncSourceResolver* syncSourceResolver;
    OpTime minValid;
    if (_replCoord->getMemberState().recovering()) {
        auto minValidSaved = StorageInterface::get(txn)->getMinValid(txn);
        if (minValidSaved > lastOpTimeFetched) {
            minValid = minValidSaved;
        }
    }
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        lastOpTimeFetched = _lastOpTimeFetched;
        _syncSourceHost = HostAndPort();
        _syncSourceResolver = stdx::make_unique<SyncSourceResolver>(
            _replicationCoordinatorExternalState->getTaskExecutor(),
            _replCoord,
            lastOpTimeFetched,
            minValid,
            [&syncSourceResp](const SyncSourceResolverResponse& resp) { syncSourceResp = resp; });
        syncSourceResolver = _syncSourceResolver.get();
    }
    // This may deadlock if called inside the mutex because SyncSourceResolver::startup() calls
    // ReplicationCoordinator::chooseNewSyncSource(). ReplicationCoordinatorImpl's mutex has to be
    // acquired before BackgroundSync's.
    // It is safe to call startup() outside the mutex on this instance of SyncSourceResolver
    // because we do not destroy this instance outside of this function.
    auto status = _syncSourceResolver->startup();
    if (ErrorCodes::CallbackCanceled == status || ErrorCodes::isShutdownError(status.code())) {
        return;
    }
    fassertStatusOK(40349, status);
    syncSourceResolver->join();
    syncSourceResolver = nullptr;
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        _syncSourceResolver.reset();
    }

    if (syncSourceResp.syncSourceStatus == ErrorCodes::OplogStartMissing) {
        // All (accessible) sync sources were too stale.
        if (_replCoord->isCatchingUp()) {
            warning() << "Too stale to catch up.";
            log() << "Our newest OpTime : " << lastOpTimeFetched;
            log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen
                  << " from " << syncSourceResp.getSyncSource();
            sleepsecs(1);
            return;
        }

        error() << "too stale to catch up -- entering maintenance mode";
        log() << "Our newest OpTime : " << lastOpTimeFetched;
        log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen;
        log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
        auto status = _replCoord->setMaintenanceMode(true);
        if (!status.isOK()) {
            warning() << "Failed to transition into maintenance mode: " << status;
        }
        bool worked = _replCoord->setFollowerMode(MemberState::RS_RECOVERING);
        if (!worked) {
            warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
                      << ". Current state: " << _replCoord->getMemberState();
        }
        return;
    } else if (syncSourceResp.isOK() && !syncSourceResp.getSyncSource().empty()) {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _syncSourceHost = syncSourceResp.getSyncSource();
        source = _syncSourceHost;
    } else {
        if (!syncSourceResp.isOK()) {
            log() << "failed to find sync source, received error "
                  << syncSourceResp.syncSourceStatus.getStatus();
        }
        // No sync source found.
        sleepsecs(1);
        return;
    }

    long long lastHashFetched;
    {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        if (_stopped) {
            return;
        }
        lastOpTimeFetched = _lastOpTimeFetched;
        lastHashFetched = _lastFetchedHash;
        if (!_replCoord->isCatchingUp()) {
            _replCoord->signalUpstreamUpdater();
        }
    }

    // Set the applied point if unset. This is most likely the first time we've established a sync
    // source since stepping down or otherwise clearing the applied point. We need to set this
    // here, before the OplogWriter gets a chance to append to the oplog.
    if (StorageInterface::get(txn)->getAppliedThrough(txn).isNull()) {
        StorageInterface::get(txn)->setAppliedThrough(txn, _replCoord->getMyLastAppliedOpTime());
    }

    // "lastFetched" not used. Already set in _enqueueDocuments.
    Status fetcherReturnStatus = Status::OK();
    DataReplicatorExternalStateBackgroundSync dataReplicatorExternalState(
        _replCoord, _replicationCoordinatorExternalState, this);
    OplogFetcher* oplogFetcher;
    try {
        auto executor = _replicationCoordinatorExternalState->getTaskExecutor();
        auto config = _replCoord->getConfig();
        auto onOplogFetcherShutdownCallbackFn =
            [&fetcherReturnStatus](const Status& status, const OpTimeWithHash& lastFetched) {
                fetcherReturnStatus = status;
            };

        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _oplogFetcher = stdx::make_unique<OplogFetcher>(
            executor,
            OpTimeWithHash(lastHashFetched, lastOpTimeFetched),
            source,
            NamespaceString(rsOplogName),
            config,
            _replicationCoordinatorExternalState->getOplogFetcherMaxFetcherRestarts(),
            &dataReplicatorExternalState,
            stdx::bind(&BackgroundSync::_enqueueDocuments,
                       this,
                       stdx::placeholders::_1,
                       stdx::placeholders::_2,
                       stdx::placeholders::_3),
            onOplogFetcherShutdownCallbackFn);
        oplogFetcher = _oplogFetcher.get();
    } catch (const mongo::DBException& ex) {
        fassertFailedWithStatus(34440, exceptionToStatus());
    }

    LOG(1) << "scheduling fetcher to read remote oplog on " << _syncSourceHost << " starting at "
           << oplogFetcher->getCommandObject_forTest()["filter"];
    auto scheduleStatus = oplogFetcher->startup();
    if (!scheduleStatus.isOK()) {
        warning() << "unable to schedule fetcher to read remote oplog on " << source << ": "
                  << scheduleStatus;
        return;
    }

    oplogFetcher->join();
    LOG(1) << "fetcher stopped reading remote oplog on " << source;

    // If the background sync is stopped after the fetcher is started, we need to
    // re-evaluate our sync source and oplog common point.
    if (isStopped()) {
        return;
    }

    if (fetcherReturnStatus.code() == ErrorCodes::OplogOutOfOrder) {
        // This is bad because it means that our source
        // has not returned oplog entries in ascending ts order, and they need to be.
        warning() << redact(fetcherReturnStatus);
        // Do not blacklist the server here, it will be blacklisted when we try to reuse it,
        // if it can't return a matching oplog start from the last fetch oplog ts field.
        return;
    } else if (fetcherReturnStatus.code() == ErrorCodes::OplogStartMissing ||
               fetcherReturnStatus.code() == ErrorCodes::RemoteOplogStale) {
        if (_replCoord->isCatchingUp()) {
            warning() << "Rollback situation detected in catch-up mode; catch-up mode will end.";
            sleepsecs(1);
            return;
        }

        // Rollback is a synchronous operation that uses the task executor and may not be
        // executed inside the fetcher callback.
        const int messagingPortTags = 0;
        ConnectionPool connectionPool(messagingPortTags);
        std::unique_ptr<ConnectionPool::ConnectionPtr> connection;
        auto getConnection = [&connection, &connectionPool, source]() -> DBClientBase* {
            if (!connection.get()) {
                connection.reset(new ConnectionPool::ConnectionPtr(
                    &connectionPool, source, Date_t::now(), kRollbackOplogSocketTimeout));
            };
            return connection->get();
        };

        {
            stdx::lock_guard<stdx::mutex> lock(_mutex);
            lastOpTimeFetched = _lastOpTimeFetched;
        }

        log() << "Starting rollback due to " << redact(fetcherReturnStatus);

        // Wait till all buffered oplog entries have drained and been applied.
        auto lastApplied = _replCoord->getMyLastAppliedOpTime();
        if (lastApplied != lastOpTimeFetched) {
            log() << "Waiting for all operations from " << lastApplied << " until "
                  << lastOpTimeFetched << " to be applied before starting rollback.";
            while (lastOpTimeFetched > (lastApplied = _replCoord->getMyLastAppliedOpTime())) {
                sleepmillis(10);
                if (isStopped() || inShutdown()) {
                    return;
                }
            }
        }

        // check that we are at minvalid, otherwise we cannot roll back as we may be in an
        // inconsistent state
        const auto minValid = StorageInterface::get(txn)->getMinValid(txn);
        if (lastApplied < minValid) {
            fassertNoTrace(18750,
                           Status(ErrorCodes::UnrecoverableRollbackError,
                                  str::stream() << "need to rollback, but in inconsistent state. "
                                                << "minvalid: " << minValid.toString()
                                                << " > our last optime: "
                                                << lastApplied.toString()));
        }

        _rollback(txn, source, getConnection);
        stop();
    } else if (fetcherReturnStatus == ErrorCodes::InvalidBSON) {
        Seconds blacklistDuration(60);
        warning() << "Fetcher got invalid BSON while querying oplog. Blacklisting sync source "
                  << source << " for " << blacklistDuration << ".";
        _replCoord->blacklistSyncSource(source, Date_t::now() + blacklistDuration);
    } else if (!fetcherReturnStatus.isOK()) {
        warning() << "Fetcher stopped querying remote oplog with error: "
                  << redact(fetcherReturnStatus);
    }
}
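// Both _produce() variants poll getMyLastAppliedOpTime() until it catches up with the last
// fetched optime before handing off to _rollback(). The helper below is a standalone, hedged
// sketch of that "poll until caught up or shut down" pattern; it is not part of BackgroundSync,
// and the names (waitUntilCaughtUp, getCurrent, shouldAbort) are hypothetical. In _produce() the
// target would be lastOpTimeFetched, getCurrent() would be the coordinator's last applied optime,
// and shouldAbort() would combine the stop and shutdown checks.
#include <chrono>
#include <functional>
#include <thread>

// Returns true once getCurrent() >= target, or false if shouldAbort() becomes true first.
template <typename T>
bool waitUntilCaughtUp(const T& target,
                       std::function<T()> getCurrent,
                       std::function<bool()> shouldAbort,
                       std::chrono::milliseconds pollInterval = std::chrono::milliseconds(10)) {
    while (getCurrent() < target) {
        if (shouldAbort()) {
            return false;  // mirrors the isStopped() || inShutdown() early return above
        }
        std::this_thread::sleep_for(pollInterval);
    }
    return true;
}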
void BackgroundSync::_produce(OperationContext* txn) {
    // this oplog reader does not do a handshake because we don't want the server it's syncing
    // from to track how far it has synced
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        if (_lastOpTimeFetched.isNull()) {
            // then we're initial syncing and we're still waiting for this to be set
            lock.unlock();
            sleepsecs(1);
            // if there is no one to sync from
            return;
        }

        if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary() ||
            inShutdownStrict()) {
            return;
        }
    }

    while (MONGO_FAIL_POINT(rsBgSyncProduce)) {
        sleepmillis(0);
    }

    // find a target to sync from the last optime fetched
    OpTime lastOpTimeFetched;
    HostAndPort source;
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        lastOpTimeFetched = _lastOpTimeFetched;
        _syncSourceHost = HostAndPort();
    }
    SyncSourceResolverResponse syncSourceResp =
        _syncSourceResolver.findSyncSource(txn, lastOpTimeFetched);

    if (syncSourceResp.syncSourceStatus == ErrorCodes::OplogStartMissing) {
        // All (accessible) sync sources were too stale.
        error() << "too stale to catch up -- entering maintenance mode";
        log() << "Our newest OpTime : " << lastOpTimeFetched;
        log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen;
        log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
        setMinValid(txn, {lastOpTimeFetched, syncSourceResp.earliestOpTimeSeen});
        auto status = _replCoord->setMaintenanceMode(true);
        if (!status.isOK()) {
            warning() << "Failed to transition into maintenance mode.";
        }
        bool worked = _replCoord->setFollowerMode(MemberState::RS_RECOVERING);
        if (!worked) {
            warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
                      << ". Current state: " << _replCoord->getMemberState();
        }
        return;
    } else if (syncSourceResp.isOK() && !syncSourceResp.getSyncSource().empty()) {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _syncSourceHost = syncSourceResp.getSyncSource();
        source = _syncSourceHost;
    } else {
        if (!syncSourceResp.isOK()) {
            log() << "failed to find sync source, received error "
                  << syncSourceResp.syncSourceStatus.getStatus();
        }
        // No sync source found.
        sleepsecs(1);
        return;
    }

    long long lastHashFetched;
    {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        if (_stopped) {
            return;
        }
        lastOpTimeFetched = _lastOpTimeFetched;
        lastHashFetched = _lastFetchedHash;
        _replCoord->signalUpstreamUpdater();
    }

    const auto isV1ElectionProtocol = _replCoord->isV1ElectionProtocol();
    // Under protocol version 1, make the awaitData timeout (maxTimeMS) dependent on the election
    // timeout. This enables the sync source to communicate liveness of the primary to secondaries.
    // Under protocol version 0, use a default timeout of 2 seconds for awaitData.
    const Milliseconds fetcherMaxTimeMS(
        isV1ElectionProtocol ? _replCoord->getConfig().getElectionTimeoutPeriod() / 2 : Seconds(2));

    Status fetcherReturnStatus = Status::OK();
    auto fetcherCallback = stdx::bind(&BackgroundSync::_fetcherCallback,
                                      this,
                                      stdx::placeholders::_1,
                                      stdx::placeholders::_3,
                                      stdx::cref(source),
                                      lastOpTimeFetched,
                                      lastHashFetched,
                                      fetcherMaxTimeMS,
                                      &fetcherReturnStatus);

    BSONObjBuilder cmdBob;
    cmdBob.append("find", nsToCollectionSubstring(rsOplogName));
    cmdBob.append("filter", BSON("ts" << BSON("$gte" << lastOpTimeFetched.getTimestamp())));
    cmdBob.append("tailable", true);
    cmdBob.append("oplogReplay", true);
    cmdBob.append("awaitData", true);
    cmdBob.append("maxTimeMS", durationCount<Milliseconds>(Minutes(1)));  // 1 min initial find.

    BSONObjBuilder metadataBob;
    if (isV1ElectionProtocol) {
        cmdBob.append("term", _replCoord->getTerm());
        metadataBob.append(rpc::kReplSetMetadataFieldName, 1);
    }

    auto dbName = nsToDatabase(rsOplogName);
    auto cmdObj = cmdBob.obj();
    auto metadataObj = metadataBob.obj();
    Fetcher fetcher(&_threadPoolTaskExecutor,
                    source,
                    dbName,
                    cmdObj,
                    fetcherCallback,
                    metadataObj,
                    _replCoord->getConfig().getElectionTimeoutPeriod());

    LOG(1) << "scheduling fetcher to read remote oplog on " << source << " starting at "
           << cmdObj["filter"];
    auto scheduleStatus = fetcher.schedule();
    if (!scheduleStatus.isOK()) {
        warning() << "unable to schedule fetcher to read remote oplog on " << source << ": "
                  << scheduleStatus;
        return;
    }
    fetcher.wait();
    LOG(1) << "fetcher stopped reading remote oplog on " << source;

    // If the background sync is stopped after the fetcher is started, we need to
    // re-evaluate our sync source and oplog common point.
    if (isStopped()) {
        return;
    }

    if (fetcherReturnStatus.code() == ErrorCodes::OplogOutOfOrder) {
        // This is bad because it means that our source
        // has not returned oplog entries in ascending ts order, and they need to be.
        warning() << fetcherReturnStatus.toString();
        // Do not blacklist the server here, it will be blacklisted when we try to reuse it,
        // if it can't return a matching oplog start from the last fetch oplog ts field.
        return;
    } else if (fetcherReturnStatus.code() == ErrorCodes::OplogStartMissing ||
               fetcherReturnStatus.code() == ErrorCodes::RemoteOplogStale) {
        // Rollback is a synchronous operation that uses the task executor and may not be
        // executed inside the fetcher callback.
        const int messagingPortTags = 0;
        ConnectionPool connectionPool(messagingPortTags);
        std::unique_ptr<ConnectionPool::ConnectionPtr> connection;
        auto getConnection = [&connection, &connectionPool, source]() -> DBClientBase* {
            if (!connection.get()) {
                connection.reset(new ConnectionPool::ConnectionPtr(
                    &connectionPool, source, Date_t::now(), oplogSocketTimeout));
            };
            return connection->get();
        };

        {
            stdx::lock_guard<stdx::mutex> lock(_mutex);
            lastOpTimeFetched = _lastOpTimeFetched;
        }

        log() << "Starting rollback due to " << fetcherReturnStatus;

        // Wait till all buffered oplog entries have drained and been applied.
        auto lastApplied = _replCoord->getMyLastAppliedOpTime();
        if (lastApplied != lastOpTimeFetched) {
            log() << "Waiting for all operations from " << lastApplied << " until "
                  << lastOpTimeFetched << " to be applied before starting rollback.";
            while (lastOpTimeFetched > (lastApplied = _replCoord->getMyLastAppliedOpTime())) {
                sleepmillis(10);
                if (isStopped() || inShutdown()) {
                    return;
                }
            }
        }

        // check that we are at minvalid, otherwise we cannot roll back as we may be in an
        // inconsistent state
        BatchBoundaries boundaries = getMinValid(txn);
        if (!boundaries.start.isNull() || boundaries.end > lastApplied) {
            fassertNoTrace(18750,
                           Status(ErrorCodes::UnrecoverableRollbackError,
                                  str::stream() << "need to rollback, but in inconsistent state. "
                                                << "minvalid: " << boundaries.end.toString()
                                                << " > our last optime: "
                                                << lastApplied.toString()));
        }

        _rollback(txn, source, getConnection);
        stop();
    } else if (fetcherReturnStatus == ErrorCodes::InvalidBSON) {
        Seconds blacklistDuration(60);
        warning() << "Fetcher got invalid BSON while querying oplog. Blacklisting sync source "
                  << source << " for " << blacklistDuration << ".";
        _replCoord->blacklistSyncSource(source, Date_t::now() + blacklistDuration);
    } else if (!fetcherReturnStatus.isOK()) {
        warning() << "Fetcher error querying oplog: " << fetcherReturnStatus.toString();
    }
}
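// When the fetcher returns InvalidBSON, both _produce() variants blacklist the sync source for
// 60 seconds via _replCoord->blacklistSyncSource(source, Date_t::now() + blacklistDuration). The
// sketch below is a minimal, hypothetical illustration of such an expiry-based blacklist; it is
// not the actual ReplicationCoordinator implementation, and SyncSourceBlacklist and its members
// are invented names used only for this example.
#include <chrono>
#include <map>
#include <string>

class SyncSourceBlacklist {
public:
    using Clock = std::chrono::steady_clock;

    // Ignore 'host' as a sync source until 'until'.
    void blacklist(const std::string& host, Clock::time_point until) {
        _entries[host] = until;
    }

    // A host becomes usable again once its entry has expired.
    bool isBlacklisted(const std::string& host, Clock::time_point now = Clock::now()) {
        auto it = _entries.find(host);
        if (it == _entries.end()) {
            return false;
        }
        if (now >= it->second) {
            _entries.erase(it);  // expired entries are dropped lazily
            return false;
        }
        return true;
    }

private:
    std::map<std::string, Clock::time_point> _entries;
};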
/**
 * Return whether there are non-local databases. If there was an error because the wrong mongod
 * version was used for these datafiles, a DBException with status ErrorCodes::MustDowngrade is
 * thrown.
 */
bool repairDatabasesAndCheckVersion(OperationContext* opCtx) {
    auto const storageEngine = opCtx->getServiceContext()->getStorageEngine();
    Lock::GlobalWrite lk(opCtx);

    std::vector<std::string> dbNames = storageEngine->listDatabases();

    // Rebuilding indexes must be done before a database can be opened, except when using repair,
    // which rebuilds all indexes when it is done.
    if (!storageGlobalParams.readOnly && !storageGlobalParams.repair) {
        // Determine whether this is a replica set node running in standalone mode. If we're in
        // repair mode, we cannot set the flag yet as it needs to open a database and look through
        // a collection. Rebuild the necessary indexes after setting the flag.
        setReplSetMemberInStandaloneMode(opCtx);
        rebuildIndexes(opCtx, storageEngine);
    }

    bool ensuredCollectionProperties = false;

    // Repair all databases first, so that we do not try to open them if they are in bad shape
    auto databaseHolder = DatabaseHolder::get(opCtx);
    if (storageGlobalParams.repair) {
        invariant(!storageGlobalParams.readOnly);

        if (MONGO_FAIL_POINT(exitBeforeDataRepair)) {
            log() << "Exiting because 'exitBeforeDataRepair' fail point was set.";
            quickExit(EXIT_ABRUPT);
        }

        // Ensure that the local database is repaired first, if it exists, so that we can open it
        // before any other database to be able to determine if this is a replica set node running
        // in standalone mode before rebuilding any indexes.
        auto dbNamesIt = std::find(dbNames.begin(), dbNames.end(), NamespaceString::kLocalDb);
        if (dbNamesIt != dbNames.end()) {
            std::swap(dbNames.front(), *dbNamesIt);
            invariant(dbNames.front() == NamespaceString::kLocalDb);
        }

        stdx::function<void(const std::string& dbName)> onRecordStoreRepair =
            [opCtx](const std::string& dbName) {
                if (dbName == NamespaceString::kLocalDb) {
                    setReplSetMemberInStandaloneMode(opCtx);
                }
            };

        for (const auto& dbName : dbNames) {
            LOG(1) << " Repairing database: " << dbName;
            fassertNoTrace(18506,
                           repairDatabase(opCtx, storageEngine, dbName, onRecordStoreRepair));
        }

        // All collections must have UUIDs before restoring the FCV document to a version that
        // requires UUIDs.
        uassertStatusOK(ensureCollectionProperties(opCtx, dbNames));
        ensuredCollectionProperties = true;

        // Attempt to restore the featureCompatibilityVersion document if it is missing.
        NamespaceString fcvNSS(NamespaceString::kServerConfigurationNamespace);

        auto db = databaseHolder->getDb(opCtx, fcvNSS.db());
        Collection* versionColl;
        BSONObj featureCompatibilityVersion;
        if (!db || !(versionColl = db->getCollection(opCtx, fcvNSS)) ||
            !Helpers::findOne(opCtx,
                              versionColl,
                              BSON("_id" << FeatureCompatibilityVersionParser::kParameterName),
                              featureCompatibilityVersion)) {
            uassertStatusOK(restoreMissingFeatureCompatibilityVersionDocument(opCtx, dbNames));
        }
    }

    if (!ensuredCollectionProperties) {
        uassertStatusOK(ensureCollectionProperties(opCtx, dbNames));
    }

    if (!storageGlobalParams.readOnly) {
        // We open the "local" database before calling hasReplSetConfigDoc() to ensure the
        // in-memory catalog entries for the 'kSystemReplSetNamespace' collection have been
        // populated if the collection exists. If the "local" database didn't exist at this point
        // yet, then it will be created. If the mongod is running in a read-only mode, then it is
        // fine to not open the "local" database and populate the catalog entries because we won't
        // attempt to drop the temporary collections anyway.
        Lock::DBLock dbLock(opCtx, NamespaceString::kSystemReplSetNamespace.db(), MODE_X);
        databaseHolder->openDb(opCtx, NamespaceString::kSystemReplSetNamespace.db());
    }

    if (storageGlobalParams.repair) {
        if (MONGO_FAIL_POINT(exitBeforeRepairInvalidatesConfig)) {
            log() << "Exiting because 'exitBeforeRepairInvalidatesConfig' fail point was set.";
            quickExit(EXIT_ABRUPT);
        }
        // This must be done after opening the "local" database as it modifies the replica set
        // config.
        auto repairObserver = StorageRepairObserver::get(opCtx->getServiceContext());
        repairObserver->onRepairDone(opCtx);
        if (repairObserver->isDataModified()) {
            warning() << "Modifications made by repair:";
            const auto& mods = repairObserver->getModifications();
            for (const auto& mod : mods) {
                warning() << " " << mod;
            }
            if (hasReplSetConfigDoc(opCtx)) {
                warning() << "WARNING: Repair may have modified replicated data. This node will no "
                             "longer be able to join a replica set without a full re-sync";
            }
        }
    }

    const repl::ReplSettings& replSettings =
        repl::ReplicationCoordinator::get(opCtx)->getSettings();

    // On replica set members we only clear temp collections on DBs other than "local" during
    // promotion to primary. On pure slaves, they are only cleared when the oplog tells them
    // to. The local DB is special because it is not replicated. See SERVER-10927 for more
    // details.
    const bool shouldClearNonLocalTmpCollections =
        !(hasReplSetConfigDoc(opCtx) || replSettings.usingReplSets());

    // To check whether a featureCompatibilityVersion document exists.
    bool fcvDocumentExists = false;

    // To check whether we have databases other than local.
    bool nonLocalDatabases = false;

    // Refresh list of database names to include newly-created admin, if it exists.
    dbNames = storageEngine->listDatabases();
    for (const auto& dbName : dbNames) {
        if (dbName != "local") {
            nonLocalDatabases = true;
        }
        LOG(1) << " Recovering database: " << dbName;

        auto db = databaseHolder->openDb(opCtx, dbName);
        invariant(db);

        // First thing after opening the database is to check for file compatibility,
        // otherwise we might crash if this is a deprecated format.
        auto status = storageEngine->currentFilesCompatible(opCtx);
        if (!status.isOK()) {
            if (status.code() == ErrorCodes::CanRepairToDowngrade) {
                // Convert CanRepairToDowngrade statuses to MustUpgrade statuses to avoid logging a
                // potentially confusing and inaccurate message.
                //
                // TODO SERVER-24097: Log a message informing the user that they can start the
                // current version of mongod with --repair and then proceed with normal startup.
                status = {ErrorCodes::MustUpgrade, status.reason()};
            }
            severe() << "Unable to start mongod due to an incompatibility with the data files and"
                        " this version of mongod: "
                     << redact(status);
            severe() << "Please consult our documentation when trying to downgrade to a previous"
                        " major release";
            quickExit(EXIT_NEED_UPGRADE);
            MONGO_UNREACHABLE;
        }

        // If the server configuration collection already contains a valid
        // featureCompatibilityVersion document, cache it in-memory as a server parameter.
        if (dbName == "admin") {
            if (Collection* versionColl =
                    db->getCollection(opCtx, NamespaceString::kServerConfigurationNamespace)) {
                BSONObj featureCompatibilityVersion;
                if (Helpers::findOne(
                        opCtx,
                        versionColl,
                        BSON("_id" << FeatureCompatibilityVersionParser::kParameterName),
                        featureCompatibilityVersion)) {
                    auto swVersion =
                        FeatureCompatibilityVersionParser::parse(featureCompatibilityVersion);
                    // Note this error path captures all cases of an FCV document existing,
                    // but with any value other than "4.0" or "4.2". This includes unexpected
                    // cases with no path forward such as the FCV value not being a string.
                    uassert(ErrorCodes::MustDowngrade,
                            str::stream()
                                << "UPGRADE PROBLEM: Found an invalid "
                                   "featureCompatibilityVersion document (ERROR: "
                                << swVersion.getStatus()
                                << "). If the current featureCompatibilityVersion is below "
                                   "4.0, see the documentation on upgrading at "
                                << feature_compatibility_version_documentation::kUpgradeLink
                                << ".",
                            swVersion.isOK());

                    fcvDocumentExists = true;
                    auto version = swVersion.getValue();
                    serverGlobalParams.featureCompatibility.setVersion(version);
                    FeatureCompatibilityVersion::updateMinWireVersion();

                    // On startup, if the version is in an upgrading or downgrading state, print a
                    // warning.
                    if (version ==
                        ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo42) {
                        log() << "** WARNING: A featureCompatibilityVersion upgrade did not "
                              << "complete. " << startupWarningsLog;
                        log() << "** The current featureCompatibilityVersion is "
                              << FeatureCompatibilityVersionParser::toString(version) << "."
                              << startupWarningsLog;
                        log() << "** To fix this, use the setFeatureCompatibilityVersion "
                              << "command to resume upgrade to 4.2." << startupWarningsLog;
                    } else if (version ==
                               ServerGlobalParams::FeatureCompatibility::Version::
                                   kDowngradingTo40) {
                        log() << "** WARNING: A featureCompatibilityVersion downgrade did not "
                              << "complete. " << startupWarningsLog;
                        log() << "** The current featureCompatibilityVersion is "
                              << FeatureCompatibilityVersionParser::toString(version) << "."
                              << startupWarningsLog;
                        log() << "** To fix this, use the setFeatureCompatibilityVersion "
                              << "command to resume downgrade to 4.0." << startupWarningsLog;
                    }
                }
            }
        }

        if (replSettings.usingReplSets()) {
            // We only care about _id indexes and drop-pending collections if we are in a replset.
            db->checkForIdIndexesAndDropPendingCollections(opCtx);
            // Ensure oplog is capped (mongodb does not guarantee order of inserts on noncapped
            // collections)
            if (db->name() == "local") {
                checkForCappedOplog(opCtx, db);
            }
        }

        if (!storageGlobalParams.readOnly &&
            (shouldClearNonLocalTmpCollections || dbName == "local")) {
            db->clearTmpCollections(opCtx);
        }
    }

    // Fail to start up if there is no featureCompatibilityVersion document and there are
    // non-local databases present.
    if (!fcvDocumentExists && nonLocalDatabases) {
        severe()
            << "Unable to start up mongod due to missing featureCompatibilityVersion document.";
        severe() << "Please run with --repair to restore the document.";
        fassertFailedNoTrace(40652);
    }

    LOG(1) << "done repairDatabases";
    return nonLocalDatabases;
}