/**
 * Callback invoked with the response of the 'listDatabases' command issued against the sync
 * source.
 *
 * On success, parses the database array out of the response, moves 'admin' to the front of the
 * array (so users can authenticate against it while initial sync is still in progress), creates
 * one DatabaseCloner per database that passes the include filter, and starts only the first
 * cloner — each subsequent cloner is started from _onEachDBCloneFinish as its predecessor
 * completes. On a command error, a malformed response, or a cloner construction/startup failure,
 * the whole DatabasesCloner is failed.
 */
void DatabasesCloner::_onListDatabaseFinish(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& cbd) {
    // The transport-level status may be OK while the command itself failed; check both.
    Status respStatus = cbd.response.status;
    if (respStatus.isOK()) {
        respStatus = getStatusFromCommandResult(cbd.response.data);
    }

    UniqueLock lk(_mutex);
    if (!respStatus.isOK()) {
        LOG(1) << "'listDatabases' failed: " << respStatus;
        _fail_inlock(&lk, respStatus);
        return;
    }

    // There should not be any cloners yet.
    invariant(_databaseCloners.size() == 0);

    const auto respBSON = cbd.response.data;

    auto databasesArray = _parseListDatabasesResponse(respBSON);
    if (!databasesArray.isOK()) {
        LOG(1) << "'listDatabases' returned a malformed response: "
               << databasesArray.getStatus().toString();
        _fail_inlock(&lk, databasesArray.getStatus());
        return;
    }

    auto dbsArray = databasesArray.getValue();

    // Ensure that the 'admin' database is the first element in the array of databases so that it
    // will be the first to be cloned. This allows users to authenticate against a database while
    // initial sync is occurring.
    _setAdminAsFirst(dbsArray);

    for (BSONElement arrayElement : dbsArray) {
        const BSONObj dbBSON = arrayElement.Obj();

        // Check to see if we want to exclude this db from the clone.
        if (!_includeDbFn(dbBSON)) {
            LOG(1) << "Excluding database from the 'listDatabases' response: " << dbBSON;
            continue;
        }

        if (!dbBSON.hasField("name")) {
            LOG(1) << "Excluding database due to the 'listDatabases' response not containing a "
                      "'name' field for this entry: "
                   << dbBSON;
            // BUGFIX: previously this entry fell through and a cloner was created for the empty
            // database name; actually skip it, as the log message above states.
            continue;
        }

        const std::string dbName = dbBSON["name"].str();
        std::shared_ptr<DatabaseCloner> dbCloner{nullptr};

        // Collection filter for the DatabaseCloner: skip 'system' collections that clients may
        // not legally create, and any non-normal namespaces.
        const auto collectionFilterPred = [dbName](const BSONObj& collInfo) {
            const auto collName = collInfo["name"].str();
            const NamespaceString ns(dbName, collName);
            if (ns.isSystem() && !ns.isLegalClientSystemNS()) {
                LOG(1) << "Skipping 'system' collection: " << ns.ns();
                return false;
            }
            if (!ns.isNormal()) {
                LOG(1) << "Skipping non-normal collection: " << ns.ns();
                return false;
            }
            LOG(2) << "Allowing cloning of collectionInfo: " << collInfo;
            return true;
        };

        // Per-collection completion is log-only; per-database completion drives the next cloner.
        const auto onCollectionFinish = [](const Status& status, const NamespaceString& srcNss) {
            if (status.isOK()) {
                LOG(1) << "collection clone finished: " << srcNss;
            } else {
                error() << "collection clone for '" << srcNss << "' failed due to "
                        << status.toString();
            }
        };
        const auto onDbFinish = [this, dbName](const Status& status) {
            _onEachDBCloneFinish(status, dbName);
        };

        Status startStatus = Status::OK();
        try {
            dbCloner.reset(new DatabaseCloner(
                _exec,
                _dbWorkThreadPool,
                _source,
                dbName,
                BSONObj(),  // do not filter collections out during listCollections call.
                collectionFilterPred,
                _storage,  // use storage provided.
                onCollectionFinish,
                onDbFinish));
            if (_scheduleDbWorkFn) {
                dbCloner->setScheduleDbWorkFn_forTest(_scheduleDbWorkFn);
            }
            if (_startCollectionClonerFn) {
                dbCloner->setStartCollectionClonerFn(_startCollectionClonerFn);
            }
            // Start first database cloner; the rest are started as each one finishes.
            if (_databaseCloners.empty()) {
                startStatus = dbCloner->startup();
            }
        } catch (...) {
            startStatus = exceptionToStatus();
        }

        if (!startStatus.isOK()) {
            std::string err = str::stream() << "could not create cloner for database: " << dbName
                                            << " due to: " << startStatus.toString();
            _setStatus_inlock({ErrorCodes::InitialSyncFailure, err});
            error() << err;
            break;  // exit for_each loop
        }

        // Add cloner to list.
        _databaseCloners.push_back(dbCloner);
    }

    // No cloners at all means we are finished: succeed if nothing failed above, otherwise
    // propagate the recorded failure.
    if (_databaseCloners.size() == 0) {
        if (_status.isOK()) {
            _succeed_inlock(&lk);
        } else {
            _fail_inlock(&lk, _status);
        }
    }
}
/**
 * One iteration of the bgsync producer: finds a sync source, tails its oplog with an
 * OplogFetcher (which enqueues documents via _enqueueDocuments), and then reacts to the
 * fetcher's final status — entering maintenance mode when every source is too stale,
 * initiating rollback when our oplog has diverged from the source, blacklisting a source
 * that returns invalid BSON, or simply logging other errors.
 *
 * Callers run this in a loop; early returns (often after a short sleep) mean "try again
 * on the next pass".
 */
void BackgroundSync::_produce(
    OperationContext* txn,
    ReplicationCoordinatorExternalState* replicationCoordinatorExternalState) {
    // this oplog reader does not do a handshake because we don't want the server it's syncing
    // from to track how far it has synced
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        if (_lastOpTimeFetched.isNull()) {
            // then we're initial syncing and we're still waiting for this to be set
            lock.unlock();
            sleepsecs(1);
            // if there is no one to sync from
            return;
        }

        // Do not produce while draining, while primary, or during shutdown.
        if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary() ||
            inShutdownStrict()) {
            return;
        }
    }

    // Test failpoint: spin (yielding the CPU each pass) until it is disabled.
    while (MONGO_FAIL_POINT(rsBgSyncProduce)) {
        sleepmillis(0);
    }

    // find a target to sync from the last optime fetched
    OpTime lastOpTimeFetched;
    HostAndPort source;
    {
        // Snapshot the fetch point and clear the previous sync source under the mutex.
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        lastOpTimeFetched = _lastOpTimeFetched;
        _syncSourceHost = HostAndPort();
    }
    SyncSourceResolverResponse syncSourceResp =
        _syncSourceResolver.findSyncSource(txn, lastOpTimeFetched);

    if (syncSourceResp.syncSourceStatus == ErrorCodes::OplogStartMissing) {
        // All (accessible) sync sources were too stale.
        error() << "too stale to catch up -- entering maintenance mode";
        log() << "Our newest OpTime : " << lastOpTimeFetched;
        log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen;
        log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
        // Record the gap in minValid, then drop into maintenance mode / RECOVERING.
        StorageInterface::get(txn)
            ->setMinValid(txn, {lastOpTimeFetched, syncSourceResp.earliestOpTimeSeen});
        auto status = _replCoord->setMaintenanceMode(true);
        if (!status.isOK()) {
            warning() << "Failed to transition into maintenance mode.";
        }
        bool worked = _replCoord->setFollowerMode(MemberState::RS_RECOVERING);
        if (!worked) {
            warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
                      << ". Current state: " << _replCoord->getMemberState();
        }
        return;
    } else if (syncSourceResp.isOK() && !syncSourceResp.getSyncSource().empty()) {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _syncSourceHost = syncSourceResp.getSyncSource();
        source = _syncSourceHost;
    } else {
        if (!syncSourceResp.isOK()) {
            log() << "failed to find sync source, received error "
                  << syncSourceResp.syncSourceStatus.getStatus();
        }
        // No sync source found.
        sleepsecs(1);
        return;
    }

    long long lastHashFetched;
    {
        // Re-read the fetch point under the mutex: it may have advanced since the snapshot
        // taken before sync-source resolution.
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        if (_stopped) {
            return;
        }
        lastOpTimeFetched = _lastOpTimeFetched;
        lastHashFetched = _lastFetchedHash;
        _replCoord->signalUpstreamUpdater();
    }

    // "lastFetched" not used. Already set in _enqueueDocuments.
    Status fetcherReturnStatus = Status::OK();
    DataReplicatorExternalStateBackgroundSync dataReplicatorExternalState(
        _replCoord, replicationCoordinatorExternalState, this);
    OplogFetcher* oplogFetcher;
    try {
        auto config = _replCoord->getConfig();
        // Captures fetcherReturnStatus by reference; safe because we join() the fetcher
        // below, within this stack frame.
        auto onOplogFetcherShutdownCallbackFn =
            [&fetcherReturnStatus](const Status& status, const OpTimeWithHash& lastFetched) {
                fetcherReturnStatus = status;
            };

        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _oplogFetcher = stdx::make_unique<OplogFetcher>(
            &_threadPoolTaskExecutor,
            OpTimeWithHash(lastHashFetched, lastOpTimeFetched),
            source,
            NamespaceString(rsOplogName),
            config,
            &dataReplicatorExternalState,
            stdx::bind(&BackgroundSync::_enqueueDocuments,
                       this,
                       stdx::placeholders::_1,
                       stdx::placeholders::_2,
                       stdx::placeholders::_3,
                       stdx::placeholders::_4),
            onOplogFetcherShutdownCallbackFn);
        oplogFetcher = _oplogFetcher.get();
    } catch (const mongo::DBException& ex) {
        // Construction failure here is unrecoverable.
        fassertFailedWithStatus(34440, exceptionToStatus());
    }

    LOG(1) << "scheduling fetcher to read remote oplog on " << _syncSourceHost << " starting at "
           << oplogFetcher->getCommandObject_forTest()["filter"];
    auto scheduleStatus = oplogFetcher->startup();
    if (!scheduleStatus.isOK()) {
        warning() << "unable to schedule fetcher to read remote oplog on " << source << ": "
                  << scheduleStatus;
        return;
    }

    // Block until the fetcher finishes; its shutdown callback has filled in
    // fetcherReturnStatus by the time join() returns.
    oplogFetcher->join();
    LOG(1) << "fetcher stopped reading remote oplog on " << source;

    // If the background sync is stopped after the fetcher is started, we need to
    // re-evaluate our sync source and oplog common point.
    if (isStopped()) {
        return;
    }

    if (fetcherReturnStatus.code() == ErrorCodes::OplogOutOfOrder) {
        // This is bad because it means that our source
        // has not returned oplog entries in ascending ts order, and they need to be.
        warning() << fetcherReturnStatus.toString();
        // Do not blacklist the server here, it will be blacklisted when we try to reuse it,
        // if it can't return a matching oplog start from the last fetch oplog ts field.
        return;
    } else if (fetcherReturnStatus.code() == ErrorCodes::OplogStartMissing ||
               fetcherReturnStatus.code() == ErrorCodes::RemoteOplogStale) {
        // Rollback is a synchronous operation that uses the task executor and may not be
        // executed inside the fetcher callback.
        const int messagingPortTags = 0;
        ConnectionPool connectionPool(messagingPortTags);
        std::unique_ptr<ConnectionPool::ConnectionPtr> connection;
        // Lazily opens (and caches) a single pooled connection to the sync source for the
        // rollback machinery.
        auto getConnection = [&connection, &connectionPool, source]() -> DBClientBase* {
            if (!connection.get()) {
                connection.reset(new ConnectionPool::ConnectionPtr(
                    &connectionPool, source, Date_t::now(), kOplogSocketTimeout));
            };
            return connection->get();
        };

        {
            stdx::lock_guard<stdx::mutex> lock(_mutex);
            lastOpTimeFetched = _lastOpTimeFetched;
        }

        log() << "Starting rollback due to " << fetcherReturnStatus;

        // Wait till all buffered oplog entries have drained and been applied.
        auto lastApplied = _replCoord->getMyLastAppliedOpTime();
        if (lastApplied != lastOpTimeFetched) {
            log() << "Waiting for all operations from " << lastApplied << " until "
                  << lastOpTimeFetched << " to be applied before starting rollback.";
            while (lastOpTimeFetched > (lastApplied = _replCoord->getMyLastAppliedOpTime())) {
                sleepmillis(10);
                if (isStopped() || inShutdown()) {
                    return;
                }
            }
        }
        // check that we are at minvalid, otherwise we cannot roll back as we may be in an
        // inconsistent state
        BatchBoundaries boundaries = StorageInterface::get(txn)->getMinValid(txn);
        if (!boundaries.start.isNull() || boundaries.end > lastApplied) {
            fassertNoTrace(18750,
                           Status(ErrorCodes::UnrecoverableRollbackError,
                                  str::stream()
                                      << "need to rollback, but in inconsistent state. "
                                      << "minvalid: " << boundaries.end.toString()
                                      << " > our last optime: " << lastApplied.toString()));
        }

        _rollback(txn, source, getConnection);
        stop();
    } else if (fetcherReturnStatus == ErrorCodes::InvalidBSON) {
        Seconds blacklistDuration(60);
        warning() << "Fetcher got invalid BSON while querying oplog. Blacklisting sync source "
                  << source << " for " << blacklistDuration << ".";
        _replCoord->blacklistSyncSource(source, Date_t::now() + blacklistDuration);
    } else if (!fetcherReturnStatus.isOK()) {
        warning() << "Fetcher error querying oplog: " << fetcherReturnStatus.toString();
    }
}
namespace {
/**
 * Ensures that an index with 'keyPattern' exists on 'collection' ('ns'). If no such index is
 * found, builds one from 'spec' via generateSystemIndexForExistingCollection, converting any
 * thrown exception into the returned Status. Templated on the spec type because the index-spec
 * constants are declared elsewhere in this file.
 */
template <typename IndexSpecType>
Status _ensureSystemIndex(OperationContext* opCtx,
                          Collection* collection,
                          const NamespaceString& ns,
                          const BSONObj& keyPattern,
                          const IndexSpecType& spec) {
    IndexCatalog* indexCatalog = collection->getIndexCatalog();
    invariant(indexCatalog);

    std::vector<IndexDescriptor*> indexes;
    indexCatalog->findIndexesByKeyPattern(opCtx, keyPattern, false, &indexes);
    if (indexes.empty()) {
        try {
            generateSystemIndexForExistingCollection(opCtx, collection, ns, spec);
        } catch (...) {
            return exceptionToStatus();
        }
    }
    return Status::OK();
}
}  // namespace

/**
 * Verifies that the expected system indexes exist on the users, roles, and sessions
 * collections, building any that are missing. Fails with AuthSchemaIncompatible if the old
 * v2.4-style unique index is still present on system.users (the auth schema must first be
 * upgraded via authSchemaUpgrade on a 2.6 server). Returns OK if the database does not exist.
 */
Status verifySystemIndexes(OperationContext* opCtx) {
    const NamespaceString& systemUsers = AuthorizationManager::usersCollectionNamespace;
    const NamespaceString& systemRoles = AuthorizationManager::rolesCollectionNamespace;

    AutoGetDb autoDb(opCtx, systemUsers.db(), MODE_X);
    if (!autoDb.getDb()) {
        return Status::OK();
    }

    Collection* collection = autoDb.getDb()->getCollection(opCtx, systemUsers);
    if (collection) {
        IndexCatalog* indexCatalog = collection->getIndexCatalog();
        invariant(indexCatalog);

        // Make sure the old unique index from v2.4 on system.users doesn't exist.
        std::vector<IndexDescriptor*> indexes;
        indexCatalog->findIndexesByKeyPattern(opCtx, v1SystemUsersKeyPattern, false, &indexes);
        if (!indexes.empty()) {
            fassert(ErrorCodes::AmbiguousIndexKeyPattern, indexes.size() == 1);
            return Status(ErrorCodes::AuthSchemaIncompatible,
                          "Old 2.4 style user index identified. "
                          "The authentication schema needs to be updated by "
                          "running authSchemaUpgrade on a 2.6 server.");
        }

        // Ensure that system indexes exist for the user collection.
        Status status = _ensureSystemIndex(
            opCtx, collection, systemUsers, v3SystemUsersKeyPattern, v3SystemUsersIndexSpec);
        if (!status.isOK()) {
            return status;
        }
    }

    // Ensure that system indexes exist for the roles collection, if it exists.
    collection = autoDb.getDb()->getCollection(opCtx, systemRoles);
    if (collection) {
        Status status = _ensureSystemIndex(
            opCtx, collection, systemRoles, v3SystemRolesKeyPattern, v3SystemRolesIndexSpec);
        if (!status.isOK()) {
            return status;
        }
    }

    // Ensure that system indexes exist for the sessions collection, if it exists.
    // NOTE(review): this looks up sessionCollectionNamespace on the Database obtained from
    // systemUsers.db() — confirm that namespace lives in the same database.
    collection = autoDb.getDb()->getCollection(opCtx, sessionCollectionNamespace);
    if (collection) {
        Status status = _ensureSystemIndex(opCtx,
                                           collection,
                                           sessionCollectionNamespace,
                                           v1SystemSessionsKeyPattern,
                                           v1SystemSessionsIndexSpec);
        if (!status.isOK()) {
            return status;
        }
    }

    return Status::OK();
}