Пример #1
0
void DatabasesCloner::_onListDatabaseFinish(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& cbd) {
    Status respStatus = cbd.response.status;
    if (respStatus.isOK()) {
        respStatus = getStatusFromCommandResult(cbd.response.data);
    }

    UniqueLock lk(_mutex);
    if (!respStatus.isOK()) {
        LOG(1) << "'listDatabases' failed: " << respStatus;
        _fail_inlock(&lk, respStatus);
        return;
    }

    // There should not be any cloners yet.
    invariant(_databaseCloners.size() == 0);
    const auto respBSON = cbd.response.data;

    auto databasesArray = _parseListDatabasesResponse(respBSON);
    if (!databasesArray.isOK()) {
        LOG(1) << "'listDatabases' returned a malformed response: "
               << databasesArray.getStatus().toString();
        _fail_inlock(&lk, databasesArray.getStatus());
        return;
    }

    auto dbsArray = databasesArray.getValue();
    // Ensure that the 'admin' database is the first element in the array of databases so that it
    // will be the first to be cloned. This allows users to authenticate against a database while
    // initial sync is occurring.
    _setAdminAsFirst(dbsArray);

    for (BSONElement arrayElement : dbsArray) {
        const BSONObj dbBSON = arrayElement.Obj();

        // Check to see if we want to exclude this db from the clone.
        if (!_includeDbFn(dbBSON)) {
            LOG(1) << "Excluding database from the 'listDatabases' response: " << dbBSON;
            continue;
        }

        if (!dbBSON.hasField("name")) {
            LOG(1) << "Excluding database due to the 'listDatabases' response not containing a "
                      "'name' field for this entry: "
                   << dbBSON;
        }

        const std::string dbName = dbBSON["name"].str();
        std::shared_ptr<DatabaseCloner> dbCloner{nullptr};

        // filters for DatabasesCloner.
        const auto collectionFilterPred = [dbName](const BSONObj& collInfo) {
            const auto collName = collInfo["name"].str();
            const NamespaceString ns(dbName, collName);
            if (ns.isSystem() && !ns.isLegalClientSystemNS()) {
                LOG(1) << "Skipping 'system' collection: " << ns.ns();
                return false;
            }
            if (!ns.isNormal()) {
                LOG(1) << "Skipping non-normal collection: " << ns.ns();
                return false;
            }

            LOG(2) << "Allowing cloning of collectionInfo: " << collInfo;
            return true;
        };
        const auto onCollectionFinish = [](const Status& status, const NamespaceString& srcNss) {
            if (status.isOK()) {
                LOG(1) << "collection clone finished: " << srcNss;
            } else {
                error() << "collection clone for '" << srcNss << "' failed due to "
                        << status.toString();
            }
        };
        const auto onDbFinish = [this, dbName](const Status& status) {
            _onEachDBCloneFinish(status, dbName);
        };
        Status startStatus = Status::OK();
        try {
            dbCloner.reset(new DatabaseCloner(
                _exec,
                _dbWorkThreadPool,
                _source,
                dbName,
                BSONObj(),  // do not filter collections out during listCollections call.
                collectionFilterPred,
                _storage,  // use storage provided.
                onCollectionFinish,
                onDbFinish));
            if (_scheduleDbWorkFn) {
                dbCloner->setScheduleDbWorkFn_forTest(_scheduleDbWorkFn);
            }
            if (_startCollectionClonerFn) {
                dbCloner->setStartCollectionClonerFn(_startCollectionClonerFn);
            }
            // Start first database cloner.
            if (_databaseCloners.empty()) {
                startStatus = dbCloner->startup();
            }
        } catch (...) {
            startStatus = exceptionToStatus();
        }

        if (!startStatus.isOK()) {
            std::string err = str::stream() << "could not create cloner for database: " << dbName
                                            << " due to: " << startStatus.toString();
            _setStatus_inlock({ErrorCodes::InitialSyncFailure, err});
            error() << err;
            break;  // exit for_each loop
        }

        // add cloner to list.
        _databaseCloners.push_back(dbCloner);
    }
    if (_databaseCloners.size() == 0) {
        if (_status.isOK()) {
            _succeed_inlock(&lk);
        } else {
            _fail_inlock(&lk, _status);
        }
    }
}
Пример #2
0
void BackgroundSync::_produce(
    OperationContext* txn,
    ReplicationCoordinatorExternalState* replicationCoordinatorExternalState) {
    // this oplog reader does not do a handshake because we don't want the server it's syncing
    // from to track how far it has synced
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        if (_lastOpTimeFetched.isNull()) {
            // then we're initial syncing and we're still waiting for this to be set
            lock.unlock();
            sleepsecs(1);
            // if there is no one to sync from
            return;
        }

        if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary() ||
            inShutdownStrict()) {
            return;
        }
    }

    while (MONGO_FAIL_POINT(rsBgSyncProduce)) {
        sleepmillis(0);
    }


    // find a target to sync from the last optime fetched
    OpTime lastOpTimeFetched;
    HostAndPort source;
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        lastOpTimeFetched = _lastOpTimeFetched;
        _syncSourceHost = HostAndPort();
    }
    SyncSourceResolverResponse syncSourceResp =
        _syncSourceResolver.findSyncSource(txn, lastOpTimeFetched);

    if (syncSourceResp.syncSourceStatus == ErrorCodes::OplogStartMissing) {
        // All (accessible) sync sources were too stale.
        error() << "too stale to catch up -- entering maintenance mode";
        log() << "Our newest OpTime : " << lastOpTimeFetched;
        log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen;
        log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
        StorageInterface::get(txn)
            ->setMinValid(txn, {lastOpTimeFetched, syncSourceResp.earliestOpTimeSeen});
        auto status = _replCoord->setMaintenanceMode(true);
        if (!status.isOK()) {
            warning() << "Failed to transition into maintenance mode.";
        }
        bool worked = _replCoord->setFollowerMode(MemberState::RS_RECOVERING);
        if (!worked) {
            warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
                      << ". Current state: " << _replCoord->getMemberState();
        }
        return;
    } else if (syncSourceResp.isOK() && !syncSourceResp.getSyncSource().empty()) {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _syncSourceHost = syncSourceResp.getSyncSource();
        source = _syncSourceHost;
    } else {
        if (!syncSourceResp.isOK()) {
            log() << "failed to find sync source, received error "
                  << syncSourceResp.syncSourceStatus.getStatus();
        }
        // No sync source found.
        sleepsecs(1);
        return;
    }

    long long lastHashFetched;
    {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        if (_stopped) {
            return;
        }
        lastOpTimeFetched = _lastOpTimeFetched;
        lastHashFetched = _lastFetchedHash;
        _replCoord->signalUpstreamUpdater();
    }

    // "lastFetched" not used. Already set in _enqueueDocuments.
    Status fetcherReturnStatus = Status::OK();
    DataReplicatorExternalStateBackgroundSync dataReplicatorExternalState(
        _replCoord, replicationCoordinatorExternalState, this);
    OplogFetcher* oplogFetcher;
    try {
        auto config = _replCoord->getConfig();
        auto onOplogFetcherShutdownCallbackFn =
            [&fetcherReturnStatus](const Status& status, const OpTimeWithHash& lastFetched) {
                fetcherReturnStatus = status;
            };

        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _oplogFetcher =
            stdx::make_unique<OplogFetcher>(&_threadPoolTaskExecutor,
                                            OpTimeWithHash(lastHashFetched, lastOpTimeFetched),
                                            source,
                                            NamespaceString(rsOplogName),
                                            config,
                                            &dataReplicatorExternalState,
                                            stdx::bind(&BackgroundSync::_enqueueDocuments,
                                                       this,
                                                       stdx::placeholders::_1,
                                                       stdx::placeholders::_2,
                                                       stdx::placeholders::_3,
                                                       stdx::placeholders::_4),
                                            onOplogFetcherShutdownCallbackFn);
        oplogFetcher = _oplogFetcher.get();
    } catch (const mongo::DBException& ex) {
        fassertFailedWithStatus(34440, exceptionToStatus());
    }

    LOG(1) << "scheduling fetcher to read remote oplog on " << _syncSourceHost << " starting at "
           << oplogFetcher->getCommandObject_forTest()["filter"];
    auto scheduleStatus = oplogFetcher->startup();
    if (!scheduleStatus.isOK()) {
        warning() << "unable to schedule fetcher to read remote oplog on " << source << ": "
                  << scheduleStatus;
        return;
    }

    oplogFetcher->join();
    LOG(1) << "fetcher stopped reading remote oplog on " << source;

    // If the background sync is stopped after the fetcher is started, we need to
    // re-evaluate our sync source and oplog common point.
    if (isStopped()) {
        return;
    }

    if (fetcherReturnStatus.code() == ErrorCodes::OplogOutOfOrder) {
        // This is bad because it means that our source
        // has not returned oplog entries in ascending ts order, and they need to be.

        warning() << fetcherReturnStatus.toString();
        // Do not blacklist the server here, it will be blacklisted when we try to reuse it,
        // if it can't return a matching oplog start from the last fetch oplog ts field.
        return;
    } else if (fetcherReturnStatus.code() == ErrorCodes::OplogStartMissing ||
               fetcherReturnStatus.code() == ErrorCodes::RemoteOplogStale) {
        // Rollback is a synchronous operation that uses the task executor and may not be
        // executed inside the fetcher callback.
        const int messagingPortTags = 0;
        ConnectionPool connectionPool(messagingPortTags);
        std::unique_ptr<ConnectionPool::ConnectionPtr> connection;
        auto getConnection = [&connection, &connectionPool, source]() -> DBClientBase* {
            if (!connection.get()) {
                connection.reset(new ConnectionPool::ConnectionPtr(
                    &connectionPool, source, Date_t::now(), kOplogSocketTimeout));
            };
            return connection->get();
        };

        {
            stdx::lock_guard<stdx::mutex> lock(_mutex);
            lastOpTimeFetched = _lastOpTimeFetched;
        }

        log() << "Starting rollback due to " << fetcherReturnStatus;

        // Wait till all buffered oplog entries have drained and been applied.
        auto lastApplied = _replCoord->getMyLastAppliedOpTime();
        if (lastApplied != lastOpTimeFetched) {
            log() << "Waiting for all operations from " << lastApplied << " until "
                  << lastOpTimeFetched << " to be applied before starting rollback.";
            while (lastOpTimeFetched > (lastApplied = _replCoord->getMyLastAppliedOpTime())) {
                sleepmillis(10);
                if (isStopped() || inShutdown()) {
                    return;
                }
            }
        }
        // check that we are at minvalid, otherwise we cannot roll back as we may be in an
        // inconsistent state
        BatchBoundaries boundaries = StorageInterface::get(txn)->getMinValid(txn);
        if (!boundaries.start.isNull() || boundaries.end > lastApplied) {
            fassertNoTrace(18750,
                           Status(ErrorCodes::UnrecoverableRollbackError,
                                  str::stream()
                                      << "need to rollback, but in inconsistent state. "
                                      << "minvalid: " << boundaries.end.toString()
                                      << " > our last optime: " << lastApplied.toString()));
        }

        _rollback(txn, source, getConnection);
        stop();
    } else if (fetcherReturnStatus == ErrorCodes::InvalidBSON) {
        Seconds blacklistDuration(60);
        warning() << "Fetcher got invalid BSON while querying oplog. Blacklisting sync source "
                  << source << " for " << blacklistDuration << ".";
        _replCoord->blacklistSyncSource(source, Date_t::now() + blacklistDuration);
    } else if (!fetcherReturnStatus.isOK()) {
        warning() << "Fetcher error querying oplog: " << fetcherReturnStatus.toString();
    }
}
Пример #3
0
Status verifySystemIndexes(OperationContext* opCtx) {
    const NamespaceString& systemUsers = AuthorizationManager::usersCollectionNamespace;
    const NamespaceString& systemRoles = AuthorizationManager::rolesCollectionNamespace;

    AutoGetDb autoDb(opCtx, systemUsers.db(), MODE_X);
    if (!autoDb.getDb()) {
        return Status::OK();
    }

    Collection* collection = autoDb.getDb()->getCollection(opCtx, systemUsers);
    if (collection) {
        IndexCatalog* indexCatalog = collection->getIndexCatalog();
        invariant(indexCatalog);

        // Make sure the old unique index from v2.4 on system.users doesn't exist.
        std::vector<IndexDescriptor*> indexes;
        indexCatalog->findIndexesByKeyPattern(opCtx, v1SystemUsersKeyPattern, false, &indexes);

        if (!indexes.empty()) {
            fassert(ErrorCodes::AmbiguousIndexKeyPattern, indexes.size() == 1);
            return Status(ErrorCodes::AuthSchemaIncompatible,
                          "Old 2.4 style user index identified. "
                          "The authentication schema needs to be updated by "
                          "running authSchemaUpgrade on a 2.6 server.");
        }

        // Ensure that system indexes exist for the user collection
        indexCatalog->findIndexesByKeyPattern(opCtx, v3SystemUsersKeyPattern, false, &indexes);
        if (indexes.empty()) {
            try {
                generateSystemIndexForExistingCollection(
                    opCtx, collection, systemUsers, v3SystemUsersIndexSpec);
            } catch (...) {
                return exceptionToStatus();
            }
        }
    }

    // Ensure that system indexes exist for the roles collection, if it exists.
    collection = autoDb.getDb()->getCollection(opCtx, systemRoles);
    if (collection) {
        IndexCatalog* indexCatalog = collection->getIndexCatalog();
        invariant(indexCatalog);

        std::vector<IndexDescriptor*> indexes;
        indexCatalog->findIndexesByKeyPattern(opCtx, v3SystemRolesKeyPattern, false, &indexes);
        if (indexes.empty()) {
            try {
                generateSystemIndexForExistingCollection(
                    opCtx, collection, systemRoles, v3SystemRolesIndexSpec);
            } catch (...) {
                return exceptionToStatus();
            }
        }
    }

    // Ensure that system indexes exist for the sessions collection, if it exists.
    collection = autoDb.getDb()->getCollection(opCtx, sessionCollectionNamespace);
    if (collection) {
        IndexCatalog* indexCatalog = collection->getIndexCatalog();
        invariant(indexCatalog);

        std::vector<IndexDescriptor*> indexes;
        indexCatalog->findIndexesByKeyPattern(opCtx, v1SystemSessionsKeyPattern, false, &indexes);
        if (indexes.empty()) {
            try {
                generateSystemIndexForExistingCollection(
                    opCtx, collection, sessionCollectionNamespace, v1SystemSessionsIndexSpec);
            } catch (...) {
                return exceptionToStatus();
            }
        }
    }

    return Status::OK();
}