Beispiel #1
0
void SyncTail::handleSlaveDelay(const BSONObj& lastOp) {
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    int slaveDelaySecs = durationCount<Seconds>(replCoord->getSlaveDelaySecs());

    // ignore slaveDelay if the box is still initializing. once
    // it becomes secondary we can worry about it.
    if (slaveDelaySecs > 0 && replCoord->getMemberState().secondary()) {
        const Timestamp ts = lastOp["ts"].timestamp();
        long long a = ts.getSecs();
        long long b = time(0);
        long long lag = b - a;
        long long sleeptime = slaveDelaySecs - lag;
        if (sleeptime > 0) {
            uassert(12000,
                    "rs slaveDelay differential too big check clocks and systems",
                    sleeptime < 0x40000000);
            if (sleeptime < 60) {
                sleepsecs((int)sleeptime);
            } else {
                warning() << "slavedelay causing a long sleep of " << sleeptime << " seconds";
                // sleep(hours) would prevent reconfigs from taking effect & such!
                long long waitUntil = b + sleeptime;
                while (time(0) < waitUntil) {
                    sleepsecs(6);

                    // Handle reconfigs that changed the slave delay
                    if (durationCount<Seconds>(replCoord->getSlaveDelaySecs()) != slaveDelaySecs)
                        break;
                }
            }
        }
    }  // endif slaveDelay
}
Beispiel #2
0
void SyncSourceFeedback::run() {
    Client::initThread("SyncSourceFeedback");

    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    while (true) {  // breaks once _shutdownSignaled is true
        {
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            while (!_positionChanged && !_shutdownSignaled) {
                if (_cond.wait_for(lock, _keepAliveInterval) == stdx::cv_status::timeout) {
                    break;
                }
            }

            if (_shutdownSignaled) {
                break;
            }

            _positionChanged = false;
        }

        auto txn = cc().makeOperationContext();
        MemberState state = replCoord->getMemberState();
        if (state.primary() || state.startup()) {
            _resetConnection();
            continue;
        }
        const HostAndPort target = BackgroundSync::get()->getSyncTarget();
        if (_syncTarget != target) {
            _resetConnection();
            _syncTarget = target;
        }
        if (!hasConnection()) {
            // fix connection if need be
            if (target.empty()) {
                sleepmillis(500);
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
                continue;
            }
            if (!_connect(txn.get(), target)) {
                sleepmillis(500);
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
                continue;
            }
        }
        Status status = updateUpstream(txn.get());
        if (!status.isOK()) {
            sleepmillis(500);
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            _positionChanged = true;
        }
    }
}
    /**
     * Sends the replication handshake objects prepared by the replication coordinator (for
     * this node as well as all chained members) to the current sync source.
     *
     * 'txn' is not used by this implementation.
     *
     * Returns true if this node is primary (nothing to handshake with) or if every handshake
     * command succeeded. Returns false after resetting the connection if any handshake failed
     * or threw a DBException.
     */
    bool SyncSourceFeedback::replHandshake(OperationContext* txn) {
        ReplicationCoordinator* const coordinator = getGlobalReplicationCoordinator();
        if (coordinator->getMemberState().primary()) {
            // a primary has no one to handshake to
            return true;
        }

        std::vector<BSONObj> handshakes;
        coordinator->prepareReplSetUpdatePositionCommandHandshakes(&handshakes);
        LOG(1) << "handshaking upstream updater";

        for (std::vector<BSONObj>::size_type i = 0; i < handshakes.size(); ++i) {
            BSONObj& handshake = handshakes[i];
            BSONObj reply;
            try {
                LOG(2) << "Sending to " << _connection.get()->toString()
                       << " the replication handshake: " << handshake;
                if (_connection->runCommand("admin", handshake, reply)) {
                    continue;
                }

                const std::string errMsg = reply["errmsg"].valuestrsafe();
                massert(17447,
                        "upstream updater is not supported by the member from which we are "
                        "syncing, please update all nodes to 2.6 or later.",
                        errMsg.find("no such cmd") == std::string::npos);

                error() << "Error while handshaking the upstream updater: " << errMsg;

                // The sync source does not know about us when it answers NodeNotFound.
                // TODO(dannenberg) after 3.0, remove the string comparison
                const bool unknownToSyncSource =
                    reply["code"].numberInt() == ErrorCodes::NodeNotFound ||
                    errMsg.find("could not be found in replica set config while attempting to "
                                "associate it with") != std::string::npos;
                if (unknownToSyncSource) {
                    // black list the sync target for 10 seconds and find a new one
                    coordinator->blacklistSyncSource(_syncTarget,
                                                     Date_t(curTimeMillis64() + 10*1000));
                    BackgroundSync::get()->clearSyncTarget();
                }

                _resetConnection();
                return false;
            }
            catch (const DBException& ex) {
                log() << "SyncSourceFeedback error sending handshake: " << ex.what() << endl;
                _resetConnection();
                return false;
            }
        }
        return true;
    }
    /**
     * Sends this node's replication progress to the sync source over _connection.
     *
     * 'txn' is not used by this implementation.
     *
     * Returns Status::OK() without sending anything when this node is primary or when the
     * update command could not be prepared. Returns NodeNotFound when a handshake is still
     * pending. If sending throws or the reply carries an error, the sync target is
     * blacklisted for half a second, cleared, and the connection is reset; the failure is
     * returned as a Status.
     */
    Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
        ReplicationCoordinator* const coordinator = getGlobalReplicationCoordinator();
        if (coordinator->getMemberState().primary()) {
            // a primary has no one to update to
            return Status::OK();
        }

        BSONObjBuilder positionCmd;
        {
            boost::unique_lock<boost::mutex> lk(_mtx);
            if (_handshakeNeeded) {
                // Hold back updates while some nodes have not been handshaked yet
                return Status(ErrorCodes::NodeNotFound,
                              "Need to send handshake before updating position upstream");
            }

            // Failure to build the command likely means the node was removed from the set
            const bool prepared =
                coordinator->prepareReplSetUpdatePositionCommand(&positionCmd);
            if (!prepared) {
                return Status::OK();
            }
        }

        BSONObj reply;
        LOG(2) << "Sending slave oplog progress to upstream updater: " << positionCmd.done();
        try {
            _connection->runCommand("admin", positionCmd.obj(), reply);
        }
        catch (const DBException& ex) {
            log() << "SyncSourceFeedback error sending update: " << ex.what() << endl;
            // blacklist the sync target for .5 seconds and find a new one
            coordinator->blacklistSyncSource(_syncTarget,
                                             Date_t(curTimeMillis64() + 500));
            BackgroundSync::get()->clearSyncTarget();
            _resetConnection();
            return ex.toStatus();
        }

        Status replyStatus = Command::getStatusFromCommandResult(reply);
        if (replyStatus.isOK()) {
            return replyStatus;
        }

        log() << "SyncSourceFeedback error sending update, response: " << reply.toString() <<endl;
        // blacklist the sync target for .5 seconds and find a new one
        coordinator->blacklistSyncSource(_syncTarget,
                                         Date_t(curTimeMillis64() + 500));
        BackgroundSync::get()->clearSyncTarget();
        _resetConnection();
        return replyStatus;
    }
Beispiel #5
0
        /**
         * Handles a request to resync this node.
         *
         * The whole command runs under a MODE_X scoped transaction and the global write lock.
         *
         * When replica sets are in use:
         *   - fails with NotYetInitialized while the member is in the startup state;
         *   - fails with NotSecondary if the member is primary or if the transition to
         *     RS_STARTUP2 is refused (the error text is "primaries cannot resync" in both
         *     cases);
         *   - otherwise sets the initial-sync-requested flag on BackgroundSync and returns
         *     true.
         *
         * Otherwise (master/slave replication):
         *   - with "force" set in 'cmdObj', waits for the current sync to finish and marks
         *     replication as dead with reason "resync forced";
         *   - if replication is not marked dead, sets 'errmsg' and returns false;
         *   - otherwise waits for the current sync to finish, forces a resync of all sources
         *     and appends an "info" field to 'result'.
         *
         * Returns true on success; on failure returns false, or the value of
         * appendCommandStatus() for the replica set error cases.
         */
        virtual bool run(OperationContext* txn,
                         const string& dbname,
                         BSONObj& cmdObj,
                         int,
                         string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {

            ScopedTransaction transaction(txn, MODE_X);
            Lock::GlobalWrite globalWriteLock(txn->lockState());

            ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
            if (replCoord->getSettings().usingReplSets()) {
                const MemberState memberState = replCoord->getMemberState();
                if (memberState.startup()) {
                    return appendCommandStatus(result, Status(ErrorCodes::NotYetInitialized,
                                                              "no replication yet active"));
                }
                // setFollowerMode() is only attempted when the member is not primary.
                if (memberState.primary() ||
                        !replCoord->setFollowerMode(MemberState::RS_STARTUP2)) {
                    return appendCommandStatus(result, Status(ErrorCodes::NotSecondary,
                                                              "primaries cannot resync"));
                }
                BackgroundSync::get()->setInitialSyncRequestedFlag(true);
                return true;
            }

            // below this comment pertains only to master/slave replication
            if ( cmdObj.getBoolField( "force" ) ) {
                if ( !waitForSyncToFinish(txn, errmsg ) )
                    return false;
                replAllDead = "resync forced";
            }
            // TODO(dannenberg) replAllDead is bad and should be removed when masterslave is removed
            if (!replAllDead) {
                errmsg = "not dead, no need to resync";
                return false;
            }
            if ( !waitForSyncToFinish(txn, errmsg ) )
                return false;

            ReplSource::forceResyncDead( txn, "client" );
            result.append( "info", "triggered resync for all sources" );

            return true;
        }
Beispiel #6
0
    /**
     * Sends this node's replication progress to the sync source over _connection.
     *
     * 'txn' is not used by this implementation.
     *
     * Returns Status::OK() without sending anything when this node is primary or when the
     * update command could not be prepared. If sending throws, the sync source is abandoned
     * (blacklisted for half a second, cleared, connection reset) and the exception is
     * returned as a Status. If the reply carries an error, that error is returned; the sync
     * source is abandoned as well unless the error is InvalidReplicaSetConfig and the reply
     * reports a "cfgver" that is not older than this node's config version.
     */
    Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
        ReplicationCoordinator* const coordinator = getGlobalReplicationCoordinator();
        if (coordinator->getMemberState().primary()) {
            // a primary has no one to update to
            return Status::OK();
        }

        // Blacklists the sync target for .5 seconds so that a new one gets picked.
        const auto abandonSyncSource = [this, coordinator] {
            coordinator->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500));
            BackgroundSync::get()->clearSyncTarget();
            _resetConnection();
        };

        BSONObjBuilder positionCmd;
        {
            stdx::unique_lock<stdx::mutex> lk(_mtx);
            // Failure to build the command likely means the node was removed from the set
            const bool prepared =
                coordinator->prepareReplSetUpdatePositionCommand(&positionCmd);
            if (!prepared) {
                return Status::OK();
            }
        }

        BSONObj reply;
        LOG(2) << "Sending slave oplog progress to upstream updater: " << positionCmd.done();
        try {
            _connection->runCommand("admin", positionCmd.obj(), reply);
        }
        catch (const DBException& ex) {
            log() << "SyncSourceFeedback error sending update: " << ex.what() << endl;
            abandonSyncSource();
            return ex.toStatus();
        }

        Status replyStatus = Command::getStatusFromCommandResult(reply);
        if (replyStatus.isOK()) {
            return replyStatus;
        }

        log() << "SyncSourceFeedback error sending update, response: " << reply.toString() <<endl;

        // Keep the sync source only when it rejected us because it has a newer config.
        const bool shouldAbandon =
            replyStatus != ErrorCodes::InvalidReplicaSetConfig ||
            reply["cfgver"].eoo() ||
            reply["cfgver"].numberLong() < coordinator->getConfig().getConfigVersion();
        if (shouldAbandon) {
            abandonSyncSource();
        }

        return replyStatus;
    }
// Applies one batch of oplog entries using the writer thread pool and then writes the same
// entries to the local oplog.
//
// Returns the OpTime reported by writeOpsToOplog(), or a default-constructed OpTime if
// shutdown is in progress once the oplog write has returned. Triggers fassert 28527 if this
// node is primary and is not waiting for the applier to drain.
OpTime SyncTail::multiApply(OperationContext* txn, const OpQueue& ops) {
    invariant(_applyFunc);

    const auto& batch = ops.getDeque();

    if (getGlobalServiceContext()->getGlobalStorageEngine()->isMmapV1()) {
        // Prefetch every operation of the batch on the prefetcher ThreadPool.
        prefetchOps(batch, &_prefetcherPool);
    }

    std::vector<std::vector<BSONObj>> perWriterOps(replWriterThreadCount);
    fillWriterVectors(txn, batch, &perWriterOps);
    LOG(2) << "replication batch size is " << batch.size() << endl;

    // This mutex has to be taken before the write locks that are acquired later. Holding it
    // for the whole write does not matter because all readers are blocked anyway.
    stdx::lock_guard<SimpleMutex> fsyncGuard(filesLockedFsync);

    // Readers stay blocked until the batch is done.
    Lock::ParallelBatchWriterMode batchWriterMode(txn->lockState());

    ReplicationCoordinator* const coordinator = getGlobalReplicationCoordinator();
    const bool primaryNotDraining =
        coordinator->getMemberState().primary() && !coordinator->isWaitingForApplierToDrain();
    if (primaryNotDraining) {
        severe() << "attempting to replicate ops while primary";
        fassertFailed(28527);
    }

    applyOps(perWriterOps, &_writerPool, _applyFunc, this);

    OpTime lastWritten;
    {
        // The writer pool is joined on every way out of this scope, including the early
        // return below.
        ON_BLOCK_EXIT([&] { _writerPool.join(); });

        std::vector<BSONObj> rawEntries;
        rawEntries.reserve(batch.size());
        for (auto&& entry : batch) {
            rawEntries.emplace_back(entry.raw);
        }

        lastWritten = writeOpsToOplog(txn, rawEntries);
        if (inShutdown()) {
            return OpTime();
        }
    }

    // All database writes are done and the oplog has been updated to match.
    return lastWritten;
}
Beispiel #8
0
/**
 * Entry point of the "rsSync" thread.
 *
 * Applies the configured index prefetch setting once, then loops until shutdown:
 *   - waits (1s) while the member is still in the startup state;
 *   - exits the loop for arbiters;
 *   - waits (5s) while the member is removed from the set;
 *   - waits (1s) while the member is primary and not waiting for the applier to drain;
 *   - runs an initial sync when the last optime is null, the initial sync flag is set, or an
 *     initial sync was requested;
 *   - otherwise switches to RS_RECOVERING and tails the oplog through SyncTail.
 *
 * DBException and std::exception raised by the sync logic are logged and followed by a 10
 * second pause before the loop continues.
 */
void runSyncThread() {
    Client::initThread("rsSync");
    AuthorizationSession::get(cc())->grantInternalAuthorization();
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

    // Set initial indexPrefetch setting
    const std::string& prefetch = replCoord->getSettings().rsIndexPrefetch;
    if (!prefetch.empty()) {
        BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL;
        if (prefetch == "none")
            prefetchConfig = BackgroundSync::PREFETCH_NONE;
        else if (prefetch == "_id_only")
            prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
        else if (prefetch == "all")
            prefetchConfig = BackgroundSync::PREFETCH_ALL;
        else {
            // Unknown values keep the PREFETCH_ALL default chosen above.
            warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting "
                      << "to \"all\"";
        }
        BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
    }

    while (!inShutdown()) {
        // After a reconfig, we may not be in the replica set anymore, so
        // check that we are in the set (and not an arbiter) before
        // trying to sync with other replicas.
        // TODO(spencer): Use a condition variable to await loading a config
        if (replCoord->getMemberState().startup()) {
            warning() << "did not receive a valid config yet";
            sleepsecs(1);
            continue;
        }

        const MemberState memberState = replCoord->getMemberState();

        // An arbiter can never transition to any other state, and doesn't replicate, ever
        if (memberState.arbiter()) {
            break;
        }

        // If we are removed then we don't belong to the set anymore
        if (memberState.removed()) {
            sleepsecs(5);
            continue;
        }

        try {
            if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
                sleepsecs(1);
                continue;
            }

            bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
            // Check criteria for doing an initial sync:
            // 1. If the oplog is empty, do an initial sync
            // 2. If minValid has _initialSyncFlag set, do an initial sync
            // 3. If initialSyncRequested is true
            if (replCoord->getMyLastOptime().isNull() ||
                getInitialSyncFlag() || initialSyncRequested) {
                syncDoInitialSync();
                continue;  // start from top again in case sync failed.
            }
            if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
                continue;
            }

            /* we have some data.  continue tailing. */
            SyncTail tail(BackgroundSync::get(), multiSyncApply);
            tail.oplogApplication();
        } catch (const DBException& e) {
            log() << "Received exception while syncing: " << e.toString();
            sleepsecs(10);
        } catch (const std::exception& e) {
            log() << "Received exception while syncing: " << e.what();
            sleepsecs(10);
        }
    }
}
Beispiel #9
0
/**
 * Entry point of the "rsSync" thread.
 *
 * Applies the prefetch index mode from the replication settings (when one is set), then
 * loops until shutdown:
 *   - waits (1s) while the member is still in the startup state;
 *   - exits the loop for arbiters;
 *   - waits (5s) while the member is removed from the set;
 *   - waits (1s) while the member is primary and not waiting for the applier to drain;
 *   - runs an initial sync when the last optime is null, the initial sync flag is set, or an
 *     initial sync was requested;
 *   - otherwise switches to RS_RECOVERING and tails the oplog through SyncTail.
 *
 * Any exception escaping the sync logic ends the process through std::terminate().
 */
void runSyncThread() {
    Client::initThread("rsSync");
    AuthorizationSession::get(cc())->grantInternalAuthorization();
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

    // Overwrite prefetch index mode in BackgroundSync if ReplSettings has a mode set.
    ReplSettings replSettings = replCoord->getSettings();
    if (replSettings.isPrefetchIndexModeSet())
        BackgroundSync::get()->setIndexPrefetchConfig(replSettings.getPrefetchIndexMode());

    while (!inShutdown()) {
        // After a reconfig, we may not be in the replica set anymore, so
        // check that we are in the set (and not an arbiter) before
        // trying to sync with other replicas.
        // TODO(spencer): Use a condition variable to await loading a config
        if (replCoord->getMemberState().startup()) {
            warning() << "did not receive a valid config yet";
            sleepsecs(1);
            continue;
        }

        const MemberState memberState = replCoord->getMemberState();

        // An arbiter can never transition to any other state, and doesn't replicate, ever
        if (memberState.arbiter()) {
            break;
        }

        // If we are removed then we don't belong to the set anymore
        if (memberState.removed()) {
            sleepsecs(5);
            continue;
        }

        try {
            if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
                sleepsecs(1);
                continue;
            }

            bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
            // Check criteria for doing an initial sync:
            // 1. If the oplog is empty, do an initial sync
            // 2. If minValid has _initialSyncFlag set, do an initial sync
            // 3. If initialSyncRequested is true
            if (replCoord->getMyLastOptime().isNull() ||
                getInitialSyncFlag() || initialSyncRequested) {
                syncDoInitialSync();
                continue;  // start from top again in case sync failed.
            }
            if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
                continue;
            }

            /* we have some data.  continue tailing. */
            SyncTail tail(BackgroundSync::get(), multiSyncApply);
            tail.oplogApplication();
        } catch (...) {
            // Failures here are not recovered from.
            std::terminate();
        }
    }
}
    void SyncSourceFeedback::run() {
        Client::initThread("SyncSourceFeedback");
        OperationContextImpl txn;

        bool positionChanged = false;
        bool handshakeNeeded = false;
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        while (!inShutdown()) { // TODO(spencer): Remove once legacy repl coordinator is gone.
            {
                boost::unique_lock<boost::mutex> lock(_mtx);
                while (!_positionChanged && !_handshakeNeeded && !_shutdownSignaled) {
                    _cond.wait(lock);
                }

                if (_shutdownSignaled) {
                    break;
                }

                positionChanged = _positionChanged;
                handshakeNeeded = _handshakeNeeded;
                _positionChanged = false;
                _handshakeNeeded = false;
            }

            MemberState state = replCoord->getMemberState();
            if (state.primary() || state.startup()) {
                _resetConnection();
                continue;
            }
            const HostAndPort target = BackgroundSync::get()->getSyncTarget();
            if (_syncTarget != target) {
                _resetConnection();
                _syncTarget = target;
            }
            if (!hasConnection()) {
                // fix connection if need be
                if (target.empty()) {
                    sleepmillis(500);
                    continue;
                }
                if (!_connect(&txn, target)) {
                    sleepmillis(500);
                    continue;
                }
                handshakeNeeded = true;
            }
            if (handshakeNeeded) {
                positionChanged = true;
                if (!replHandshake(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _handshakeNeeded = true;
                    continue;
                }
            }
            if (positionChanged) {
                Status status = updateUpstream(&txn);
                if (!status.isOK()) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _positionChanged = true;
                    if (status == ErrorCodes::NodeNotFound) {
                        _handshakeNeeded = true;
                    }
                }
            }
        }
        cc().shutdown();
    }