예제 #1
0
    bool SyncSourceFeedback::replHandshake(OperationContext* txn) {
        // construct a vector of handshake obj for us as well as all chained members
        std::vector<BSONObj> handshakeObjs;
        getGlobalReplicationCoordinator()->prepareReplSetUpdatePositionCommandHandshakes(
                txn,
                &handshakeObjs);
        LOG(1) << "handshaking upstream updater";
        for (std::vector<BSONObj>::iterator it = handshakeObjs.begin();
                it != handshakeObjs.end();
                ++it) {
            BSONObj res;
            try {
                LOG(2) << "Sending to " << _connection.get()->toString() << " the replication "
                        "handshake: " << *it;
                if (!_connection->runCommand("admin", *it, res)) {
                    massert(17447, "upstream updater is not supported by the member from which we"
                            " are syncing, please update all nodes to 2.6 or later.",
                            res["errmsg"].str().find("no such cmd") == std::string::npos);
                    log() << "replSet error while handshaking the upstream updater: "
                        << res["errmsg"].valuestrsafe();

                    _resetConnection();
                    return false;
                }
            }
            catch (const DBException& e) {
                log() << "SyncSourceFeedback error sending handshake: " << e.what() << endl;
                _resetConnection();
                return false;
            }
        }
        return true;
    }
예제 #2
0
    Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getCurrentMemberState().primary()) {
            // primary has no one to update to
            return Status::OK();
        }
        BSONObjBuilder cmd;
        {
            boost::unique_lock<boost::mutex> lock(_mtx);
            if (_handshakeNeeded) {
                // Don't send updates if there are nodes that haven't yet been handshaked
                return Status(ErrorCodes::NodeNotFound,
                              "Need to send handshake before updating position upstream");
            }
            replCoord->prepareReplSetUpdatePositionCommand(txn, &cmd);
        }
        BSONObj res;

        LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
        try {
            _connection->runCommand("admin", cmd.obj(), res);
        }
        catch (const DBException& e) {
            log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
            _resetConnection();
            return e.toStatus();
        }

        Status status = Command::getStatusFromCommandResult(res);
        if (!status.isOK()) {
            log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl;
            _resetConnection();
        }
        return status;
    }
예제 #3
0
    bool SyncSourceFeedback::updateUpstream(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getCurrentMemberState().primary()) {
            // primary has no one to update to
            return true;
        }
        BSONObjBuilder cmd;
        {
            boost::unique_lock<boost::mutex> lock(_mtx);
            if (_handshakeNeeded) {
                // Don't send updates if there are nodes that haven't yet been handshaked
                return false;
            }
            replCoord->prepareReplSetUpdatePositionCommand(txn, &cmd);
        }
        BSONObj res;

        LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
        bool ok;
        try {
            ok = _connection->runCommand("admin", cmd.obj(), res);
        }
        catch (const DBException& e) {
            log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
            _resetConnection();
            return false;
        }
        if (!ok) {
            log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl;
            _resetConnection();
            return false;
        }
        return true;
    }
예제 #4
0
void SyncSourceFeedback::run() {
    Client::initThread("SyncSourceFeedback");

    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    while (true) {  // breaks once _shutdownSignaled is true
        {
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            while (!_positionChanged && !_shutdownSignaled) {
                if (_cond.wait_for(lock, _keepAliveInterval) == stdx::cv_status::timeout) {
                    break;
                }
            }

            if (_shutdownSignaled) {
                break;
            }

            _positionChanged = false;
        }

        auto txn = cc().makeOperationContext();
        MemberState state = replCoord->getMemberState();
        if (state.primary() || state.startup()) {
            _resetConnection();
            continue;
        }
        const HostAndPort target = BackgroundSync::get()->getSyncTarget();
        if (_syncTarget != target) {
            _resetConnection();
            _syncTarget = target;
        }
        if (!hasConnection()) {
            // fix connection if need be
            if (target.empty()) {
                sleepmillis(500);
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
                continue;
            }
            if (!_connect(txn.get(), target)) {
                sleepmillis(500);
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
                continue;
            }
        }
        Status status = updateUpstream(txn.get());
        if (!status.isOK()) {
            sleepmillis(500);
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            _positionChanged = true;
        }
    }
}
예제 #5
0
    bool SyncSourceFeedback::replHandshake(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getCurrentMemberState().primary()) {
            // primary has no one to handshake to
            return true;
        }
        // construct a vector of handshake obj for us as well as all chained members
        std::vector<BSONObj> handshakeObjs;
        replCoord->prepareReplSetUpdatePositionCommandHandshakes(txn, &handshakeObjs);
        LOG(1) << "handshaking upstream updater";
        for (std::vector<BSONObj>::iterator it = handshakeObjs.begin();
                it != handshakeObjs.end();
                ++it) {
            BSONObj res;
            try {
                LOG(2) << "Sending to " << _connection.get()->toString() << " the replication "
                        "handshake: " << *it;
                if (!_connection->runCommand("admin", *it, res)) {
                    std::string errMsg = res["errmsg"].valuestrsafe();
                    massert(17447, "upstream updater is not supported by the member from which we"
                            " are syncing, please update all nodes to 2.6 or later.",
                            errMsg.find("no such cmd") == std::string::npos);

                    log() << "replSet error while handshaking the upstream updater: "
                        << errMsg;

                    // sleep half a second if we are not in our sync source's config
                    // TODO(dannenberg) after 2.8, remove the string comparison 
                    if (res["code"].numberInt() == ErrorCodes::NodeNotFound ||
                            errMsg.find("could not be found in replica set config while attempting "
                                        "to associate it with") != std::string::npos) {

                        // black list sync target for 10 seconds and find a new one
                        replCoord->blacklistSyncSource(_syncTarget,
                                                       Date_t(curTimeMillis64() + 10*1000));
                        BackgroundSync::get()->clearSyncTarget();
                    }

                    _resetConnection();
                    return false;
                }
            }
            catch (const DBException& e) {
                log() << "SyncSourceFeedback error sending handshake: " << e.what() << endl;
                _resetConnection();
                return false;
            }
        }
        return true;
    }
예제 #6
0
    void SyncSourceFeedback::run() {
        Client::initThread("SyncSourceFeedbackThread");
        OperationContextImpl txn;

        bool positionChanged = false;
        bool handshakeNeeded = false;
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        while (!inShutdown()) { // TODO(spencer): Remove once legacy repl coordinator is gone.
            {
                boost::unique_lock<boost::mutex> lock(_mtx);
                while (!_positionChanged && !_handshakeNeeded && !_shutdownSignaled) {
                    _cond.wait(lock);
                }

                if (_shutdownSignaled) {
                    break;
                }

                positionChanged = _positionChanged;
                handshakeNeeded = _handshakeNeeded;
                _positionChanged = false;
                _handshakeNeeded = false;
            }

            MemberState state = replCoord->getCurrentMemberState();
            if (state.primary() || state.fatal() || state.startup()) {
                continue;
            }
            const Member* target = BackgroundSync::get()->getSyncTarget();
            if (_syncTarget != target) {
                _resetConnection();
                _syncTarget = target;
            }
            if (!hasConnection()) {
                // fix connection if need be
                if (!target) {
                    sleepmillis(500);
                    continue;
                }
                if (!_connect(&txn, target->fullName())) {
                    sleepmillis(500);
                    continue;
                }
            }
            if (handshakeNeeded) {
                if (!replHandshake(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _handshakeNeeded = true;
                    continue;
                }
            }
            if (positionChanged) {
                if (!updateUpstream(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _positionChanged = true;
                }
            }
        }
        cc().shutdown();
    }
예제 #7
0
    Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getMemberState().primary()) {
            // primary has no one to update to
            return Status::OK();
        }
        BSONObjBuilder cmd;
        {
            boost::unique_lock<boost::mutex> lock(_mtx);
            if (_handshakeNeeded) {
                // Don't send updates if there are nodes that haven't yet been handshaked
                return Status(ErrorCodes::NodeNotFound,
                              "Need to send handshake before updating position upstream");
            }
            // the command could not be created, likely because the node was removed from the set
            if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) {
                return Status::OK();
            }
        }
        BSONObj res;

        LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
        try {
            _connection->runCommand("admin", cmd.obj(), res);
        }
        catch (const DBException& e) {
            log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
            // blacklist sync target for .5 seconds and find a new one
            replCoord->blacklistSyncSource(_syncTarget,
                                           Date_t(curTimeMillis64() + 500));
            BackgroundSync::get()->clearSyncTarget();
            _resetConnection();
            return e.toStatus();
        }

        Status status = Command::getStatusFromCommandResult(res);
        if (!status.isOK()) {
            log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl;
            // blacklist sync target for .5 seconds and find a new one
            replCoord->blacklistSyncSource(_syncTarget,
                                           Date_t(curTimeMillis64() + 500));
            BackgroundSync::get()->clearSyncTarget();
            _resetConnection();
        }
        return status;
    }
예제 #8
0
    Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getMemberState().primary()) {
            // primary has no one to update to
            return Status::OK();
        }
        BSONObjBuilder cmd;
        {
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            // the command could not be created, likely because the node was removed from the set
            if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) {
                return Status::OK();
            }
        }
        BSONObj res;

        LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
        try {
            _connection->runCommand("admin", cmd.obj(), res);
        }
        catch (const DBException& e) {
            log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
            // blacklist sync target for .5 seconds and find a new one
            replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500));
            BackgroundSync::get()->clearSyncTarget();
            _resetConnection();
            return e.toStatus();
        }

        Status status = Command::getStatusFromCommandResult(res);
        if (!status.isOK()) {
            log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl;
            // blacklist sync target for .5 seconds and find a new one, unless we were rejected due
            // to the syncsource having a newer config
            if (status != ErrorCodes::InvalidReplicaSetConfig || res["cfgver"].eoo() ||
                    res["cfgver"].numberLong() < replCoord->getConfig().getConfigVersion()) {
                replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500));
                BackgroundSync::get()->clearSyncTarget();
                _resetConnection();
            }
        }

        return status;
    }
예제 #9
0
    bool SyncSourceFeedback::_connect(OperationContext* txn, const HostAndPort& host) {
        if (hasConnection()) {
            return true;
        }
        log() << "replset setting syncSourceFeedback to " << host.toString() << rsLog;
        _connection.reset(new DBClientConnection(false, 0, OplogReader::tcp_timeout));
        string errmsg;
        try {
            if (!_connection->connect(host, errmsg) ||
                (getGlobalAuthorizationManager()->isAuthEnabled() && !replAuthenticate())) {
                _resetConnection();
                log() << "repl: " << errmsg << endl;
                return false;
            }
        }
        catch (const DBException& e) {
            log() << "Error connecting to " << host.toString() << ": " << e.what();
            _resetConnection();
            return false;
        }

        return hasConnection();
    }