コード例 #1
0
    // Produce a reply to a RAFT-style RequestVote RPC; this is MongoDB ReplSetFresh command
    // The caller should validate that the message is for the correct set, and has the required data
    void TopologyCoordinatorImpl::prepareRequestVoteResponse(const Date_t now,
                                                             const BSONObj& cmdObj,
                                                             const OpTime& lastOpApplied,
                                                             std::string& errmsg,
                                                             BSONObjBuilder& result) {

        string who = cmdObj["who"].String();
        int cfgver = cmdObj["cfgver"].Int();
        OpTime opTime(cmdObj["opTime"].Date());

        bool weAreFresher = false;
        if( _currentConfig.getConfigVersion() > cfgver ) {
            log() << "replSet member " << who << " is not yet aware its cfg version "
                  << cfgver << " is stale";
            result.append("info", "config version stale");
            weAreFresher = true;
        }
        // check not only our own optime, but any other member we can reach
        else if( opTime < _commitOkayThrough ||
                 opTime < _latestKnownOpTime())  {
            weAreFresher = true;
        }
        result.appendDate("opTime", lastOpApplied.asDate());
        result.append("fresher", weAreFresher);

        bool doVeto = _shouldVeto(cmdObj, errmsg);
        result.append("veto",doVeto);
        if (doVeto) {
            result.append("errmsg", errmsg);
        }
    }
コード例 #2
0
    // update internal state with heartbeat response, and run topology checks
    void TopologyCoordinatorImpl::updateHeartbeatInfo(Date_t now, const HeartbeatInfo& newInfo) {

        // Fill in the new heartbeat data for the appropriate member
        for (Member *m = _otherMembers.head(); m; m=m->next()) {
            if (m->id() == newInfo.id()) {
                m->get_hbinfo().updateFromLastPoll(newInfo);
                break;
            }
        }

        // Don't bother to make any changes if we are an election candidate
        if (_busyWithElectSelf) return;

        // ex-checkelectableset begins here
        unsigned int latestOp = _latestKnownOpTime().getSecs();
        
        // make sure the electable set is up-to-date
        if (_aMajoritySeemsToBeUp()
            && !_currentConfig.self->arbiterOnly    // not an arbiter
            && (_currentConfig.self->priority > 0)  // not priority 0
            && (_stepDownUntil <= now)              // stepDown timer has expired
            && (_memberState == MemberState::RS_SECONDARY)
            // we are within 10 seconds of primary
            && (latestOp == 0 || _lastApplied.getSecs() >= latestOp - 10)) {
            _electableSet.insert(_currentConfig.self->_id);
        }
        else {
            _electableSet.erase(_currentConfig.self->_id);
        }

        // check if we should ask the primary (possibly ourselves) to step down
        const Member* highestPriority = _getHighestPriorityElectable();
        const Member* primary = _currentPrimary;
        
        if (primary && highestPriority &&
            highestPriority->config().priority > primary->config().priority &&
            // if we're stepping down to allow another member to become primary, we
            // better have another member (latestOp), and it should be up-to-date
            latestOp != 0 && highestPriority->hbinfo().opTime.getSecs() >= latestOp - 10) {
            log() << "stepping down " << primary->fullName() << " (priority " <<
                primary->config().priority << "), " << highestPriority->fullName() <<
                " is priority " << highestPriority->config().priority << " and " <<
                (latestOp - highestPriority->hbinfo().opTime.getSecs()) << " seconds behind";

            // Are we primary?
            if (primary->h().isSelf()) {
                // replSetStepDown tries to acquire the same lock
                // msgCheckNewState takes, so we can't call replSetStepDown on
                // ourselves.
                // XXX Eric: schedule relinquish
                //rs->relinquish();
            }
            else {
                // We are not primary.  Step down the remote node.
                BSONObj cmd = BSON( "replSetStepDown" << 1 );
/*                ScopedConn conn(primary->fullName());
                BSONObj result;
                // XXX Eric: schedule stepdown command

                try {
                    if (!conn.runCommand("admin", cmd, result, 0)) {
                        log() << "stepping down " << primary->fullName()
                              << " failed: " << result << endl;
                    }
                }
                catch (DBException &e) {
                    log() << "stepping down " << primary->fullName() << " threw exception: "
                          << e.toString() << endl;
                }

*/
            }
        }


        // ex-checkauth begins here
        {
            int down = 0, authIssue = 0, total = 0;

            for( Member *m = _otherMembers.head(); m; m=m->next() ) {
                total++;

                // all authIssue servers will also be not up
                if (!m->hbinfo().up()) {
                    down++;
                    if (m->hbinfo().authIssue) {
                        authIssue++;
                    }
                }
            }

            // if all nodes are down or failed auth AND at least one failed
            // auth, go into recovering.  If all nodes are down, stay a
            // secondary.
            if (authIssue > 0 && down == total) {
                log() << "replset error could not reach/authenticate against any members";

                if (_currentPrimary == _self) {
                    log() << "auth problems, relinquishing primary" << rsLog;
                    // XXX Eric: schedule relinquish
                    //rs->relinquish();
                }

                _blockSync = true;
                // syncing is how we get into SECONDARY state, so we'll be stuck in
                // RECOVERING until we unblock
                _changeMemberState(MemberState::RS_RECOVERING);
            }
            else {
                _blockSync = false;
            }
        }

        // If a remote is primary, check that it is still up.
        if (_currentPrimary && _currentPrimary->id() != _self->id()) {
            if (!_currentPrimary->hbinfo().up() || 
                !_currentPrimary->hbinfo().hbstate.primary()) {
                _currentPrimary = NULL;
            }
        }

        // Scan the member list's heartbeat data for who is primary, and update ourselves if it's
        // not what _currentPrimary is.
        {
            const Member* remotePrimary(NULL);
            Member* m = _otherMembers.head();
            while (m) {
                DEV verify( m != _self );
                if( m->state().primary() && m->hbinfo().up() ) {
                    if( remotePrimary ) {
                        /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */
                        log() << "replSet info two primaries (transiently)" << rsLog;
                        return;
                    }
                    remotePrimary = m;
                }
                m = m->next();
            }

            if (remotePrimary) {
                // If it's the same as last time, don't do anything further.
                if (_currentPrimary == remotePrimary) {
                    return;
                }
                // Clear last heartbeat message on ourselves (why?)
                _self->lhb() = "";

                // insanity: this is what actually puts arbiters into ARBITER state
                if (_currentConfig.self->arbiterOnly) {
                    _changeMemberState(MemberState::RS_ARBITER);
                    return;
                }

                // If we are also primary, this is a problem.  Determine who should step down.
                if (_memberState == MemberState::RS_PRIMARY) {
                    OpTime remoteElectionTime = remotePrimary->hbinfo().electionTime;
                    log() << "replset: another primary seen with election time " 
                          << remoteElectionTime; 
                    // Step down whoever has the older election time.
                    if (remoteElectionTime > _electionTime) {
                        log() << "stepping down; another primary was elected more recently";
                        // XXX Eric: schedule a relinquish
                        //rs->relinquish();
                        // after completion, set currentprimary to remotePrimary.
                    }
                    else {
                        // else, stick around
                        log() << "another PRIMARY detected but it should step down"
                            " since it was elected earlier than me";
                        return;
                    }
                }

                _currentPrimary = remotePrimary;
                return;
            }
            /* didn't find anyone who is currently primary */
        }

        // If we are primary, check if we can still see majority of the set;
        // stepdown if we can't.
        if (_currentPrimary) {
            /* we must be primary */
            fassert(18505, _currentPrimary == _self);

            if (_shouldRelinquish()) {
                log() << "can't see a majority of the set, relinquishing primary" << rsLog;
                // XXX Eric: schedule a relinquish
                //rs->relinquish();
            }

            return;
        }

        // At this point, there is no primary anywhere.  Check to see if we should become an
        // election candidate.

        // If we can't elect ourselves due to config, can't become a candidate.
        if (!_currentConfig.self->arbiterOnly       // not an arbiter
            && (_currentConfig.self->priority > 0)  // not priority 0
            && (_stepDownUntil <= now)              // stepDown timer has expired
            && (_memberState == MemberState::RS_SECONDARY)) {
            OCCASIONALLY log() << "replSet I don't see a primary and I can't elect myself";
            return;
        }

        // If we can't see a majority, can't become a candidate.
        if (!_aMajoritySeemsToBeUp()) {
            static Date_t last;
            static int n = 0;
            int ll = 0;
            if( ++n > 5 ) ll++;
            if( last + 60 > now ) ll++;
            LOG(ll) << "replSet can't see a majority, will not try to elect self" << rsLog;
            last = now;
            return;
        }

        // If we can't elect ourselves due to the current electable set;
        // we are in the set if we are within 10 seconds of the latest known op (via heartbeats)
        if (!(_electableSet.find(_self->id()) != _electableSet.end())) {
            // we are too far behind to become primary
            return;
        }

        // All checks passed, become a candidate and start election proceedings.

        // don't try to do further elections & such while we are already working on one.
        _busyWithElectSelf = true; 

    // XXX: schedule an election
/*
        try {
            rs->elect.electSelf();
        }
        catch(RetryAfterSleepException&) {
            // we want to process new inbounds before trying this again.  so we just put a checkNewstate in the queue for eval later. 
            requeue();
        }
        catch(...) {
            log() << "replSet error unexpected assertion in rs manager" << rsLog;
        }
        
    }


*/
        _busyWithElectSelf = false;
    }