// Produce a reply to a RAFT-style RequestVote RPC; this is MongoDB ReplSetFresh command // The caller should validate that the message is for the correct set, and has the required data void TopologyCoordinatorImpl::prepareRequestVoteResponse(const Date_t now, const BSONObj& cmdObj, const OpTime& lastOpApplied, std::string& errmsg, BSONObjBuilder& result) { string who = cmdObj["who"].String(); int cfgver = cmdObj["cfgver"].Int(); OpTime opTime(cmdObj["opTime"].Date()); bool weAreFresher = false; if( _currentConfig.getConfigVersion() > cfgver ) { log() << "replSet member " << who << " is not yet aware its cfg version " << cfgver << " is stale"; result.append("info", "config version stale"); weAreFresher = true; } // check not only our own optime, but any other member we can reach else if( opTime < _commitOkayThrough || opTime < _latestKnownOpTime()) { weAreFresher = true; } result.appendDate("opTime", lastOpApplied.asDate()); result.append("fresher", weAreFresher); bool doVeto = _shouldVeto(cmdObj, errmsg); result.append("veto",doVeto); if (doVeto) { result.append("errmsg", errmsg); } }
// update internal state with heartbeat response, and run topology checks void TopologyCoordinatorImpl::updateHeartbeatInfo(Date_t now, const HeartbeatInfo& newInfo) { // Fill in the new heartbeat data for the appropriate member for (Member *m = _otherMembers.head(); m; m=m->next()) { if (m->id() == newInfo.id()) { m->get_hbinfo().updateFromLastPoll(newInfo); break; } } // Don't bother to make any changes if we are an election candidate if (_busyWithElectSelf) return; // ex-checkelectableset begins here unsigned int latestOp = _latestKnownOpTime().getSecs(); // make sure the electable set is up-to-date if (_aMajoritySeemsToBeUp() && !_currentConfig.self->arbiterOnly // not an arbiter && (_currentConfig.self->priority > 0) // not priority 0 && (_stepDownUntil <= now) // stepDown timer has expired && (_memberState == MemberState::RS_SECONDARY) // we are within 10 seconds of primary && (latestOp == 0 || _lastApplied.getSecs() >= latestOp - 10)) { _electableSet.insert(_currentConfig.self->_id); } else { _electableSet.erase(_currentConfig.self->_id); } // check if we should ask the primary (possibly ourselves) to step down const Member* highestPriority = _getHighestPriorityElectable(); const Member* primary = _currentPrimary; if (primary && highestPriority && highestPriority->config().priority > primary->config().priority && // if we're stepping down to allow another member to become primary, we // better have another member (latestOp), and it should be up-to-date latestOp != 0 && highestPriority->hbinfo().opTime.getSecs() >= latestOp - 10) { log() << "stepping down " << primary->fullName() << " (priority " << primary->config().priority << "), " << highestPriority->fullName() << " is priority " << highestPriority->config().priority << " and " << (latestOp - highestPriority->hbinfo().opTime.getSecs()) << " seconds behind"; // Are we primary? if (primary->h().isSelf()) { // replSetStepDown tries to acquire the same lock // msgCheckNewState takes, so we can't call replSetStepDown on // ourselves. // XXX Eric: schedule relinquish //rs->relinquish(); } else { // We are not primary. Step down the remote node. BSONObj cmd = BSON( "replSetStepDown" << 1 ); /* ScopedConn conn(primary->fullName()); BSONObj result; // XXX Eric: schedule stepdown command try { if (!conn.runCommand("admin", cmd, result, 0)) { log() << "stepping down " << primary->fullName() << " failed: " << result << endl; } } catch (DBException &e) { log() << "stepping down " << primary->fullName() << " threw exception: " << e.toString() << endl; } */ } } // ex-checkauth begins here { int down = 0, authIssue = 0, total = 0; for( Member *m = _otherMembers.head(); m; m=m->next() ) { total++; // all authIssue servers will also be not up if (!m->hbinfo().up()) { down++; if (m->hbinfo().authIssue) { authIssue++; } } } // if all nodes are down or failed auth AND at least one failed // auth, go into recovering. If all nodes are down, stay a // secondary. if (authIssue > 0 && down == total) { log() << "replset error could not reach/authenticate against any members"; if (_currentPrimary == _self) { log() << "auth problems, relinquishing primary" << rsLog; // XXX Eric: schedule relinquish //rs->relinquish(); } _blockSync = true; // syncing is how we get into SECONDARY state, so we'll be stuck in // RECOVERING until we unblock _changeMemberState(MemberState::RS_RECOVERING); } else { _blockSync = false; } } // If a remote is primary, check that it is still up. if (_currentPrimary && _currentPrimary->id() != _self->id()) { if (!_currentPrimary->hbinfo().up() || !_currentPrimary->hbinfo().hbstate.primary()) { _currentPrimary = NULL; } } // Scan the member list's heartbeat data for who is primary, and update ourselves if it's // not what _currentPrimary is. { const Member* remotePrimary(NULL); Member* m = _otherMembers.head(); while (m) { DEV verify( m != _self ); if( m->state().primary() && m->hbinfo().up() ) { if( remotePrimary ) { /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */ log() << "replSet info two primaries (transiently)" << rsLog; return; } remotePrimary = m; } m = m->next(); } if (remotePrimary) { // If it's the same as last time, don't do anything further. if (_currentPrimary == remotePrimary) { return; } // Clear last heartbeat message on ourselves (why?) _self->lhb() = ""; // insanity: this is what actually puts arbiters into ARBITER state if (_currentConfig.self->arbiterOnly) { _changeMemberState(MemberState::RS_ARBITER); return; } // If we are also primary, this is a problem. Determine who should step down. if (_memberState == MemberState::RS_PRIMARY) { OpTime remoteElectionTime = remotePrimary->hbinfo().electionTime; log() << "replset: another primary seen with election time " << remoteElectionTime; // Step down whoever has the older election time. if (remoteElectionTime > _electionTime) { log() << "stepping down; another primary was elected more recently"; // XXX Eric: schedule a relinquish //rs->relinquish(); // after completion, set currentprimary to remotePrimary. } else { // else, stick around log() << "another PRIMARY detected but it should step down" " since it was elected earlier than me"; return; } } _currentPrimary = remotePrimary; return; } /* didn't find anyone who is currently primary */ } // If we are primary, check if we can still see majority of the set; // stepdown if we can't. if (_currentPrimary) { /* we must be primary */ fassert(18505, _currentPrimary == _self); if (_shouldRelinquish()) { log() << "can't see a majority of the set, relinquishing primary" << rsLog; // XXX Eric: schedule a relinquish //rs->relinquish(); } return; } // At this point, there is no primary anywhere. Check to see if we should become an // election candidate. // If we can't elect ourselves due to config, can't become a candidate. if (!_currentConfig.self->arbiterOnly // not an arbiter && (_currentConfig.self->priority > 0) // not priority 0 && (_stepDownUntil <= now) // stepDown timer has expired && (_memberState == MemberState::RS_SECONDARY)) { OCCASIONALLY log() << "replSet I don't see a primary and I can't elect myself"; return; } // If we can't see a majority, can't become a candidate. if (!_aMajoritySeemsToBeUp()) { static Date_t last; static int n = 0; int ll = 0; if( ++n > 5 ) ll++; if( last + 60 > now ) ll++; LOG(ll) << "replSet can't see a majority, will not try to elect self" << rsLog; last = now; return; } // If we can't elect ourselves due to the current electable set; // we are in the set if we are within 10 seconds of the latest known op (via heartbeats) if (!(_electableSet.find(_self->id()) != _electableSet.end())) { // we are too far behind to become primary return; } // All checks passed, become a candidate and start election proceedings. // don't try to do further elections & such while we are already working on one. _busyWithElectSelf = true; // XXX: schedule an election /* try { rs->elect.electSelf(); } catch(RetryAfterSleepException&) { // we want to process new inbounds before trying this again. so we just put a checkNewstate in the queue for eval later. requeue(); } catch(...) { log() << "replSet error unexpected assertion in rs manager" << rsLog; } } */ _busyWithElectSelf = false; }