bool TopologyCoordinatorImpl::_shouldRelinquish() const { int vUp = _currentConfig.self->votes; for ( Member *m = _otherMembers.head(); m; m = m->next() ) { if (m->hbinfo().up()) { vUp += m->config().votes; } } return !( vUp * 2 > _totalVotes() ); }
bool Consensus::shouldRelinquish() const { int vUp = rs._self->config().votes; for( Member *m = rs.head(); m; m=m->next() ) { if (m->hbinfo().up()) { vUp += m->config().votes; } } // the manager will handle calling stepdown if another node should be // primary due to priority return !( vUp * 2 > _totalVotes() ); }
bool TopologyCoordinatorImpl::_aMajoritySeemsToBeUp() const { int vUp = _currentConfig.self->votes; for ( Member *m = _otherMembers.head(); m; m=m->next() ) vUp += m->hbinfo().up() ? m->config().votes : 0; return vUp * 2 > _totalVotes(); }
// Attempt to elect this node primary by soliciting votes from the other
// replica set members via the replSetElect command.
//
// Side effects on success: records the election time and calls
// rs.assumePrimary(). On every failure path (too few votes, election took
// too long, config version changed, or an exception) _electionFailed() is
// invoked for our member id.
//
// May throw RetryAfterSleepException to ask the caller to retry after a
// randomized back-off when a freshness tie was detected.
void Consensus::_electSelf() {
    // Respect a prior stepdown: do not run for election until the
    // stepdown period has expired.
    if( time(0) < steppedDown ) return;
    {
        // Refuse to run with no applied ops at all — we have no data to
        // offer as primary.
        const OpTime ord = theReplSet->lastOpTimeWritten;
        if( ord == 0 ) {
            log() << "replSet info not trying to elect self, do not yet have a complete set of data from any point in time" << rsLog;
            return;
        }
    }
    bool allUp;
    int nTies;
    // Check whether our oplog is at least as fresh as everyone else's;
    // also reports whether all members responded and how many ties we saw.
    if( !_weAreFreshest(allUp, nTies) ) {
        return;
    }
    rs.sethbmsg("",9);
    if (!allUp && time(0) - serverGlobalParams.started < 60 * 5) {
        /* the idea here is that if a bunch of nodes bounce all at once, we don't want to drop data
           if we don't have to -- we'd rather be offline and wait a little longer instead
           todo: make this configurable.
        */
        rs.sethbmsg("not electing self, not all members up and we have been up less than 5 minutes");
        return;
    }
    Member& me = *rs._self;
    if( nTies ) {
        /* tie? we then randomly sleep to try to not collide on our voting. */
        /* todo: smarter. */
        if( me.id() == 0 || _sleptLast ) {
            // would be fine for one node not to sleep
            // todo: biggest / highest priority nodes should be the ones that get to not sleep
        }
        else {
            verify( !rs.lockedByMe() ); // bad to go to sleep locked
            // Randomized 50-1049 ms back-off so tied candidates de-synchronize.
            unsigned ms = ((unsigned) rand()) % 1000 + 50;
            DEV log() << "replSet tie " << nTies << " sleeping a little " << ms << "ms" << rsLog;
            _sleptLast = true;
            sleepmillis(ms);
            // Signal the caller to re-run the election attempt after the nap.
            throw RetryAfterSleepException();
        }
    }
    _sleptLast = false;
    time_t start = time(0);
    unsigned meid = me.id();
    // Vote for ourselves first; tally starts with our own yea.
    int tally = _yea( meid );
    bool success = false;
    try {
        log() << "replSet info electSelf " << meid << rsLog;
        BSONObj electCmd = BSON(
            "replSetElect" << 1 <<
            "set" << rs.name() <<
            "who" << me.fullName() <<
            "whoid" << me.hbinfo().id() <<
            "cfgver" << rs._cfg->version <<
            "round" << OID::gen() /* this is just for diagnostics */
        );
        int configVersion;
        list<Target> L;
        rs.getTargets(L, configVersion);
        // Broadcast the elect command to every target in parallel and
        // collect each member's response into L.
        _multiCommand(electCmd, L);
        {
            for( list<Target>::iterator i = L.begin(); i != L.end(); i++ ) {
                LOG(1) << "replSet elect res: " << i->result.toString() << rsLog;
                if( i->ok ) {
                    // Responders return their vote weight (may be negative
                    // for a veto — TODO confirm against replSetElect handler).
                    int v = i->result["vote"].Int();
                    tally += v;
                }
            }
            if( tally*2 <= _totalVotes() ) {
                // Did not achieve a strict majority of all configured votes.
                log() << "replSet couldn't elect self, only received " << tally << " votes" << rsLog;
            }
            else if( time(0) - start > 30 ) {
                // defensive; should never happen as we have timeouts on connection and operation for our conn
                log() << "replSet too much time passed during our election, ignoring result" << rsLog;
            }
            else if( configVersion != rs.config().version ) {
                // Config changed mid-election; the votes may no longer be valid.
                log() << "replSet config version changed during our election, ignoring result" << rsLog;
            }
            else {
                /* succeeded. */
                LOG(1) << "replSet election succeeded, assuming primary role" << rsLog;
                success = true;
                setElectionTime(getNextGlobalOptime());
                rs.assumePrimary();
            }
        }
    }
    catch( std::exception& ) {
        // Ensure election bookkeeping is cleaned up before re-throwing.
        if( !success ) _electionFailed(meid);
        throw;
    }
    if( !success ) _electionFailed(meid);
}
bool Consensus::aMajoritySeemsToBeUp() const { int vUp = rs._self->config().votes; for( Member *m = rs.head(); m; m=m->next() ) vUp += m->hbinfo().up() ? m->config().votes : 0; return vUp * 2 > _totalVotes(); }