ReplSetConfig::ReplSetConfig(const HostAndPort& h) { clear(); int level = 2; DEV level = 0; BSONObj cfg; int v = -5; try { if( h.isSelf() ) { ; } else { /* first, make sure other node is configured to be a replset. just to be safe. */ string setname = cmdLine.ourSetName(); BSONObj cmd = BSON( "replSetHeartbeat" << setname ); int theirVersion; BSONObj info; log() << "trying to contact " << h.toString() << rsLog; bool ok = requestHeartbeat(setname, "", h.toString(), info, -2, theirVersion); if( info["rs"].trueValue() ) { // yes, it is a replicate set, although perhaps not yet initialized } else { if( !ok ) { log() << "replSet TEMP !ok heartbeating " << h.toString() << " on cfg load" << rsLog; if( !info.isEmpty() ) log() << "replSet info " << h.toString() << " : " << info.toString() << rsLog; return; } { stringstream ss; ss << "replSet error: member " << h.toString() << " is not in --replSet mode"; msgassertedNoTrace(13260, ss.str().c_str()); // not caught as not a user exception - we want it not caught //for python err# checker: uassert(13260, "", false); } } } v = -4; unsigned long long count = 0; try { ScopedConn conn(h.toString()); v = -3; cfg = conn.findOne(rsConfigNs, Query()).getOwned(); count = conn.count(rsConfigNs); } catch ( DBException& ) { if ( !h.isSelf() ) { throw; } // on startup, socket is not listening yet DBDirectClient cli; cfg = cli.findOne( rsConfigNs, Query() ).getOwned(); count = cli.count(rsConfigNs); } if( count > 1 ) uasserted(13109, str::stream() << "multiple rows in " << rsConfigNs << " not supported host: " << h.toString()); if( cfg.isEmpty() ) { version = EMPTYCONFIG; return; } version = -1; } catch( DBException& e) { version = v; log(level) << "replSet load config couldn't get from " << h.toString() << ' ' << e.what() << rsLog; return; } from(cfg); checkRsConfig(); _ok = true; log(level) << "replSet load config ok from " << (h.isSelf() ? "self" : h.toString()) << rsLog; }
/* called on a reconfig AND on initiate throws @param initial true when initiating */ void checkMembersUpForConfigChange(const ReplSetConfig& cfg, bool initial) { int failures = 0, majority = 0; int me = 0; stringstream selfs; for( vector<ReplSetConfig::MemberCfg>::const_iterator i = cfg.members.begin(); i != cfg.members.end(); i++ ) { if( i->h.isSelf() ) { me++; if( me > 1 ) selfs << ','; selfs << i->h.toString(); if( !i->potentiallyHot() ) { uasserted(13420, "initiation and reconfiguration of a replica set must be sent to a node that can become primary"); } majority += i->votes; } } majority = (majority / 2) + 1; uassert(13278, "bad config: isSelf is true for multiple hosts: " + selfs.str(), me <= 1); // dups? if( me != 1 ) { stringstream ss; ss << "can't find self in the replset config"; if( !cmdLine.isDefaultPort() ) ss << " my port: " << cmdLine.port; if( me != 0 ) ss << " found: " << me; uasserted(13279, ss.str()); } for( vector<ReplSetConfig::MemberCfg>::const_iterator i = cfg.members.begin(); i != cfg.members.end(); i++ ) { // we know we're up if (i->h.isSelf()) { continue; } BSONObj res; { bool ok = false; try { int theirVersion = -1000; ok = requestHeartbeat(cfg._id, "", i->h.toString(), res, -1, theirVersion, initial/*check if empty*/); if( theirVersion >= cfg.version ) { stringstream ss; ss << "replSet member " << i->h.toString() << " has too new a config version (" << theirVersion << ") to reconfigure"; uasserted(13259, ss.str()); } } catch(DBException& e) { log() << "replSet cmufcc requestHeartbeat " << i->h.toString() << " : " << e.toString() << rsLog; } catch(...) { log() << "replSet cmufcc error exception in requestHeartbeat?" << rsLog; } if( res.getBoolField("mismatch") ) uasserted(13145, "set name does not match the set name host " + i->h.toString() + " expects"); if( *res.getStringField("set") ) { if( cfg.version <= 1 ) { // this was to be initiation, no one shoudl be initiated already. uasserted(13256, "member " + i->h.toString() + " is already initiated"); } else { // Assure no one has a newer config. if( res["v"].Int() >= cfg.version ) { uasserted(13341, "member " + i->h.toString() + " has a config version >= to the new cfg version; cannot change config"); } } } if( !ok && !res["rs"].trueValue() ) { if( !res.isEmpty() ) { /* strange. got a response, but not "ok". log it. */ log() << "replSet warning " << i->h.toString() << " replied: " << res.toString() << rsLog; } bool allowFailure = false; failures += i->votes; if( res.isEmpty() && !initial && failures >= majority ) { const Member* m = theReplSet->findById( i->_id ); if( m ) { assert( m->h().toString() == i->h.toString() ); } // it's okay if the down member isn't part of the config, // we might be adding a new member that isn't up yet allowFailure = true; } if( !allowFailure ) { string msg = string("need all members up to initiate, not ok : ") + i->h.toString(); if( !initial ) msg = string("need most members up to reconfigure, not ok : ") + i->h.toString(); uasserted(13144, msg); } } } if( initial ) { bool hasData = res["hasData"].Bool(); uassert(13311, "member " + i->h.toString() + " has data already, cannot initiate set. All members except initiator must be empty.", !hasData || i->h.isSelf()); } } }
/* called on a reconfig AND on initiate throws @param initial true when initiating */ void checkMembersUpForConfigChange(const ReplSetConfig& cfg, bool initial) { int failures = 0; int me = 0; for( vector<ReplSetConfig::MemberCfg>::const_iterator i = cfg.members.begin(); i != cfg.members.end(); i++ ) { if( i->h.isSelf() ) { me++; if( !i->potentiallyHot() ) { uasserted(13420, "initiation and reconfiguration of a replica set must be sent to a node that can become primary"); } } } uassert(13278, "bad config - dups?", me <= 1); // dups? uassert(13279, "can't find self in the replset config", me == 1); for( vector<ReplSetConfig::MemberCfg>::const_iterator i = cfg.members.begin(); i != cfg.members.end(); i++ ) { BSONObj res; { bool ok = false; try { int theirVersion = -1000; ok = requestHeartbeat(cfg._id, "", i->h.toString(), res, -1, theirVersion, initial/*check if empty*/); if( theirVersion >= cfg.version ) { stringstream ss; ss << "replSet member " << i->h.toString() << " has too new a config version (" << theirVersion << ") to reconfigure"; uasserted(13259, ss.str()); } } catch(DBException& e) { log() << "replSet cmufcc requestHeartbeat " << i->h.toString() << " : " << e.toString() << rsLog; } catch(...) { log() << "replSet cmufcc error exception in requestHeartbeat?" << rsLog; } if( res.getBoolField("mismatch") ) uasserted(13145, "set name does not match the set name host " + i->h.toString() + " expects"); if( *res.getStringField("set") ) { if( cfg.version <= 1 ) { // this was to be initiation, no one shoudl be initiated already. uasserted(13256, "member " + i->h.toString() + " is already initiated"); } else { // Assure no one has a newer config. if( res["v"].Int() >= cfg.version ) { uasserted(13341, "member " + i->h.toString() + " has a config version >= to the new cfg version; cannot change config"); } } } if( !ok && !res["rs"].trueValue() ) { if( !res.isEmpty() ) { /* strange. got a response, but not "ok". log it. */ log() << "replSet warning " << i->h.toString() << " replied: " << res.toString() << rsLog; } bool allowFailure = false; failures++; if( res.isEmpty() && !initial && failures == 1 ) { /* for now we are only allowing 1 node to be down on a reconfig. this can be made to be a minority trying to keep change small as release is near. */ const Member* m = theReplSet->findById( i->_id ); if( m ) { // ok, so this was an existing member (wouldn't make sense to add to config a new member that is down) assert( m->h().toString() == i->h.toString() ); allowFailure = true; } } if( !allowFailure ) { string msg = string("need members up to initiate, not ok : ") + i->h.toString(); if( !initial ) msg = string("need most members up to reconfigure, not ok : ") + i->h.toString(); uasserted(13144, msg); } } } if( initial ) { bool hasData = res["hasData"].Bool(); uassert(13311, "member " + i->h.toString() + " has data already, cannot initiate set. All members except initiator must be empty.", !hasData || i->h.isSelf()); } } }