void appendReplicationInfo(OperationContext* txn, BSONObjBuilder& result, int level) { ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator(); if (replCoord->getSettings().usingReplSets()) { IsMasterResponse isMasterResponse; replCoord->fillIsMasterForReplSet(&isMasterResponse); result.appendElements(isMasterResponse.toBSON()); if (level) { replCoord->appendSlaveInfoData(&result); } return; } // TODO(dannenberg) replAllDead is bad and should be removed when master slave is removed if (replAllDead) { result.append("ismaster", 0); string s = string("dead: ") + replAllDead; result.append("info", s); } else { result.appendBool("ismaster", getGlobalReplicationCoordinator()->isMasterForReportingPurposes()); } if (level) { BSONObjBuilder sources(result.subarrayStart("sources")); int n = 0; list<BSONObj> src; { const char* localSources = "local.sources"; AutoGetCollectionForRead ctx(txn, localSources); unique_ptr<PlanExecutor> exec(InternalPlanner::collectionScan( txn, localSources, ctx.getCollection(), PlanExecutor::YIELD_MANUAL)); BSONObj obj; PlanExecutor::ExecState state; while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { src.push_back(obj.getOwned()); } } for (list<BSONObj>::const_iterator i = src.begin(); i != src.end(); i++) { BSONObj s = *i; BSONObjBuilder bb; bb.append(s["host"]); string sourcename = s["source"].valuestr(); if (sourcename != "main") bb.append(s["source"]); { BSONElement e = s["syncedTo"]; BSONObjBuilder t(bb.subobjStart("syncedTo")); t.appendDate("time", e.timestampTime()); t.append("inc", e.timestampInc()); t.done(); } if (level > 1) { wassert(!txn->lockState()->isLocked()); // note: there is no so-style timeout on this connection; perhaps we should have // one. ScopedDbConnection conn(s["host"].valuestr()); DBClientConnection* cliConn = dynamic_cast<DBClientConnection*>(&conn.conn()); if (cliConn && replAuthenticate(cliConn)) { BSONObj first = conn->findOne((string) "local.oplog.$" + sourcename, Query().sort(BSON("$natural" << 1))); BSONObj last = conn->findOne((string) "local.oplog.$" + sourcename, Query().sort(BSON("$natural" << -1))); bb.appendDate("masterFirst", first["ts"].timestampTime()); bb.appendDate("masterLast", last["ts"].timestampTime()); const auto lag = (last["ts"].timestampTime() - s["syncedTo"].timestampTime()); bb.append("lagSeconds", durationCount<Milliseconds>(lag) / 1000.0); } conn.done(); } sources.append(BSONObjBuilder::numStr(n++), bb.obj()); } sources.done(); replCoord->appendSlaveInfoData(&result); } }
void runSyncThread() { Client::initThread("rsSync"); AuthorizationSession::get(cc())->grantInternalAuthorization(); ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator(); // Set initial indexPrefetch setting const std::string& prefetch = replCoord->getSettings().rsIndexPrefetch; if (!prefetch.empty()) { BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL; if (prefetch == "none") prefetchConfig = BackgroundSync::PREFETCH_NONE; else if (prefetch == "_id_only") prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY; else if (prefetch == "all") prefetchConfig = BackgroundSync::PREFETCH_ALL; else { warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting " << "to \"all\""; } BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig); } while (!inShutdown()) { // After a reconfig, we may not be in the replica set anymore, so // check that we are in the set (and not an arbiter) before // trying to sync with other replicas. // TODO(spencer): Use a condition variable to await loading a config if (replCoord->getMemberState().startup()) { warning() << "did not receive a valid config yet, sleeping 5 seconds "; sleepsecs(5); continue; } const MemberState memberState = replCoord->getMemberState(); // An arbiter can never transition to any other state, and doesn't replicate, ever if (memberState.arbiter()) { break; } // If we are removed then we don't belong to the set anymore if (memberState.removed()) { sleepsecs(5); continue; } try { if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) { sleepsecs(1); continue; } bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag(); // Check criteria for doing an initial sync: // 1. If the oplog is empty, do an initial sync // 2. If minValid has _initialSyncFlag set, do an initial sync // 3. If initialSyncRequested is true if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() || getInitialSyncFlag() || initialSyncRequested) { syncDoInitialSync(); continue; // start from top again in case sync failed. } if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) { continue; } /* we have some data. continue tailing. */ SyncTail tail(BackgroundSync::get(), multiSyncApply); tail.oplogApplication(); } catch (const DBException& e) { log() << "Received exception while syncing: " << e.toString(); sleepsecs(10); } catch (const std::exception& e) { log() << "Received exception while syncing: " << e.what(); sleepsecs(10); } } }
virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { int secs = (int) cmdObj.firstElement().numberInt(); return appendCommandStatus( result, getGlobalReplicationCoordinator()->processReplSetFreeze(secs, &result)); }
void SyncSourceFeedback::run() { Client::initThread("SyncSourceFeedbackThread"); OperationContextImpl txn; bool positionChanged = false; bool handshakeNeeded = false; ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator(); while (!inShutdown()) { // TODO(spencer): Remove once legacy repl coordinator is gone. { boost::unique_lock<boost::mutex> lock(_mtx); while (!_positionChanged && !_handshakeNeeded && !_shutdownSignaled) { _cond.wait(lock); } if (_shutdownSignaled) { break; } positionChanged = _positionChanged; handshakeNeeded = _handshakeNeeded; _positionChanged = false; _handshakeNeeded = false; } MemberState state = replCoord->getCurrentMemberState(); if (state.primary() || state.fatal() || state.startup()) { _resetConnection(); continue; } const Member* target = BackgroundSync::get()->getSyncTarget(); if (_syncTarget != target) { _resetConnection(); _syncTarget = target; } if (!hasConnection()) { // fix connection if need be if (!target) { sleepmillis(500); continue; } if (!_connect(&txn, target->h())) { sleepmillis(500); continue; } handshakeNeeded = true; } if (handshakeNeeded) { if (!replHandshake(&txn)) { boost::unique_lock<boost::mutex> lock(_mtx); _handshakeNeeded = true; continue; } } if (positionChanged) { if (!updateUpstream(&txn)) { boost::unique_lock<boost::mutex> lock(_mtx); _positionChanged = true; } } } cc().shutdown(); }
virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { Status status = getGlobalReplicationCoordinator()->processReplSetGetRBID(&result); return appendCommandStatus(result, status); }
void ReplSetImpl::loadConfig(OperationContext* txn) { startupStatus = LOADINGCONFIG; startupStatusMsg.set("loading " + rsConfigNs + " config (LOADINGCONFIG)"); LOG(1) << "loadConfig() " << rsConfigNs << endl; while (1) { try { OwnedPointerVector<ReplSetConfig> configs; try { configs.mutableVector().push_back(ReplSetConfig::makeDirect()); } catch (DBException& e) { log() << "replSet exception loading our local replset configuration object : " << e.toString() << rsLog; } for (vector<HostAndPort>::const_iterator i = _seeds->begin(); i != _seeds->end(); i++) { try { configs.mutableVector().push_back(ReplSetConfig::make(*i)); } catch (DBException& e) { log() << "replSet exception trying to load config from " << *i << " : " << e.toString() << rsLog; } } ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings(); { scoped_lock lck(replSettings.discoveredSeeds_mx); if (replSettings.discoveredSeeds.size() > 0) { for (set<string>::iterator i = replSettings.discoveredSeeds.begin(); i != replSettings.discoveredSeeds.end(); i++) { try { configs.mutableVector().push_back( ReplSetConfig::make(HostAndPort(*i))); } catch (DBException&) { LOG(1) << "replSet exception trying to load config from discovered " "seed " << *i << rsLog; replSettings.discoveredSeeds.erase(*i); } } } } if (!replSettings.reconfig.isEmpty()) { try { configs.mutableVector().push_back(ReplSetConfig::make(replSettings.reconfig, true)); } catch (DBException& re) { log() << "replSet couldn't load reconfig: " << re.what() << rsLog; replSettings.reconfig = BSONObj(); } } int nok = 0; int nempty = 0; for (vector<ReplSetConfig*>::iterator i = configs.mutableVector().begin(); i != configs.mutableVector().end(); i++) { if ((*i)->ok()) nok++; if ((*i)->empty()) nempty++; } if (nok == 0) { if (nempty == (int) configs.mutableVector().size()) { startupStatus = EMPTYCONFIG; startupStatusMsg.set("can't get " + rsConfigNs + " config from self or any seed (EMPTYCONFIG)"); log() << "replSet can't get " << rsConfigNs << " config from self or any seed (EMPTYCONFIG)" << rsLog; static unsigned once; if (++once == 1) { log() << "replSet info you may need to run replSetInitiate -- rs.initia" "te() in the shell -- if that is not already done" << rsLog; } if (_seeds->size() == 0) { LOG(1) << "replSet info no seed hosts were specified on the --replSet " "command line" << rsLog; } } else { startupStatus = EMPTYUNREACHABLE; startupStatusMsg.set("can't currently get " + rsConfigNs + " config from self or any seed (EMPTYUNREACHABLE)"); log() << "replSet can't get " << rsConfigNs << " config from self or any seed (yet)" << rsLog; } sleepsecs(1); continue; } if (!_loadConfigFinish(txn, configs.mutableVector())) { log() << "replSet info Couldn't load config yet. Sleeping 20sec and will try " "again." << rsLog; sleepsecs(20); continue; } } catch (DBException& e) { startupStatus = BADCONFIG; startupStatusMsg.set("replSet error loading set config (BADCONFIG)"); log() << "replSet error loading configurations " << e.toString() << rsLog; log() << "replSet error replication will not start" << rsLog; sethbmsg("error loading set config"); _fatal(); throw; } break; } startupStatusMsg.set("? started"); startupStatus = STARTED; }
// @param reconf true if this is a reconfiguration and not an initial load of the configuration. // @return true if ok; throws if config really bad; false if config doesn't include self bool ReplSetImpl::initFromConfig(OperationContext* txn, ReplSetConfig& c, bool reconf) { // NOTE: haveNewConfig() writes the new config to disk before we get here. So // we cannot error out at this point, except fatally. Check errors earlier. lock lk(this); if (!getLastErrorDefault.isEmpty() || !c.getLastErrorDefaults.isEmpty()) { getLastErrorDefault = c.getLastErrorDefaults; } list<ReplSetConfig::MemberCfg*> newOnes; // additive short-cuts the new config setup. If we are just adding a // node/nodes and nothing else is changing, this is additive. If it's // not a reconfig, we're not adding anything bool additive = reconf; bool updateConfigs = false; { unsigned nfound = 0; int me = 0; for (vector<ReplSetConfig::MemberCfg>::iterator i = c.members.begin(); i != c.members.end(); i++) { ReplSetConfig::MemberCfg& m = *i; if (isSelf(m.h)) { me++; } if (reconf) { const Member *old = findById(m._id); if (old) { nfound++; verify((int) old->id() == m._id); if (!old->config().isSameIgnoringTags(m)) { additive = false; } if (!updateConfigs && old->config() != m) { updateConfigs = true; } } else { newOnes.push_back(&m); } } } if (me == 0) { // we're not in the config -- we must have been removed if (state().removed()) { // already took note of our ejection from the set // so just sit tight and poll again return false; } _members.orphanAll(); // kill off rsHealthPoll threads (because they Know Too Much about our past) endOldHealthTasks(); // close sockets to force clients to re-evaluate this member MessagingPort::closeAllSockets(0); // take note of our ejection changeState(MemberState::RS_REMOVED); // go into holding pattern log() << "replSet info self not present in the repl set configuration:" << rsLog; log() << c.toString() << rsLog; loadConfig(txn); // redo config from scratch return false; } uassert(13302, "replSet error self appears twice in the repl set configuration", me<=1); // if we found different members that the original config, reload everything if (reconf && config().members.size() != nfound) additive = false; } // If we are changing chaining rules, we don't want this to be an additive reconfig so that // the primary can step down and the sync targets change. // TODO: This can be removed once SERVER-5208 is fixed. if (reconf && config().chainingAllowed() != c.chainingAllowed()) { additive = false; } _cfg = new ReplSetConfig(c); { // Hack to force ReplicationCoordinatorImpl to have a config. // TODO(spencer): rm this once the ReplicationCoordinatorImpl can load its own config. HybridReplicationCoordinator* replCoord = dynamic_cast<HybridReplicationCoordinator*>(getGlobalReplicationCoordinator()); fassert(18648, replCoord); replCoord->setImplConfigHack(_cfg); } // config() is same thing but const, so we use that when we can for clarity below dassert(&config() == _cfg); verify(config().ok()); verify(_name.empty() || _name == config()._id); _name = config()._id; verify(!_name.empty()); // this is a shortcut for simple changes if (additive) { log() << "replSet info : additive change to configuration" << rsLog; if (updateConfigs) { // we have new configs for existing members, so we need to repopulate _members // with the most recent configs _members.orphanAll(); // for logging string members = ""; // not setting _self to 0 as other threads use _self w/o locking int me = 0; for(vector<ReplSetConfig::MemberCfg>::const_iterator i = config().members.begin(); i != config().members.end(); i++) { const ReplSetConfig::MemberCfg& m = *i; Member *mi; members += (members == "" ? "" : ", ") + m.h.toString(); if (isSelf(m.h)) { verify(me++ == 0); mi = new Member(m.h, m._id, &m, true); setSelfTo(mi); } else { mi = new Member(m.h, m._id, &m, false); _members.push(mi); } } // trigger a handshake to update the syncSource of our writeconcern information syncSourceFeedback.forwardSlaveHandshake(); } // add any new members for (list<ReplSetConfig::MemberCfg*>::const_iterator i = newOnes.begin(); i != newOnes.end(); i++) { ReplSetConfig::MemberCfg *m = *i; Member *mi = new Member(m->h, m->_id, m, false); // we will indicate that new members are up() initially so that we don't relinquish // our primary state because we can't (transiently) see a majority. they should be // up as we check that new members are up before getting here on reconfig anyway. mi->get_hbinfo().health = 0.1; _members.push(mi); startHealthTaskFor(mi); } // if we aren't creating new members, we may have to update the // groups for the current ones _cfg->updateMembers(_members); return true; } // start with no members. if this is a reconfig, drop the old ones. _members.orphanAll(); endOldHealthTasks(); int oldPrimaryId = -1; { const Member *p = box.getPrimary(); if (p) oldPrimaryId = p->id(); } forgetPrimary(txn); // not setting _self to 0 as other threads use _self w/o locking int me = 0; // For logging string members = ""; for (vector<ReplSetConfig::MemberCfg>::const_iterator i = config().members.begin(); i != config().members.end(); i++) { const ReplSetConfig::MemberCfg& m = *i; Member *mi; members += (members == "" ? "" : ", ") + m.h.toString(); if (isSelf(m.h)) { verify(me++ == 0); mi = new Member(m.h, m._id, &m, true); if (!reconf) { log() << "replSet I am " << m.h.toString() << rsLog; } setSelfTo(mi); if ((int)mi->id() == oldPrimaryId) box.setSelfPrimary(mi); } else { mi = new Member(m.h, m._id, &m, false); _members.push(mi); if ((int)mi->id() == oldPrimaryId) box.setOtherPrimary(mi); } } if (me == 0){ log() << "replSet warning did not detect own host in full reconfig, members " << members << " config: " << c << rsLog; } else { // Do this after we've found ourselves, since _self needs // to be set before we can start the heartbeat tasks for (Member *mb = _members.head(); mb; mb=mb->next()) { startHealthTaskFor(mb); } } return true; }
void appendReplicationInfo(OperationContext* txn, BSONObjBuilder& result, int level) { ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator(); if (replCoord->getSettings().usingReplSets()) { if (replCoord->getReplicationMode() != ReplicationCoordinator::modeReplSet || replCoord->getCurrentMemberState().shunned()) { result.append("ismaster", false); result.append("secondary", false); result.append("info", ReplSet::startupStatusMsg.get()); result.append( "isreplicaset" , true ); } else { theReplSet->fillIsMaster(result); } return; } if ( replAllDead ) { result.append("ismaster", 0); string s = string("dead: ") + replAllDead; result.append("info", s); } else { result.appendBool("ismaster", getGlobalReplicationCoordinator()->isMasterForReportingPurposes()); } if (level && replCoord->getSettings().usingReplSets()) { result.append( "info" , "is replica set" ); } else if ( level ) { BSONObjBuilder sources( result.subarrayStart( "sources" ) ); int n = 0; list<BSONObj> src; { const char* localSources = "local.sources"; Client::ReadContext ctx(txn, localSources); auto_ptr<PlanExecutor> exec( InternalPlanner::collectionScan(txn, localSources, ctx.ctx().db()->getCollection(txn, localSources))); BSONObj obj; Runner::RunnerState state; while (Runner::RUNNER_ADVANCED == (state = exec->getNext(&obj, NULL))) { src.push_back(obj); } } for( list<BSONObj>::const_iterator i = src.begin(); i != src.end(); i++ ) { BSONObj s = *i; BSONObjBuilder bb; bb.append( s["host"] ); string sourcename = s["source"].valuestr(); if ( sourcename != "main" ) bb.append( s["source"] ); { BSONElement e = s["syncedTo"]; BSONObjBuilder t( bb.subobjStart( "syncedTo" ) ); t.appendDate( "time" , e.timestampTime() ); t.append( "inc" , e.timestampInc() ); t.done(); } if ( level > 1 ) { wassert(txn->lockState()->threadState() == 0); // note: there is no so-style timeout on this connection; perhaps we should have one. ScopedDbConnection conn(s["host"].valuestr()); DBClientConnection *cliConn = dynamic_cast< DBClientConnection* >( &conn.conn() ); if ( cliConn && replAuthenticate(cliConn) ) { BSONObj first = conn->findOne( (string)"local.oplog.$" + sourcename, Query().sort( BSON( "$natural" << 1 ) ) ); BSONObj last = conn->findOne( (string)"local.oplog.$" + sourcename, Query().sort( BSON( "$natural" << -1 ) ) ); bb.appendDate( "masterFirst" , first["ts"].timestampTime() ); bb.appendDate( "masterLast" , last["ts"].timestampTime() ); double lag = (double) (last["ts"].timestampTime() - s["syncedTo"].timestampTime()); bb.append( "lagSeconds" , lag / 1000 ); } conn.done(); } sources.append( BSONObjBuilder::numStr( n++ ) , bb.obj() ); } sources.done(); } }
long long ReplClientInfo::getTerm() { if (_cachedTerm == kUninitializedTerm) { _cachedTerm = getGlobalReplicationCoordinator()->getTerm(); } return _cachedTerm; }