void SyncSourceFeedback::run() { Client::initThread("SyncSourceFeedbackThread"); OperationContextImpl txn; bool positionChanged = false; bool handshakeNeeded = false; ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator(); while (!inShutdown()) { // TODO(spencer): Remove once legacy repl coordinator is gone. { boost::unique_lock<boost::mutex> lock(_mtx); while (!_positionChanged && !_handshakeNeeded && !_shutdownSignaled) { _cond.wait(lock); } if (_shutdownSignaled) { break; } positionChanged = _positionChanged; handshakeNeeded = _handshakeNeeded; _positionChanged = false; _handshakeNeeded = false; } MemberState state = replCoord->getCurrentMemberState(); if (state.primary() || state.fatal() || state.startup()) { continue; } const Member* target = BackgroundSync::get()->getSyncTarget(); if (_syncTarget != target) { _resetConnection(); _syncTarget = target; } if (!hasConnection()) { // fix connection if need be if (!target) { sleepmillis(500); continue; } if (!_connect(&txn, target->fullName())) { sleepmillis(500); continue; } } if (handshakeNeeded) { if (!replHandshake(&txn)) { boost::unique_lock<boost::mutex> lock(_mtx); _handshakeNeeded = true; continue; } } if (positionChanged) { if (!updateUpstream(&txn)) { boost::unique_lock<boost::mutex> lock(_mtx); _positionChanged = true; } } } cc().shutdown(); }
void BackgroundSync::_producerThread() { MemberState state = theReplSet->state(); // we want to pause when the state changes to primary if (isAssumingPrimary() || state.primary()) { if (!_pause) { stop(); } sleepsecs(1); return; } if (state.fatal() || state.startup()) { sleepsecs(5); return; } // if this member has an empty oplog, we cannot start syncing if (theReplSet->lastOpTimeWritten.isNull()) { sleepsecs(1); return; } // we want to unpause when we're no longer primary // start() also loads _lastOpTimeFetched, which we know is set from the "if" else if (_pause) { start(); } produce(); }
/**
 * Runs a single pass of the producer.
 *
 * Returns early, after a one second sleep, while the applier is being drained or this
 * member is primary, while the member is in the startup state, or while the last optime
 * is still null.  Otherwise calls start() if the producer is paused and then _produce().
 *
 * @param taskExecutor forwarded unchanged to _produce().
 */
void BackgroundSync::_producerThread(executor::TaskExecutor* taskExecutor) {
    const MemberState memberState = _replCoord->getMemberState();

    // Pause when the state changes to primary.
    const bool mustPause = _replCoord->isWaitingForApplierToDrain() || memberState.primary();
    if (mustPause) {
        if (!isPaused()) {
            stop();
        }
        sleepsecs(1);
        return;
    }

    // TODO(spencer): Use a condition variable to await loading a config.
    if (memberState.startup()) {
        // Wait for a config to be loaded.
        sleepsecs(1);
        return;
    }

    // Initial sync has to have started before we can produce.
    if (_replCoord->getMyLastOptime().isNull()) {
        sleepsecs(1);
        return;
    }

    // No longer primary: unpause.  The original comment notes that start() also loads
    // _lastOpTimeFetched, which is known to be set because of the check above.
    OperationContextImpl opCtx;
    if (isPaused()) {
        start(&opCtx);
    }

    _produce(&opCtx, taskExecutor);
}
void BackgroundSync::_producerThread() { MemberState state = theReplSet->state(); // we want to pause when the state changes to primary if (isAssumingPrimary() || state.primary()) { if (!_pause) { stop(); } sleepsecs(1); return; } if (state.startup()) { sleepsecs(1); return; } OperationContextImpl txn; // We need to wait until initial sync has started. if (_replCoord->getMyLastOptime().isNull()) { sleepsecs(1); return; } // we want to unpause when we're no longer primary // start() also loads _lastOpTimeFetched, which we know is set from the "if" else if (_pause) { start(&txn); } produce(&txn); }
void BackgroundSync::notifierThread() { Client::initThread("rsSyncNotifier"); replLocalAuth(); // This makes the initial connection to our sync source for oplog position notification. // It also sets the supportsUpdater flag so we know which method to use. // If this function fails, we ignore that situation because it will be taken care of // the first time markOplog() is called in the loop below. { boost::unique_lock<boost::mutex> oplogLockSSF(theReplSet->syncSourceFeedback.oplock); connectOplogNotifier(); } theReplSet->syncSourceFeedback.go(); while (!inShutdown()) { bool clearTarget = false; if (!theReplSet) { sleepsecs(5); continue; } MemberState state = theReplSet->state(); if (state.primary() || state.fatal() || state.startup()) { sleepsecs(5); continue; } try { { boost::unique_lock<boost::mutex> lock(_lastOpMutex); while (_consumedOpTime == theReplSet->lastOpTimeWritten) { _lastOpCond.wait(lock); } } markOplog(); } catch (DBException &e) { clearTarget = true; log() << "replset tracking exception: " << e.getInfo() << rsLog; sleepsecs(1); } catch (std::exception &e2) { clearTarget = true; log() << "replset tracking error" << e2.what() << rsLog; sleepsecs(1); } if (clearTarget) { boost::unique_lock<boost::mutex> lock(_mutex); _oplogMarkerTarget = NULL; } } cc().shutdown(); }
void SyncSourceFeedback::run() { Client::initThread("SyncSourceFeedback"); ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator(); while (true) { // breaks once _shutdownSignaled is true { stdx::unique_lock<stdx::mutex> lock(_mtx); while (!_positionChanged && !_shutdownSignaled) { if (_cond.wait_for(lock, _keepAliveInterval) == stdx::cv_status::timeout) { break; } } if (_shutdownSignaled) { break; } _positionChanged = false; } auto txn = cc().makeOperationContext(); MemberState state = replCoord->getMemberState(); if (state.primary() || state.startup()) { _resetConnection(); continue; } const HostAndPort target = BackgroundSync::get()->getSyncTarget(); if (_syncTarget != target) { _resetConnection(); _syncTarget = target; } if (!hasConnection()) { // fix connection if need be if (target.empty()) { sleepmillis(500); stdx::unique_lock<stdx::mutex> lock(_mtx); _positionChanged = true; continue; } if (!_connect(txn.get(), target)) { sleepmillis(500); stdx::unique_lock<stdx::mutex> lock(_mtx); _positionChanged = true; continue; } } Status status = updateUpstream(txn.get()); if (!status.isOK()) { sleepmillis(500); stdx::unique_lock<stdx::mutex> lock(_mtx); _positionChanged = true; } } }
void BackgroundSync::notifierThread() { Client::initThread("rsSyncNotifier"); replLocalAuth(); theReplSet->syncSourceFeedback.go(); while (!inShutdown()) { bool clearTarget = false; if (!theReplSet) { sleepsecs(5); continue; } MemberState state = theReplSet->state(); if (state.primary() || state.fatal() || state.startup()) { sleepsecs(5); continue; } try { { boost::unique_lock<boost::mutex> lock(_lastOpMutex); while (_consumedOpTime == theReplSet->lastOpTimeWritten) { _lastOpCond.wait(lock); } } markOplog(); } catch (DBException &e) { clearTarget = true; log() << "replset tracking exception: " << e.getInfo() << rsLog; sleepsecs(1); } catch (std::exception &e2) { clearTarget = true; log() << "replset tracking error" << e2.what() << rsLog; sleepsecs(1); } if (clearTarget) { boost::unique_lock<boost::mutex> lock(_mutex); _oplogMarkerTarget = NULL; } } cc().shutdown(); }
virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { ScopedTransaction transaction(txn, MODE_X); Lock::GlobalWrite globalWriteLock(txn->lockState()); ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator(); if (getGlobalReplicationCoordinator()->getSettings().usingReplSets()) { const MemberState memberState = replCoord->getMemberState(); if (memberState.startup()) { return appendCommandStatus(result, Status(ErrorCodes::NotYetInitialized, "no replication yet active")); } if (memberState.primary() || !replCoord->setFollowerMode(MemberState::RS_STARTUP2)) { return appendCommandStatus(result, Status(ErrorCodes::NotSecondary, "primaries cannot resync")); } BackgroundSync::get()->setInitialSyncRequestedFlag(true); return true; } // below this comment pertains only to master/slave replication if ( cmdObj.getBoolField( "force" ) ) { if ( !waitForSyncToFinish(txn, errmsg ) ) return false; replAllDead = "resync forced"; } // TODO(dannenberg) replAllDead is bad and should be removed when masterslave is removed if (!replAllDead) { errmsg = "not dead, no need to resync"; return false; } if ( !waitForSyncToFinish(txn, errmsg ) ) return false; ReplSource::forceResyncDead( txn, "client" ); result.append( "info", "triggered resync for all sources" ); return true; }
void BackgroundSync::_producerThread() { const MemberState state = _replCoord->getMemberState(); // we want to pause when the state changes to primary if (_replCoord->isWaitingForApplierToDrain() || state.primary()) { if (!isPaused()) { stop(); } if (_replCoord->isWaitingForApplierToDrain()) { // Signal to consumers that we have entered the paused state if the signal isn't already // in the queue. const boost::optional<BSONObj> lastObjectPushed = _buffer.lastObjectPushed(); if (!lastObjectPushed || !lastObjectPushed->isEmpty()) { const BSONObj sentinelDoc; _buffer.pushEvenIfFull(sentinelDoc); bufferCountGauge.increment(); bufferSizeGauge.increment(sentinelDoc.objsize()); } } sleepsecs(1); return; } // TODO(spencer): Use a condition variable to await loading a config. if (state.startup()) { // Wait for a config to be loaded sleepsecs(1); return; } // We need to wait until initial sync has started. if (_replCoord->getMyLastOptime().isNull()) { sleepsecs(1); return; } // we want to unpause when we're no longer primary // start() also loads _lastOpTimeFetched, which we know is set from the "if" OperationContextImpl txn; if (isPaused()) { start(&txn); } _produce(&txn); }
void BackgroundSync::_runProducer() { const MemberState state = _replCoord->getMemberState(); // Stop when the state changes to primary. // // TODO(siyuan) Drain mode should imply we're the primary. Fix this condition and the one below // after fixing step-down during drain mode. if (!_replCoord->isCatchingUp() && (_replCoord->isWaitingForApplierToDrain() || state.primary())) { if (!isStopped()) { stop(); } if (_replCoord->isWaitingForApplierToDrain()) { auto txn = cc().makeOperationContext(); _signalNoNewDataForApplier(txn.get()); } sleepsecs(1); return; } // TODO(spencer): Use a condition variable to await loading a config. if (state.startup()) { // Wait for a config to be loaded sleepsecs(1); return; } // We need to wait until initial sync has started. if (_replCoord->getMyLastAppliedOpTime().isNull()) { sleepsecs(1); return; } // we want to start when we're no longer primary // start() also loads _lastOpTimeFetched, which we know is set from the "if" auto txn = cc().makeOperationContext(); if (isStopped()) { start(txn.get()); } _produce(txn.get()); }
/**
 * Runs a single pass of the producer.
 *
 * While the applier is being drained or this member is primary, the producer is stopped;
 * in the draining case the applier is also signaled that there is no new data.  The pass
 * then ends after a one second sleep.
 *
 * The pass also ends after a one second sleep while the member is in startup or the last
 * applied optime is null.  Otherwise start() is called if stopped, followed by _produce().
 *
 * @param replicationCoordinatorExternalState forwarded unchanged to _produce().
 */
void BackgroundSync::_producerThread(
    ReplicationCoordinatorExternalState* replicationCoordinatorExternalState) {
    const MemberState memberState = _replCoord->getMemberState();

    // Stop when the state changes to primary.
    const bool shouldStop = _replCoord->isWaitingForApplierToDrain() || memberState.primary();
    if (shouldStop) {
        if (!isStopped()) {
            stop();
        }

        if (_replCoord->isWaitingForApplierToDrain()) {
            _signalNoNewDataForApplier();
        }

        sleepsecs(1);
        return;
    }

    // TODO(spencer): Use a condition variable to await loading a config.
    if (memberState.startup()) {
        // Wait for a config to be loaded.
        sleepsecs(1);
        return;
    }

    // Initial sync has to have started before we can produce.
    if (_replCoord->getMyLastAppliedOpTime().isNull()) {
        sleepsecs(1);
        return;
    }

    // No longer primary: start.  The original comment notes that start() also loads
    // _lastOpTimeFetched, which is known to be set because of the check above.
    const ServiceContext::UniqueOperationContext ownedOpCtx = cc().makeOperationContext();
    OperationContext& opCtx = *ownedOpCtx;
    if (isStopped()) {
        start(&opCtx);
    }

    _produce(&opCtx, replicationCoordinatorExternalState);
}
void BackgroundSync::producerThread() { { boost::unique_lock<boost::mutex> lock(_mutex); _opSyncInProgress = true; } Client::initThread("rsBackgroundSync"); replLocalAuth(); uint32_t timeToSleep = 0; while (!_opSyncShouldExit) { try { if (timeToSleep) { { boost::unique_lock<boost::mutex> lck(_mutex); _opSyncRunning = false; // notify other threads that we are not running _opSyncRunningCondVar.notify_all(); } for (uint32_t i = 0; i < timeToSleep; i++) { sleepsecs(1); // get out if we need to if (_opSyncShouldExit) { break; } } timeToSleep = 0; } // get out if we need to if (_opSyncShouldExit) { break; } { boost::unique_lock<boost::mutex> lck(_mutex); _opSyncRunning = false; while (!_opSyncShouldRun && !_opSyncShouldExit) { // notify other threads that we are not running _opSyncRunningCondVar.notify_all(); // wait for permission that we can run _opSyncCanRunCondVar.wait(lck); } // notify other threads that we are running _opSyncRunningCondVar.notify_all(); _opSyncRunning = true; } // get out if we need to if (_opSyncShouldExit) { break; } MemberState state = theReplSet->state(); if (state.fatal() || state.startup()) { timeToSleep = 5; continue; } // this does the work of reading a remote oplog // and writing it to our oplog timeToSleep = produce(); } catch (DBException& e) { sethbmsg(str::stream() << "db exception in producer: " << e.toString()); timeToSleep = 10; } catch (std::exception& e2) { sethbmsg(str::stream() << "exception in producer: " << e2.what()); timeToSleep = 10; } } cc().shutdown(); { boost::unique_lock<boost::mutex> lock(_mutex); _opSyncRunning = false; _opSyncInProgress = false; } }
/**
 * Main loop of the "SyncSourceFeedback" thread.
 *
 * Each pass waits until a position change or shutdown is signaled; a wait that times out
 * after the keep-alive interval also ends the wait, provided the member is neither
 * primary nor in startup.  If the member is primary or in startup, or there is no sync
 * target, the pass is abandoned.  Otherwise a Reporter for the current sync target is
 * published through _reporter for the duration of the pass and _updateUpstream() is
 * called; a failure is only logged.
 *
 * @param executor passed to the Reporter constructed on each pass.
 * @param bgsync   queried for the current sync target and passed on to
 *                 makePrepareReplSetUpdatePositionCommandFn() and _updateUpstream().
 */
void SyncSourceFeedback::run(executor::TaskExecutor* executor, BackgroundSync* bgsync) {
    Client::initThread("SyncSourceFeedback");

    HostAndPort syncTarget;

    // keepAliveInterval indicates how frequently to forward progress in the absence of
    // updates.  Zero means "not calculated yet".
    Milliseconds keepAliveInterval(0);

    for (;;) {  // left only once _shutdownSignaled is true
        auto txn = cc().makeOperationContext();

        if (keepAliveInterval == Milliseconds(0)) {
            keepAliveInterval = calculateKeepAliveInterval(txn.get(), _mtx);
        }

        {
            // Take SyncSourceFeedback lock before calling into ReplicationCoordinator
            // to avoid deadlock because ReplicationCoordinator could conceivably call back
            // into this class.
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            while (!_positionChanged && !_shutdownSignaled) {
                const auto waitResult = _cond.wait_for(lock, keepAliveInterval.toSystemDuration());
                if (waitResult == stdx::cv_status::timeout) {
                    MemberState memberState =
                        ReplicationCoordinator::get(txn.get())->getMemberState();
                    if (!memberState.primary() && !memberState.startup()) {
                        break;
                    }
                }
            }

            if (_shutdownSignaled) {
                break;
            }

            _positionChanged = false;
        }

        {
            stdx::lock_guard<stdx::mutex> lock(_mtx);
            MemberState memberState = ReplicationCoordinator::get(txn.get())->getMemberState();
            if (memberState.primary() || memberState.startup()) {
                continue;
            }
        }

        const HostAndPort target = bgsync->getSyncTarget();
        const bool targetChanged = (syncTarget != target);

        if (target.empty()) {
            if (targetChanged) {
                syncTarget = target;
            }
            // Loop back around again; the keepalive functionality will cause us to retry.
            continue;
        }

        // Log sync source changes.
        if (targetChanged) {
            LOG(1) << "setting syncSourceFeedback to " << target;
            syncTarget = target;

            // Update keepalive value from config.
            auto previousInterval = keepAliveInterval;
            keepAliveInterval = calculateKeepAliveInterval(txn.get(), _mtx);
            if (previousInterval != keepAliveInterval) {
                LOG(1) << "new syncSourceFeedback keep alive duration = " << keepAliveInterval
                       << " (previously " << previousInterval << ")";
            }
        }

        Reporter reporter(
            executor,
            makePrepareReplSetUpdatePositionCommandFn(txn.get(), _mtx, syncTarget, bgsync),
            syncTarget,
            keepAliveInterval);
        {
            stdx::lock_guard<stdx::mutex> lock(_mtx);
            _reporter = &reporter;
        }
        // Unpublish the reporter before it goes out of scope at the end of this pass.
        ON_BLOCK_EXIT([this]() {
            stdx::lock_guard<stdx::mutex> lock(_mtx);
            _reporter = nullptr;
        });

        auto updateStatus = _updateUpstream(txn.get(), bgsync);
        if (!updateStatus.isOK()) {
            LOG(1) << "The replication progress command (replSetUpdatePosition) failed and will be "
                      "retried: "
                   << updateStatus;
        }
    }
}