void SyncSourceResolver::_requiredOpTimeFetcherCallback( const StatusWith<Fetcher::QueryResponse>& queryResult, HostAndPort candidate, OpTime earliestOpTimeSeen, int rbid) { if (_isShuttingDown()) { _finishCallback(Status(ErrorCodes::CallbackCanceled, str::stream() << "sync source resolver shut down while looking for " "required optime " << _requiredOpTime.toString() << " in candidate's oplog: " << candidate)) .transitional_ignore(); return; } if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) { _finishCallback(queryResult.getStatus()).transitional_ignore(); return; } if (!queryResult.isOK()) { // We got an error. const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration; log() << "Blacklisting " << candidate << " due to required optime fetcher error: '" << queryResult.getStatus() << "' for " << kFetcherErrorBlacklistDuration << " until: " << until << ". required optime: " << _requiredOpTime; _syncSourceSelector->blacklistSyncSource(candidate, until); _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore(); return; } const auto& queryResponse = queryResult.getValue(); auto status = _compareRequiredOpTimeWithQueryResponse(queryResponse); if (!status.isOK()) { const auto until = _taskExecutor->now() + kNoRequiredOpTimeBlacklistDuration; warning() << "We cannot use " << candidate.toString() << " as a sync source because it does not contain the necessary " "operations for us to reach a consistent state: " << status << " last fetched optime: " << _lastOpTimeFetched << ". required optime: " << _requiredOpTime << ". Blacklisting this sync source for " << kNoRequiredOpTimeBlacklistDuration << " until: " << until; _syncSourceSelector->blacklistSyncSource(candidate, until); _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore(); return; } _finishCallback(candidate, rbid).ignore(); }
/**
 * Starts the resolver.
 *
 * Under '_mutex', moves the state from kPreStart to kRunning; any other state is rejected with
 * an error status and leaves the state untouched. Once running (and with the lock released),
 * begins probing by calling _chooseAndProbeNextSyncSource() with a default-constructed OpTime
 * and returns its status.
 */
Status SyncSourceResolver::startup() {
    {
        stdx::lock_guard<stdx::mutex> stateGuard(_mutex);

        // Reject every state other than kPreStart.
        if (_state == State::kRunning) {
            return Status(ErrorCodes::IllegalOperation, "sync source resolver already started");
        }
        if (_state == State::kShuttingDown) {
            return Status(ErrorCodes::ShutdownInProgress, "sync source resolver shutting down");
        }
        if (_state == State::kComplete) {
            return Status(ErrorCodes::ShutdownInProgress, "sync source resolver completed");
        }

        if (_state == State::kPreStart) {
            _state = State::kRunning;
        }
    }

    // The mutex is released before the first candidate is chosen and probed.
    return _chooseAndProbeNextSyncSource(OpTime());
}
/**
 * Handles the reply to the rollback id (rbid) request sent to 'candidate'.
 *
 *   - Canceled request: finish with the cancellation status.
 *   - Failed request, failed command, or an unreadable "rbid" field: blacklist the candidate for
 *     kFetcherErrorBlacklistDuration and probe the next candidate.
 *   - Success with no '_requiredOpTime' set: finish with 'candidate' and the parsed rbid.
 *   - Success with '_requiredOpTime' set: schedule the required optime fetcher, finishing with
 *     the scheduling error if that fails.
 */
void SyncSourceResolver::_rbidRequestCallback(
    HostAndPort candidate,
    OpTime earliestOpTimeSeen,
    const executor::TaskExecutor::RemoteCommandCallbackArgs& rbidReply) {
    const auto& reply = rbidReply.response;

    if (reply.status == ErrorCodes::CallbackCanceled) {
        _finishCallback(reply.status).transitional_ignore();
        return;
    }

    int rbid = ReplicationProcess::kUninitializedRollbackId;
    try {
        // Both the transport-level status and the command result must be OK before reading "rbid".
        uassertStatusOK(reply.status);
        uassertStatusOK(getStatusFromCommandResult(reply.data));
        rbid = reply.data["rbid"].Int();
    } catch (const DBException& ex) {
        const auto blacklistExpiry = _taskExecutor->now() + kFetcherErrorBlacklistDuration;
        log() << "Blacklisting " << candidate << " due to error: '" << ex << "' for "
              << kFetcherErrorBlacklistDuration << " until: " << blacklistExpiry;
        _syncSourceSelector->blacklistSyncSource(candidate, blacklistExpiry);
        _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore();
        return;
    }

    // Without a required optime there is nothing more to verify: the candidate is accepted.
    if (_requiredOpTime.isNull()) {
        _finishCallback(candidate, rbid).ignore();
        return;
    }

    // Schedule a fetcher to look for '_requiredOpTime' in the remote oplog.
    // Unittest requires that a scheduling failure be handled specially.
    const auto scheduleStatus =
        _scheduleFetcher(_makeRequiredOpTimeFetcher(candidate, earliestOpTimeSeen, rbid));
    if (!scheduleStatus.isOK()) {
        _finishCallback(scheduleStatus).transitional_ignore();
    }
}
void SyncSourceResolver::_firstOplogEntryFetcherCallback( const StatusWith<Fetcher::QueryResponse>& queryResult, HostAndPort candidate, OpTime earliestOpTimeSeen) { if (_isShuttingDown()) { _finishCallback(Status(ErrorCodes::CallbackCanceled, str::stream() << "sync source resolver shut down while probing candidate: " << candidate)); return; } if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) { _finishCallback(queryResult.getStatus()); return; } if (!queryResult.isOK()) { // We got an error. const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration; log() << "Blacklisting " << candidate << " due to error: '" << queryResult.getStatus() << "' for " << kFetcherErrorBlacklistDuration << " until: " << until; _syncSourceSelector->blacklistSyncSource(candidate, until); _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } const auto& queryResponse = queryResult.getValue(); const auto remoteEarliestOpTime = _parseRemoteEarliestOpTime(candidate, queryResponse); if (remoteEarliestOpTime.isNull()) { _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } // remoteEarliestOpTime may come from a very old config, so we cannot compare their terms. if (_lastOpTimeFetched.getTimestamp() < remoteEarliestOpTime.getTimestamp()) { // We're too stale to use this sync source. const auto blacklistDuration = kTooStaleBlacklistDuration; const auto until = _taskExecutor->now() + Minutes(1); log() << "We are too stale to use " << candidate << " as a sync source. " << "Blacklisting this sync source" << " because our last fetched timestamp: " << _lastOpTimeFetched.getTimestamp() << " is before their earliest timestamp: " << remoteEarliestOpTime.getTimestamp() << " for " << blacklistDuration << " until: " << until; _syncSourceSelector->blacklistSyncSource(candidate, until); // If all the viable sync sources are too far ahead of us (i.e. 
we are "too stale" relative // each sync source), we will want to return the starting timestamp of the sync source // candidate that is closest to us. See SyncSourceResolverResponse::earliestOpTimeSeen. // We use "earliestOpTimeSeen" to keep track of the current minimum starting timestamp. if (earliestOpTimeSeen.isNull() || earliestOpTimeSeen.getTimestamp() > remoteEarliestOpTime.getTimestamp()) { earliestOpTimeSeen = remoteEarliestOpTime; } _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } // Schedules fetcher to look for '_requiredOpTime' in the remote oplog. if (!_requiredOpTime.isNull()) { auto status = _scheduleFetcher(_makeRequiredOpTimeFetcher(candidate, earliestOpTimeSeen)); if (!status.isOK()) { _finishCallback(status); } return; } _finishCallback(candidate); }