// Picks the next sync source candidate and schedules a fetcher that probes the first entry of the
// candidate's oplog.
//
// 'earliestOpTimeSeen' is forwarded to the probing fetcher. When no candidate is left and
// 'earliestOpTimeSeen' is not null, the resolver finishes with OplogStartMissing and reports
// 'earliestOpTimeSeen' in the response.
//
// Returns Status::OK() when the probing fetcher was scheduled; in every other case returns
// whatever _finishCallback() returns.
Status SyncSourceResolver::_chooseAndProbeNextSyncSource(OpTime earliestOpTimeSeen) {
    auto selection = _chooseNewSyncSource();
    if (!selection.isOK()) {
        return _finishCallback(selection);
    }

    const auto& nextCandidate = selection.getValue();
    if (!nextCandidate.empty()) {
        // A candidate is available: probe the start of its oplog.
        auto scheduleStatus =
            _scheduleFetcher(_makeFirstOplogEntryFetcher(nextCandidate, earliestOpTimeSeen));
        if (scheduleStatus.isOK()) {
            return Status::OK();
        }
        return _finishCallback(scheduleStatus);
    }

    // No candidate is left. With no earliest optime recorded, hand the (empty) selection result
    // to _finishCallback() unchanged.
    if (earliestOpTimeSeen.isNull()) {
        return _finishCallback(selection);
    }

    // An earliest optime was recorded while probing earlier candidates, so report staleness
    // together with that optime.
    SyncSourceResolverResponse staleResponse;
    staleResponse.syncSourceStatus = {ErrorCodes::OplogStartMissing, "too stale to catch up"};
    staleResponse.earliestOpTimeSeen = earliestOpTimeSeen;
    return _finishCallback(staleResponse);
}
// Handles the reply to the rollback id ("rbid") request that was sent to 'candidate'.
//
// - A canceled request finishes the resolver with the cancellation status.
// - Any DBException raised while validating the reply or reading its "rbid" field blacklists
//   'candidate' for kFetcherErrorBlacklistDuration and moves on to the next candidate.
// - Otherwise, if '_requiredOpTime' is set, a fetcher is scheduled to look for it in the remote
//   oplog; if it is not set, the resolver finishes with 'candidate' and the rbid that was read.
void SyncSourceResolver::_rbidRequestCallback(
    HostAndPort candidate,
    OpTime earliestOpTimeSeen,
    const executor::TaskExecutor::RemoteCommandCallbackArgs& rbidReply) {
    const auto& reply = rbidReply.response;

    if (reply.status == ErrorCodes::CallbackCanceled) {
        _finishCallback(reply.status).transitional_ignore();
        return;
    }

    int rbid = ReplicationProcess::kUninitializedRollbackId;
    try {
        uassertStatusOK(reply.status);
        uassertStatusOK(getStatusFromCommandResult(reply.data));
        rbid = reply.data["rbid"].Int();
    } catch (const DBException& ex) {
        // The candidate could not give us a usable rbid; skip it for a while and try another.
        const auto blacklistUntil = _taskExecutor->now() + kFetcherErrorBlacklistDuration;
        log() << "Blacklisting " << candidate << " due to error: '" << ex << "' for "
              << kFetcherErrorBlacklistDuration << " until: " << blacklistUntil;
        _syncSourceSelector->blacklistSyncSource(candidate, blacklistUntil);
        _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore();
        return;
    }

    // Without a required optime there is nothing left to verify: accept the candidate.
    if (_requiredOpTime.isNull()) {
        _finishCallback(candidate, rbid).ignore();
        return;
    }

    // Schedule a fetcher to look for '_requiredOpTime' in the remote oplog.
    // The unit tests require a scheduling failure here to be handled specially.
    auto scheduleStatus =
        _scheduleFetcher(_makeRequiredOpTimeFetcher(candidate, earliestOpTimeSeen, rbid));
    if (!scheduleStatus.isOK()) {
        _finishCallback(scheduleStatus).transitional_ignore();
    }
}
void SyncSourceResolver::_firstOplogEntryFetcherCallback( const StatusWith<Fetcher::QueryResponse>& queryResult, HostAndPort candidate, OpTime earliestOpTimeSeen) { if (_isShuttingDown()) { _finishCallback(Status(ErrorCodes::CallbackCanceled, str::stream() << "sync source resolver shut down while probing candidate: " << candidate)); return; } if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) { _finishCallback(queryResult.getStatus()); return; } if (!queryResult.isOK()) { // We got an error. const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration; log() << "Blacklisting " << candidate << " due to error: '" << queryResult.getStatus() << "' for " << kFetcherErrorBlacklistDuration << " until: " << until; _syncSourceSelector->blacklistSyncSource(candidate, until); _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } const auto& queryResponse = queryResult.getValue(); const auto remoteEarliestOpTime = _parseRemoteEarliestOpTime(candidate, queryResponse); if (remoteEarliestOpTime.isNull()) { _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } // remoteEarliestOpTime may come from a very old config, so we cannot compare their terms. if (_lastOpTimeFetched.getTimestamp() < remoteEarliestOpTime.getTimestamp()) { // We're too stale to use this sync source. const auto blacklistDuration = kTooStaleBlacklistDuration; const auto until = _taskExecutor->now() + Minutes(1); log() << "We are too stale to use " << candidate << " as a sync source. " << "Blacklisting this sync source" << " because our last fetched timestamp: " << _lastOpTimeFetched.getTimestamp() << " is before their earliest timestamp: " << remoteEarliestOpTime.getTimestamp() << " for " << blacklistDuration << " until: " << until; _syncSourceSelector->blacklistSyncSource(candidate, until); // If all the viable sync sources are too far ahead of us (i.e. 
we are "too stale" relative // each sync source), we will want to return the starting timestamp of the sync source // candidate that is closest to us. See SyncSourceResolverResponse::earliestOpTimeSeen. // We use "earliestOpTimeSeen" to keep track of the current minimum starting timestamp. if (earliestOpTimeSeen.isNull() || earliestOpTimeSeen.getTimestamp() > remoteEarliestOpTime.getTimestamp()) { earliestOpTimeSeen = remoteEarliestOpTime; } _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } // Schedules fetcher to look for '_requiredOpTime' in the remote oplog. if (!_requiredOpTime.isNull()) { auto status = _scheduleFetcher(_makeRequiredOpTimeFetcher(candidate, earliestOpTimeSeen)); if (!status.isOK()) { _finishCallback(status); } return; } _finishCallback(candidate); }