void SyncSourceResolver::_requiredOpTimeFetcherCallback(
    const StatusWith<Fetcher::QueryResponse>& queryResult,
    HostAndPort candidate,
    OpTime earliestOpTimeSeen,
    int rbid) {
    if (_isShuttingDown()) {
        _finishCallback(Status(ErrorCodes::CallbackCanceled,
                               str::stream() << "sync source resolver shut down while looking for "
                                                "required optime "
                                             << _requiredOpTime.toString()
                                             << " in candidate's oplog: "
                                             << candidate))
            .transitional_ignore();
        return;
    }

    if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) {
        _finishCallback(queryResult.getStatus()).transitional_ignore();
        return;
    }

    if (!queryResult.isOK()) {
        // We got an error.
        const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration;
        log() << "Blacklisting " << candidate << " due to required optime fetcher error: '"
              << queryResult.getStatus() << "' for " << kFetcherErrorBlacklistDuration
              << " until: " << until << ". required optime: " << _requiredOpTime;
        _syncSourceSelector->blacklistSyncSource(candidate, until);

        _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore();
        return;
    }

    const auto& queryResponse = queryResult.getValue();
    auto status = _compareRequiredOpTimeWithQueryResponse(queryResponse);
    if (!status.isOK()) {
        const auto until = _taskExecutor->now() + kNoRequiredOpTimeBlacklistDuration;
        warning() << "We cannot use " << candidate.toString()
                  << " as a sync source because it does not contain the necessary "
                     "operations for us to reach a consistent state: "
                  << status << " last fetched optime: " << _lastOpTimeFetched
                  << ". required optime: " << _requiredOpTime
                  << ". Blacklisting this sync source for " << kNoRequiredOpTimeBlacklistDuration
                  << " until: " << until;
        _syncSourceSelector->blacklistSyncSource(candidate, until);

        _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore();
        return;
    }

    _finishCallback(candidate, rbid).ignore();
}
Example #2
0
Status SyncSourceResolver::startup() {
    {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        switch (_state) {
            case State::kPreStart:
                _state = State::kRunning;
                break;
            case State::kRunning:
                return Status(ErrorCodes::IllegalOperation, "sync source resolver already started");
            case State::kShuttingDown:
                return Status(ErrorCodes::ShutdownInProgress, "sync source resolver shutting down");
            case State::kComplete:
                return Status(ErrorCodes::ShutdownInProgress, "sync source resolver completed");
        }
    }

    return _chooseAndProbeNextSyncSource(OpTime());
}
void SyncSourceResolver::_rbidRequestCallback(
    HostAndPort candidate,
    OpTime earliestOpTimeSeen,
    const executor::TaskExecutor::RemoteCommandCallbackArgs& rbidReply) {
    if (rbidReply.response.status == ErrorCodes::CallbackCanceled) {
        _finishCallback(rbidReply.response.status).transitional_ignore();
        return;
    }

    int rbid = ReplicationProcess::kUninitializedRollbackId;
    try {
        uassertStatusOK(rbidReply.response.status);
        uassertStatusOK(getStatusFromCommandResult(rbidReply.response.data));
        rbid = rbidReply.response.data["rbid"].Int();
    } catch (const DBException& ex) {
        const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration;
        log() << "Blacklisting " << candidate << " due to error: '" << ex << "' for "
              << kFetcherErrorBlacklistDuration << " until: " << until;
        _syncSourceSelector->blacklistSyncSource(candidate, until);
        _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore();
        return;
    }

    if (!_requiredOpTime.isNull()) {
        // Schedule fetcher to look for '_requiredOpTime' in the remote oplog.
        // Unittest requires that this kind of failure be handled specially.
        auto status =
            _scheduleFetcher(_makeRequiredOpTimeFetcher(candidate, earliestOpTimeSeen, rbid));
        if (!status.isOK()) {
            _finishCallback(status).transitional_ignore();
        }
        return;
    }

    _finishCallback(candidate, rbid).ignore();
}
Example #4
0
void SyncSourceResolver::_firstOplogEntryFetcherCallback(
    const StatusWith<Fetcher::QueryResponse>& queryResult,
    HostAndPort candidate,
    OpTime earliestOpTimeSeen) {
    if (_isShuttingDown()) {
        _finishCallback(Status(ErrorCodes::CallbackCanceled,
                               str::stream()
                                   << "sync source resolver shut down while probing candidate: "
                                   << candidate));
        return;
    }

    if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) {
        _finishCallback(queryResult.getStatus());
        return;
    }

    if (!queryResult.isOK()) {
        // We got an error.
        const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration;
        log() << "Blacklisting " << candidate << " due to error: '" << queryResult.getStatus()
              << "' for " << kFetcherErrorBlacklistDuration << " until: " << until;
        _syncSourceSelector->blacklistSyncSource(candidate, until);

        _chooseAndProbeNextSyncSource(earliestOpTimeSeen);
        return;
    }

    const auto& queryResponse = queryResult.getValue();
    const auto remoteEarliestOpTime = _parseRemoteEarliestOpTime(candidate, queryResponse);
    if (remoteEarliestOpTime.isNull()) {
        _chooseAndProbeNextSyncSource(earliestOpTimeSeen);
        return;
    }

    // remoteEarliestOpTime may come from a very old config, so we cannot compare their terms.
    if (_lastOpTimeFetched.getTimestamp() < remoteEarliestOpTime.getTimestamp()) {
        // We're too stale to use this sync source.
        const auto blacklistDuration = kTooStaleBlacklistDuration;
        const auto until = _taskExecutor->now() + Minutes(1);

        log() << "We are too stale to use " << candidate << " as a sync source. "
              << "Blacklisting this sync source"
              << " because our last fetched timestamp: " << _lastOpTimeFetched.getTimestamp()
              << " is before their earliest timestamp: " << remoteEarliestOpTime.getTimestamp()
              << " for " << blacklistDuration << " until: " << until;

        _syncSourceSelector->blacklistSyncSource(candidate, until);

        // If all the viable sync sources are too far ahead of us (i.e. we are "too stale" relative
        // each sync source), we will want to return the starting timestamp of the sync source
        // candidate that is closest to us. See SyncSourceResolverResponse::earliestOpTimeSeen.
        // We use "earliestOpTimeSeen" to keep track of the current minimum starting timestamp.
        if (earliestOpTimeSeen.isNull() ||
            earliestOpTimeSeen.getTimestamp() > remoteEarliestOpTime.getTimestamp()) {
            earliestOpTimeSeen = remoteEarliestOpTime;
        }

        _chooseAndProbeNextSyncSource(earliestOpTimeSeen);
        return;
    }

    // Schedules fetcher to look for '_requiredOpTime' in the remote oplog.
    if (!_requiredOpTime.isNull()) {
        auto status = _scheduleFetcher(_makeRequiredOpTimeFetcher(candidate, earliestOpTimeSeen));
        if (!status.isOK()) {
            _finishCallback(status);
        }
        return;
    }

    _finishCallback(candidate);
}