Пример #1
0
void SyncSourceResolver::_requiredOpTimeFetcherCallback(
    const StatusWith<Fetcher::QueryResponse>& queryResult,
    HostAndPort candidate,
    OpTime earliestOpTimeSeen,
    int rbid) {
    if (_isShuttingDown()) {
        _finishCallback(Status(ErrorCodes::CallbackCanceled,
                               str::stream() << "sync source resolver shut down while looking for "
                                                "required optime "
                                             << _requiredOpTime.toString()
                                             << " in candidate's oplog: "
                                             << candidate))
            .transitional_ignore();
        return;
    }

    if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) {
        _finishCallback(queryResult.getStatus()).transitional_ignore();
        return;
    }

    if (!queryResult.isOK()) {
        // We got an error.
        const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration;
        log() << "Blacklisting " << candidate << " due to required optime fetcher error: '"
              << queryResult.getStatus() << "' for " << kFetcherErrorBlacklistDuration
              << " until: " << until << ". required optime: " << _requiredOpTime;
        _syncSourceSelector->blacklistSyncSource(candidate, until);

        _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore();
        return;
    }

    const auto& queryResponse = queryResult.getValue();
    auto status = _compareRequiredOpTimeWithQueryResponse(queryResponse);
    if (!status.isOK()) {
        const auto until = _taskExecutor->now() + kNoRequiredOpTimeBlacklistDuration;
        warning() << "We cannot use " << candidate.toString()
                  << " as a sync source because it does not contain the necessary "
                     "operations for us to reach a consistent state: "
                  << status << " last fetched optime: " << _lastOpTimeFetched
                  << ". required optime: " << _requiredOpTime
                  << ". Blacklisting this sync source for " << kNoRequiredOpTimeBlacklistDuration
                  << " until: " << until;
        _syncSourceSelector->blacklistSyncSource(candidate, until);

        _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore();
        return;
    }

    _finishCallback(candidate, rbid).ignore();
}
Пример #2
0
void CollectionCloner::_insertDocumentsCallback(
    const executor::TaskExecutor::CallbackArgs& cbd,
    bool lastBatch,
    std::shared_ptr<OnCompletionGuard> onCompletionGuard) {
    if (!cbd.status.isOK()) {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, cbd.status);
        return;
    }

    UniqueLock lk(_mutex);
    std::vector<BSONObj> docs;
    if (_documentsToInsert.size() == 0) {
        warning() << "_insertDocumentsCallback, but no documents to insert for ns:" << _destNss;
        if (lastBatch) {
            onCompletionGuard->setResultAndCancelRemainingWork_inlock(lk, Status::OK());
        }
        return;
    }
    _documentsToInsert.swap(docs);
    _stats.documentsCopied += docs.size();
    ++_stats.fetchBatches;
    _progressMeter.hit(int(docs.size()));
    invariant(_collLoader);
    const auto status = _collLoader->insertDocuments(docs.cbegin(), docs.cend());
    if (!status.isOK()) {
        onCompletionGuard->setResultAndCancelRemainingWork_inlock(lk, status);
        return;
    }

    MONGO_FAIL_POINT_BLOCK(initialSyncHangDuringCollectionClone, options) {
        const BSONObj& data = options.getData();
        if (data["namespace"].String() == _destNss.ns() &&
            static_cast<int>(_stats.documentsCopied) >= data["numDocsToClone"].numberInt()) {
            lk.unlock();
            log() << "initial sync - initialSyncHangDuringCollectionClone fail point "
                     "enabled. Blocking until fail point is disabled.";
            while (MONGO_FAIL_POINT(initialSyncHangDuringCollectionClone) && !_isShuttingDown()) {
                mongo::sleepsecs(1);
            }
            lk.lock();
        }
    }

    if (lastBatch) {
        // Clean up resources once the last batch has been copied over and set the status to OK.
        onCompletionGuard->setResultAndCancelRemainingWork_inlock(lk, Status::OK());
    }
}
Пример #3
0
StatusWith<HostAndPort> SyncSourceResolver::_chooseNewSyncSource() {
    HostAndPort candidate;
    try {
        candidate = _syncSourceSelector->chooseNewSyncSource(_lastOpTimeFetched);
    } catch (...) {
        return exceptionToStatus();
    }

    if (_isShuttingDown()) {
        return Status(ErrorCodes::CallbackCanceled,
                      str::stream() << "sync source resolver shut down before probing candidate: "
                                    << candidate);
    }

    return candidate;
}
Пример #4
0
void SyncSourceResolver::_firstOplogEntryFetcherCallback(
    const StatusWith<Fetcher::QueryResponse>& queryResult,
    HostAndPort candidate,
    OpTime earliestOpTimeSeen) {
    if (_isShuttingDown()) {
        _finishCallback(Status(ErrorCodes::CallbackCanceled,
                               str::stream()
                                   << "sync source resolver shut down while probing candidate: "
                                   << candidate));
        return;
    }

    if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) {
        _finishCallback(queryResult.getStatus());
        return;
    }

    if (!queryResult.isOK()) {
        // We got an error.
        const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration;
        log() << "Blacklisting " << candidate << " due to error: '" << queryResult.getStatus()
              << "' for " << kFetcherErrorBlacklistDuration << " until: " << until;
        _syncSourceSelector->blacklistSyncSource(candidate, until);

        _chooseAndProbeNextSyncSource(earliestOpTimeSeen);
        return;
    }

    const auto& queryResponse = queryResult.getValue();
    const auto remoteEarliestOpTime = _parseRemoteEarliestOpTime(candidate, queryResponse);
    if (remoteEarliestOpTime.isNull()) {
        _chooseAndProbeNextSyncSource(earliestOpTimeSeen);
        return;
    }

    // remoteEarliestOpTime may come from a very old config, so we cannot compare their terms.
    if (_lastOpTimeFetched.getTimestamp() < remoteEarliestOpTime.getTimestamp()) {
        // We're too stale to use this sync source.
        const auto blacklistDuration = kTooStaleBlacklistDuration;
        const auto until = _taskExecutor->now() + Minutes(1);

        log() << "We are too stale to use " << candidate << " as a sync source. "
              << "Blacklisting this sync source"
              << " because our last fetched timestamp: " << _lastOpTimeFetched.getTimestamp()
              << " is before their earliest timestamp: " << remoteEarliestOpTime.getTimestamp()
              << " for " << blacklistDuration << " until: " << until;

        _syncSourceSelector->blacklistSyncSource(candidate, until);

        // If all the viable sync sources are too far ahead of us (i.e. we are "too stale" relative
        // each sync source), we will want to return the starting timestamp of the sync source
        // candidate that is closest to us. See SyncSourceResolverResponse::earliestOpTimeSeen.
        // We use "earliestOpTimeSeen" to keep track of the current minimum starting timestamp.
        if (earliestOpTimeSeen.isNull() ||
            earliestOpTimeSeen.getTimestamp() > remoteEarliestOpTime.getTimestamp()) {
            earliestOpTimeSeen = remoteEarliestOpTime;
        }

        _chooseAndProbeNextSyncSource(earliestOpTimeSeen);
        return;
    }

    // Schedules fetcher to look for '_requiredOpTime' in the remote oplog.
    if (!_requiredOpTime.isNull()) {
        auto status = _scheduleFetcher(_makeRequiredOpTimeFetcher(candidate, earliestOpTimeSeen));
        if (!status.isOK()) {
            _finishCallback(status);
        }
        return;
    }

    _finishCallback(candidate);
}
Пример #5
0
void CollectionCloner::_handleARMResultsCallback(
    const executor::TaskExecutor::CallbackArgs& cbd,
    std::shared_ptr<OnCompletionGuard> onCompletionGuard) {
    auto setResultAndCancelRemainingWork = [this](std::shared_ptr<OnCompletionGuard> guard,
                                                  Status status) {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        guard->setResultAndCancelRemainingWork_inlock(lock, status);
        return;
    };

    if (!cbd.status.isOK()) {
        // Wait for active inserts to complete.
        waitForDbWorker();
        Status newStatus = cbd.status.withContext(str::stream() << "Error querying collection '"
                                                                << _sourceNss.ns());
        setResultAndCancelRemainingWork(onCompletionGuard, cbd.status);
        return;
    }

    // Pull the documents from the ARM into a buffer until the entire batch has been processed.
    bool lastBatch;
    {
        UniqueLock lk(_mutex);
        auto nextBatchStatus = _bufferNextBatchFromArm(lk);
        if (!nextBatchStatus.isOK()) {
            if (_options.uuid && (nextBatchStatus.code() == ErrorCodes::OperationFailed ||
                                  nextBatchStatus.code() == ErrorCodes::CursorNotFound)) {
                // With these errors, it's possible the collection was dropped while we were
                // cloning.  If so, we'll execute the drop during oplog application, so it's OK to
                // just stop cloning.  This is only safe if cloning by UUID; if we are cloning by
                // name, we have no way to detect if the collection was dropped and another
                // collection with the same name created in the interim.
                _verifyCollectionWasDropped(lk, nextBatchStatus, onCompletionGuard, cbd.opCtx);
            } else {
                onCompletionGuard->setResultAndCancelRemainingWork_inlock(lk, nextBatchStatus);
            }
            return;
        }

        // Check if this is the last batch of documents to clone.
        lastBatch = _arm->remotesExhausted();
    }

    // Schedule the next document batch insertion.
    auto&& scheduleResult = _scheduleDbWorkFn([=](const executor::TaskExecutor::CallbackArgs& cbd) {
        _insertDocumentsCallback(cbd, lastBatch, onCompletionGuard);
    });
    if (!scheduleResult.isOK()) {
        Status newStatus = scheduleResult.getStatus().withContext(
            str::stream() << "Error cloning collection '" << _sourceNss.ns() << "'");
        setResultAndCancelRemainingWork(onCompletionGuard, scheduleResult.getStatus());
        return;
    }

    MONGO_FAIL_POINT_BLOCK(initialSyncHangCollectionClonerAfterHandlingBatchResponse, nssData) {
        const BSONObj& data = nssData.getData();
        auto nss = data["nss"].str();
        // Only hang when cloning the specified collection, or if no collection was specified.
        if (nss.empty() || _destNss.toString() == nss) {
            while (MONGO_FAIL_POINT(initialSyncHangCollectionClonerAfterHandlingBatchResponse) &&
                   !_isShuttingDown()) {
                log() << "initialSyncHangCollectionClonerAfterHandlingBatchResponse fail point "
                         "enabled for "
                      << _destNss.toString() << ". Blocking until fail point is disabled.";
                mongo::sleepsecs(1);
            }
        }
    }

    // If the remote cursors are not exhausted, schedule this callback again to handle
    // the impending cursor response.
    if (!lastBatch) {
        Status scheduleStatus = _scheduleNextARMResultsCallback(onCompletionGuard);
        if (!scheduleStatus.isOK()) {
            setResultAndCancelRemainingWork(onCompletionGuard, scheduleStatus);
            return;
        }
    }
}
Пример #6
0
void CollectionCloner::_beginCollectionCallback(const executor::TaskExecutor::CallbackArgs& cbd) {
    if (!cbd.status.isOK()) {
        _finishCallback(cbd.status);
        return;
    }
    MONGO_FAIL_POINT_BLOCK(initialSyncHangCollectionClonerBeforeEstablishingCursor, nssData) {
        const BSONObj& data = nssData.getData();
        auto nss = data["nss"].str();
        // Only hang when cloning the specified collection, or if no collection was specified.
        if (nss.empty() || _destNss.toString() == nss) {
            while (MONGO_FAIL_POINT(initialSyncHangCollectionClonerBeforeEstablishingCursor) &&
                   !_isShuttingDown()) {
                log() << "initialSyncHangCollectionClonerBeforeEstablishingCursor fail point "
                         "enabled for "
                      << _destNss.toString() << ". Blocking until fail point is disabled.";
                mongo::sleepsecs(1);
            }
        }
    }
    if (!_idIndexSpec.isEmpty() && _options.autoIndexId == CollectionOptions::NO) {
        warning()
            << "Found the _id_ index spec but the collection specified autoIndexId of false on ns:"
            << this->_sourceNss;
    }

    auto collectionBulkLoader = _storageInterface->createCollectionForBulkLoading(
        _destNss, _options, _idIndexSpec, _indexSpecs);

    if (!collectionBulkLoader.isOK()) {
        _finishCallback(collectionBulkLoader.getStatus());
        return;
    }

    _stats.indexes = _indexSpecs.size();
    if (!_idIndexSpec.isEmpty()) {
        ++_stats.indexes;
    }

    _collLoader = std::move(collectionBulkLoader.getValue());

    BSONObjBuilder cmdObj;
    EstablishCursorsCommand cursorCommand;
    // The 'find' command is used when the number of cloning cursors is 1 to ensure
    // the correctness of the collection cloning process until 'parallelCollectionScan'
    // can be tested more extensively in context of initial sync.
    if (_maxNumClonerCursors == 1) {
        cmdObj.appendElements(
            makeCommandWithUUIDorCollectionName("find", _options.uuid, _sourceNss));
        cmdObj.append("noCursorTimeout", true);
        // Set batchSize to be 0 to establish the cursor without fetching any documents,
        // similar to the response format of 'parallelCollectionScan'.
        cmdObj.append("batchSize", 0);
        cursorCommand = Find;
    } else {
        cmdObj.appendElements(makeCommandWithUUIDorCollectionName(
            "parallelCollectionScan", _options.uuid, _sourceNss));
        cmdObj.append("numCursors", _maxNumClonerCursors);
        cursorCommand = ParallelCollScan;
    }

    Client::initThreadIfNotAlready();
    auto opCtx = cc().getOperationContext();

    MONGO_FAIL_POINT_BLOCK(initialSyncHangBeforeCollectionClone, options) {
        const BSONObj& data = options.getData();
        if (data["namespace"].String() == _destNss.ns()) {
            log() << "initial sync - initialSyncHangBeforeCollectionClone fail point "
                     "enabled. Blocking until fail point is disabled.";
            while (MONGO_FAIL_POINT(initialSyncHangBeforeCollectionClone) && !_isShuttingDown()) {
                mongo::sleepsecs(1);
            }
        }
    }

    _establishCollectionCursorsScheduler = stdx::make_unique<RemoteCommandRetryScheduler>(
        _executor,
        RemoteCommandRequest(_source,
                             _sourceNss.db().toString(),
                             cmdObj.obj(),
                             ReadPreferenceSetting::secondaryPreferredMetadata(),
                             opCtx,
                             RemoteCommandRequest::kNoTimeout),
        [=](const RemoteCommandCallbackArgs& rcbd) {
            _establishCollectionCursorsCallback(rcbd, cursorCommand);
        },
        RemoteCommandRetryScheduler::makeRetryPolicy(
            numInitialSyncCollectionFindAttempts.load(),
            executor::RemoteCommandRequest::kNoTimeout,
            RemoteCommandRetryScheduler::kAllRetriableErrors));
    auto scheduleStatus = _establishCollectionCursorsScheduler->startup();
    LOG(1) << "Attempting to establish cursors with maxNumClonerCursors: " << _maxNumClonerCursors;

    if (!scheduleStatus.isOK()) {
        _establishCollectionCursorsScheduler.reset();
        _finishCallback(scheduleStatus);
        return;
    }
}
Пример #7
0
void AbstractOplogFetcher::_callback(const Fetcher::QueryResponseStatus& result,
                                     BSONObjBuilder* getMoreBob) {
    Status responseStatus =
        _checkForShutdownAndConvertStatus(result.getStatus(), "error in fetcher batch callback");
    if (ErrorCodes::CallbackCanceled == responseStatus) {
        LOG(1) << _getComponentName() << " oplog query cancelled to " << _getSource() << ": "
               << redact(responseStatus);
        _finishCallback(responseStatus);
        return;
    }

    // If target cut connections between connecting and querying (for
    // example, because it stepped down) we might not have a cursor.
    if (!responseStatus.isOK()) {
        BSONObj findCommandObj =
            _makeFindCommandObject(_nss, _getLastOpTimeWithHashFetched().opTime);
        BSONObj metadataObj = _makeMetadataObject();
        {
            stdx::lock_guard<stdx::mutex> lock(_mutex);
            if (_fetcherRestarts == _maxFetcherRestarts) {
                log() << "Error returned from oplog query (no more query restarts left): "
                      << redact(responseStatus);
            } else {
                log() << "Restarting oplog query due to error: " << redact(responseStatus)
                      << ". Last fetched optime (with hash): " << _lastFetched
                      << ". Restarts remaining: " << (_maxFetcherRestarts - _fetcherRestarts);
                _fetcherRestarts++;
                // Destroying current instance in _shuttingDownFetcher will possibly block.
                _shuttingDownFetcher.reset();
                // Move the old fetcher into the shutting down instance.
                _shuttingDownFetcher.swap(_fetcher);
                // Create and start fetcher with current term and new starting optime.
                _fetcher = _makeFetcher(findCommandObj, metadataObj);
                auto scheduleStatus = _scheduleFetcher_inlock();
                if (scheduleStatus.isOK()) {
                    log() << "Scheduled new oplog query " << _fetcher->toString();
                    return;
                }
                error() << "Error scheduling new oplog query: " << redact(scheduleStatus)
                        << ". Returning current oplog query error: " << redact(responseStatus);
            }
        }
        _finishCallback(responseStatus);
        return;
    }

    // Reset fetcher restart counter on successful response.
    {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        invariant(_isActive_inlock());
        _fetcherRestarts = 0;
    }

    if (_isShuttingDown()) {
        _finishCallback(
            Status(ErrorCodes::CallbackCanceled, _getComponentName() + " shutting down"));
        return;
    }

    // At this point we have a successful batch and can call the subclass's _onSuccessfulBatch.
    const auto& queryResponse = result.getValue();
    auto batchResult = _onSuccessfulBatch(queryResponse);
    if (!batchResult.isOK()) {
        // The stopReplProducer fail point expects this to return successfully. If another fail
        // point wants this to return unsuccessfully, it should use a different error code.
        if (batchResult.getStatus() == ErrorCodes::FailPointEnabled) {
            _finishCallback(Status::OK());
            return;
        }
        _finishCallback(batchResult.getStatus());
        return;
    }

    // No more data. Stop processing and return Status::OK.
    if (!getMoreBob) {
        _finishCallback(Status::OK());
        return;
    }

    // We have now processed the batch and should move forward our view of _lastFetched. Note that
    // the _lastFetched value will not be updated until the _onSuccessfulBatch function is
    // completed.
    const auto& documents = queryResponse.documents;
    if (documents.size() > 0) {
        auto lastDocRes = AbstractOplogFetcher::parseOpTimeWithHash(documents.back());
        if (!lastDocRes.isOK()) {
            _finishCallback(lastDocRes.getStatus());
            return;
        }
        auto lastDoc = lastDocRes.getValue();
        LOG(3) << _getComponentName()
               << " setting last fetched optime ahead after batch: " << lastDoc.opTime
               << "; hash: " << lastDoc.value;

        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _lastFetched = lastDoc;
    }

    // Check for shutdown to save an unnecessary `getMore` request.
    if (_isShuttingDown()) {
        _finishCallback(
            Status(ErrorCodes::CallbackCanceled, _getComponentName() + " shutting down"));
        return;
    }

    // The _onSuccessfulBatch function returns the `getMore` command we want to send.
    getMoreBob->appendElements(batchResult.getValue());
}