// Callback invoked with the result of the fetcher that searched 'candidate's oplog for
// '_requiredOpTime'. On success, completes resolution with this candidate; on failure,
// blacklists the candidate and moves on to probe the next sync source.
//
// 'earliestOpTimeSeen' is threaded through to _chooseAndProbeNextSyncSource so the caller
// can report the closest usable starting optime if every candidate turns out to be unusable.
void SyncSourceResolver::_requiredOpTimeFetcherCallback(
    const StatusWith<Fetcher::QueryResponse>& queryResult,
    HostAndPort candidate,
    OpTime earliestOpTimeSeen,
    int rbid) {
    // Resolver was shut down while this callback was in flight; report cancellation.
    if (_isShuttingDown()) {
        _finishCallback(Status(ErrorCodes::CallbackCanceled,
                               str::stream() << "sync source resolver shut down while looking for "
                                                "required optime "
                                             << _requiredOpTime.toString()
                                             << " in candidate's oplog: "
                                             << candidate))
            .transitional_ignore();
        return;
    }

    // Propagate an explicit cancellation from the fetcher without blacklisting the candidate.
    if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) {
        _finishCallback(queryResult.getStatus()).transitional_ignore();
        return;
    }

    if (!queryResult.isOK()) {
        // We got an error. Temporarily blacklist this candidate and try the next one.
        const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration;
        log() << "Blacklisting " << candidate << " due to required optime fetcher error: '"
              << queryResult.getStatus() << "' for " << kFetcherErrorBlacklistDuration
              << " until: " << until << ". required optime: " << _requiredOpTime;
        _syncSourceSelector->blacklistSyncSource(candidate, until);
        _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore();
        return;
    }

    const auto& queryResponse = queryResult.getValue();
    // Verify the candidate's oplog actually contains '_requiredOpTime'; a candidate that is
    // missing it cannot bring us to a consistent state, so it gets a longer blacklist period.
    auto status = _compareRequiredOpTimeWithQueryResponse(queryResponse);
    if (!status.isOK()) {
        const auto until = _taskExecutor->now() + kNoRequiredOpTimeBlacklistDuration;
        warning() << "We cannot use " << candidate.toString()
                  << " as a sync source because it does not contain the necessary "
                     "operations for us to reach a consistent state: "
                  << status << " last fetched optime: " << _lastOpTimeFetched
                  << ". required optime: " << _requiredOpTime
                  << ". Blacklisting this sync source for " << kNoRequiredOpTimeBlacklistDuration
                  << " until: " << until;
        _syncSourceSelector->blacklistSyncSource(candidate, until);
        _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore();
        return;
    }

    // Candidate passed all checks: report it (with the rollback id observed during probing).
    _finishCallback(candidate, rbid).ignore();
}
// Drains the '_documentsToInsert' buffer and inserts the batch into the destination
// collection through the bulk loader. 'lastBatch' indicates this is the final batch, in
// which case the completion guard is resolved with OK once the insert succeeds.
void CollectionCloner::_insertDocumentsCallback(
    const executor::TaskExecutor::CallbackArgs& cbd,
    bool lastBatch,
    std::shared_ptr<OnCompletionGuard> onCompletionGuard) {
    // Executor-level failure (e.g. cancellation): abort remaining work with that status.
    if (!cbd.status.isOK()) {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, cbd.status);
        return;
    }

    UniqueLock lk(_mutex);
    std::vector<BSONObj> docs;
    // An empty buffer here is unexpected (the producer should have queued documents before
    // scheduling this callback), so it is logged, but it is not treated as a hard error.
    if (_documentsToInsert.size() == 0) {
        warning() << "_insertDocumentsCallback, but no documents to insert for ns:" << _destNss;
        // Still complete successfully if this was the final batch.
        if (lastBatch) {
            onCompletionGuard->setResultAndCancelRemainingWork_inlock(lk, Status::OK());
        }
        return;
    }
    // Move the buffered documents into a local vector under the lock, emptying the buffer.
    _documentsToInsert.swap(docs);
    _stats.documentsCopied += docs.size();
    ++_stats.fetchBatches;
    _progressMeter.hit(int(docs.size()));
    invariant(_collLoader);

    const auto status = _collLoader->insertDocuments(docs.cbegin(), docs.cend());
    if (!status.isOK()) {
        onCompletionGuard->setResultAndCancelRemainingWork_inlock(lk, status);
        return;
    }

    MONGO_FAIL_POINT_BLOCK(initialSyncHangDuringCollectionClone, options) {
        const BSONObj& data = options.getData();
        if (data["namespace"].String() == _destNss.ns() &&
            static_cast<int>(_stats.documentsCopied) >= data["numDocsToClone"].numberInt()) {
            // Release the mutex while sleeping so other cloner work isn't blocked by the
            // fail point; reacquire before touching shared state again.
            lk.unlock();
            log() << "initial sync - initialSyncHangDuringCollectionClone fail point "
                     "enabled. Blocking until fail point is disabled.";
            while (MONGO_FAIL_POINT(initialSyncHangDuringCollectionClone) && !_isShuttingDown()) {
                mongo::sleepsecs(1);
            }
            lk.lock();
        }
    }

    if (lastBatch) {
        // Clean up resources once the last batch has been copied over and set the status to OK.
        onCompletionGuard->setResultAndCancelRemainingWork_inlock(lk, Status::OK());
    }
}
// Asks the sync source selector for the next candidate to probe.
//
// Returns the candidate host, the exception-derived status if the selector throws, or
// CallbackCanceled if the resolver was shut down while the candidate was being chosen.
StatusWith<HostAndPort> SyncSourceResolver::_chooseNewSyncSource() {
    HostAndPort syncSourceCandidate;
    try {
        syncSourceCandidate = _syncSourceSelector->chooseNewSyncSource(_lastOpTimeFetched);
    } catch (...) {
        return exceptionToStatus();
    }

    if (!_isShuttingDown()) {
        return syncSourceCandidate;
    }

    return Status(ErrorCodes::CallbackCanceled,
                  str::stream() << "sync source resolver shut down before probing candidate: "
                                << syncSourceCandidate);
}
void SyncSourceResolver::_firstOplogEntryFetcherCallback( const StatusWith<Fetcher::QueryResponse>& queryResult, HostAndPort candidate, OpTime earliestOpTimeSeen) { if (_isShuttingDown()) { _finishCallback(Status(ErrorCodes::CallbackCanceled, str::stream() << "sync source resolver shut down while probing candidate: " << candidate)); return; } if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) { _finishCallback(queryResult.getStatus()); return; } if (!queryResult.isOK()) { // We got an error. const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration; log() << "Blacklisting " << candidate << " due to error: '" << queryResult.getStatus() << "' for " << kFetcherErrorBlacklistDuration << " until: " << until; _syncSourceSelector->blacklistSyncSource(candidate, until); _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } const auto& queryResponse = queryResult.getValue(); const auto remoteEarliestOpTime = _parseRemoteEarliestOpTime(candidate, queryResponse); if (remoteEarliestOpTime.isNull()) { _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } // remoteEarliestOpTime may come from a very old config, so we cannot compare their terms. if (_lastOpTimeFetched.getTimestamp() < remoteEarliestOpTime.getTimestamp()) { // We're too stale to use this sync source. const auto blacklistDuration = kTooStaleBlacklistDuration; const auto until = _taskExecutor->now() + Minutes(1); log() << "We are too stale to use " << candidate << " as a sync source. " << "Blacklisting this sync source" << " because our last fetched timestamp: " << _lastOpTimeFetched.getTimestamp() << " is before their earliest timestamp: " << remoteEarliestOpTime.getTimestamp() << " for " << blacklistDuration << " until: " << until; _syncSourceSelector->blacklistSyncSource(candidate, until); // If all the viable sync sources are too far ahead of us (i.e. 
we are "too stale" relative // each sync source), we will want to return the starting timestamp of the sync source // candidate that is closest to us. See SyncSourceResolverResponse::earliestOpTimeSeen. // We use "earliestOpTimeSeen" to keep track of the current minimum starting timestamp. if (earliestOpTimeSeen.isNull() || earliestOpTimeSeen.getTimestamp() > remoteEarliestOpTime.getTimestamp()) { earliestOpTimeSeen = remoteEarliestOpTime; } _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } // Schedules fetcher to look for '_requiredOpTime' in the remote oplog. if (!_requiredOpTime.isNull()) { auto status = _scheduleFetcher(_makeRequiredOpTimeFetcher(candidate, earliestOpTimeSeen)); if (!status.isOK()) { _finishCallback(status); } return; } _finishCallback(candidate); }
// Handles one batch of results from the AsyncResultsMerger (ARM): buffers the documents,
// schedules their insertion on the DB worker pool, and — unless this was the last batch —
// reschedules itself to consume the next cursor response.
void CollectionCloner::_handleARMResultsCallback(
    const executor::TaskExecutor::CallbackArgs& cbd,
    std::shared_ptr<OnCompletionGuard> onCompletionGuard) {
    // Helper to record a terminal status under the mutex and cancel outstanding work.
    auto setResultAndCancelRemainingWork = [this](std::shared_ptr<OnCompletionGuard> guard,
                                                  Status status) {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        guard->setResultAndCancelRemainingWork_inlock(lock, status);
        return;
    };

    if (!cbd.status.isOK()) {
        // Wait for active inserts to complete.
        waitForDbWorker();
        // BUGFIX: propagate the contextualized status. The previous code built 'newStatus'
        // via withContext() but then reported the bare 'cbd.status', losing the context
        // (and the context string was missing its closing quote).
        Status newStatus = cbd.status.withContext(str::stream() << "Error querying collection '"
                                                                << _sourceNss.ns() << "'");
        setResultAndCancelRemainingWork(onCompletionGuard, newStatus);
        return;
    }

    // Pull the documents from the ARM into a buffer until the entire batch has been processed.
    bool lastBatch;
    {
        UniqueLock lk(_mutex);
        auto nextBatchStatus = _bufferNextBatchFromArm(lk);
        if (!nextBatchStatus.isOK()) {
            if (_options.uuid && (nextBatchStatus.code() == ErrorCodes::OperationFailed ||
                                  nextBatchStatus.code() == ErrorCodes::CursorNotFound)) {
                // With these errors, it's possible the collection was dropped while we were
                // cloning. If so, we'll execute the drop during oplog application, so it's OK to
                // just stop cloning. This is only safe if cloning by UUID; if we are cloning by
                // name, we have no way to detect if the collection was dropped and another
                // collection with the same name created in the interim.
                _verifyCollectionWasDropped(lk, nextBatchStatus, onCompletionGuard, cbd.opCtx);
            } else {
                onCompletionGuard->setResultAndCancelRemainingWork_inlock(lk, nextBatchStatus);
            }
            return;
        }

        // Check if this is the last batch of documents to clone.
        lastBatch = _arm->remotesExhausted();
    }

    // Schedule the next document batch insertion.
    auto&& scheduleResult = _scheduleDbWorkFn([=](const executor::TaskExecutor::CallbackArgs& cbd) {
        _insertDocumentsCallback(cbd, lastBatch, onCompletionGuard);
    });
    if (!scheduleResult.isOK()) {
        // BUGFIX: report the contextualized 'newStatus' instead of the bare scheduling status,
        // which previously left 'newStatus' constructed but unused.
        Status newStatus = scheduleResult.getStatus().withContext(
            str::stream() << "Error cloning collection '" << _sourceNss.ns() << "'");
        setResultAndCancelRemainingWork(onCompletionGuard, newStatus);
        return;
    }

    MONGO_FAIL_POINT_BLOCK(initialSyncHangCollectionClonerAfterHandlingBatchResponse, nssData) {
        const BSONObj& data = nssData.getData();
        auto nss = data["nss"].str();
        // Only hang when cloning the specified collection, or if no collection was specified.
        if (nss.empty() || _destNss.toString() == nss) {
            while (MONGO_FAIL_POINT(initialSyncHangCollectionClonerAfterHandlingBatchResponse) &&
                   !_isShuttingDown()) {
                log() << "initialSyncHangCollectionClonerAfterHandlingBatchResponse fail point "
                         "enabled for "
                      << _destNss.toString() << ". Blocking until fail point is disabled.";
                mongo::sleepsecs(1);
            }
        }
    }

    // If the remote cursors are not exhausted, schedule this callback again to handle
    // the impending cursor response.
    if (!lastBatch) {
        Status scheduleStatus = _scheduleNextARMResultsCallback(onCompletionGuard);
        if (!scheduleStatus.isOK()) {
            setResultAndCancelRemainingWork(onCompletionGuard, scheduleStatus);
            return;
        }
    }
}
// Creates the destination collection (via the bulk loader) and then schedules the remote
// command that establishes the cloning cursor(s) on the source: a single 'find' cursor when
// '_maxNumClonerCursors' is 1, or 'parallelCollectionScan' cursors otherwise.
void CollectionCloner::_beginCollectionCallback(const executor::TaskExecutor::CallbackArgs& cbd) {
    // Executor-level failure (e.g. cancellation): finish immediately with that status.
    if (!cbd.status.isOK()) {
        _finishCallback(cbd.status);
        return;
    }
    MONGO_FAIL_POINT_BLOCK(initialSyncHangCollectionClonerBeforeEstablishingCursor, nssData) {
        const BSONObj& data = nssData.getData();
        auto nss = data["nss"].str();
        // Only hang when cloning the specified collection, or if no collection was specified.
        if (nss.empty() || _destNss.toString() == nss) {
            while (MONGO_FAIL_POINT(initialSyncHangCollectionClonerBeforeEstablishingCursor) &&
                   !_isShuttingDown()) {
                log() << "initialSyncHangCollectionClonerBeforeEstablishingCursor fail point "
                         "enabled for "
                      << _destNss.toString() << ". Blocking until fail point is disabled.";
                mongo::sleepsecs(1);
            }
        }
    }
    // Inconsistent source metadata: an _id index spec was found even though the collection
    // options say autoIndexId is false. Logged only; cloning proceeds.
    if (!_idIndexSpec.isEmpty() && _options.autoIndexId == CollectionOptions::NO) {
        warning()
            << "Found the _id_ index spec but the collection specified autoIndexId of false on ns:"
            << this->_sourceNss;
    }

    auto collectionBulkLoader = _storageInterface->createCollectionForBulkLoading(
        _destNss, _options, _idIndexSpec, _indexSpecs);
    if (!collectionBulkLoader.isOK()) {
        _finishCallback(collectionBulkLoader.getStatus());
        return;
    }

    // Index stats: all secondary index specs, plus the _id index if present.
    _stats.indexes = _indexSpecs.size();
    if (!_idIndexSpec.isEmpty()) {
        ++_stats.indexes;
    }

    _collLoader = std::move(collectionBulkLoader.getValue());

    BSONObjBuilder cmdObj;
    EstablishCursorsCommand cursorCommand;
    // The 'find' command is used when the number of cloning cursors is 1 to ensure
    // the correctness of the collection cloning process until 'parallelCollectionScan'
    // can be tested more extensively in context of initial sync.
    if (_maxNumClonerCursors == 1) {
        cmdObj.appendElements(
            makeCommandWithUUIDorCollectionName("find", _options.uuid, _sourceNss));
        cmdObj.append("noCursorTimeout", true);
        // Set batchSize to be 0 to establish the cursor without fetching any documents,
        // similar to the response format of 'parallelCollectionScan'.
        cmdObj.append("batchSize", 0);
        cursorCommand = Find;
    } else {
        cmdObj.appendElements(makeCommandWithUUIDorCollectionName(
            "parallelCollectionScan", _options.uuid, _sourceNss));
        cmdObj.append("numCursors", _maxNumClonerCursors);
        cursorCommand = ParallelCollScan;
    }

    Client::initThreadIfNotAlready();
    auto opCtx = cc().getOperationContext();

    MONGO_FAIL_POINT_BLOCK(initialSyncHangBeforeCollectionClone, options) {
        const BSONObj& data = options.getData();
        if (data["namespace"].String() == _destNss.ns()) {
            log() << "initial sync - initialSyncHangBeforeCollectionClone fail point "
                     "enabled. Blocking until fail point is disabled.";
            while (MONGO_FAIL_POINT(initialSyncHangBeforeCollectionClone) && !_isShuttingDown()) {
                mongo::sleepsecs(1);
            }
        }
    }

    // Schedule the cursor-establishing command with retries for retriable errors; the
    // response is handled by _establishCollectionCursorsCallback.
    _establishCollectionCursorsScheduler = stdx::make_unique<RemoteCommandRetryScheduler>(
        _executor,
        RemoteCommandRequest(_source,
                             _sourceNss.db().toString(),
                             cmdObj.obj(),
                             ReadPreferenceSetting::secondaryPreferredMetadata(),
                             opCtx,
                             RemoteCommandRequest::kNoTimeout),
        [=](const RemoteCommandCallbackArgs& rcbd) {
            _establishCollectionCursorsCallback(rcbd, cursorCommand);
        },
        RemoteCommandRetryScheduler::makeRetryPolicy(
            numInitialSyncCollectionFindAttempts.load(),
            executor::RemoteCommandRequest::kNoTimeout,
            RemoteCommandRetryScheduler::kAllRetriableErrors));
    auto scheduleStatus = _establishCollectionCursorsScheduler->startup();
    LOG(1) << "Attempting to establish cursors with maxNumClonerCursors: " << _maxNumClonerCursors;

    if (!scheduleStatus.isOK()) {
        _establishCollectionCursorsScheduler.reset();
        _finishCallback(scheduleStatus);
        return;
    }
}
// Fetcher callback for each oplog query batch. Handles cancellation and shutdown, restarts
// the query on errors (up to '_maxFetcherRestarts'), forwards successful batches to the
// subclass via _onSuccessfulBatch(), advances '_lastFetched', and fills in 'getMoreBob'
// with the command that continues the cursor. A null 'getMoreBob' ends processing.
void AbstractOplogFetcher::_callback(const Fetcher::QueryResponseStatus& result,
                                     BSONObjBuilder* getMoreBob) {
    Status responseStatus =
        _checkForShutdownAndConvertStatus(result.getStatus(), "error in fetcher batch callback");
    if (ErrorCodes::CallbackCanceled == responseStatus) {
        LOG(1) << _getComponentName() << " oplog query cancelled to " << _getSource() << ": "
               << redact(responseStatus);
        _finishCallback(responseStatus);
        return;
    }

    // If target cut connections between connecting and querying (for
    // example, because it stepped down) we might not have a cursor.
    if (!responseStatus.isOK()) {
        // Build the restart command from the last optime fetched so a new fetcher resumes
        // where the failed one left off.
        BSONObj findCommandObj =
            _makeFindCommandObject(_nss, _getLastOpTimeWithHashFetched().opTime);
        BSONObj metadataObj = _makeMetadataObject();
        {
            stdx::lock_guard<stdx::mutex> lock(_mutex);
            if (_fetcherRestarts == _maxFetcherRestarts) {
                log() << "Error returned from oplog query (no more query restarts left): "
                      << redact(responseStatus);
            } else {
                log() << "Restarting oplog query due to error: " << redact(responseStatus)
                      << ". Last fetched optime (with hash): " << _lastFetched
                      << ". Restarts remaining: " << (_maxFetcherRestarts - _fetcherRestarts);
                _fetcherRestarts++;
                // Destroying current instance in _shuttingDownFetcher will possibly block.
                _shuttingDownFetcher.reset();
                // Move the old fetcher into the shutting down instance.
                _shuttingDownFetcher.swap(_fetcher);
                // Create and start fetcher with current term and new starting optime.
                _fetcher = _makeFetcher(findCommandObj, metadataObj);
                auto scheduleStatus = _scheduleFetcher_inlock();
                if (scheduleStatus.isOK()) {
                    log() << "Scheduled new oplog query " << _fetcher->toString();
                    return;
                }
                error() << "Error scheduling new oplog query: " << redact(scheduleStatus)
                        << ". Returning current oplog query error: " << redact(responseStatus);
            }
        }
        // Out of restarts (or the restart could not be scheduled): surface the original error.
        _finishCallback(responseStatus);
        return;
    }

    // Reset fetcher restart counter on successful response.
    {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        invariant(_isActive_inlock());
        _fetcherRestarts = 0;
    }

    if (_isShuttingDown()) {
        _finishCallback(
            Status(ErrorCodes::CallbackCanceled, _getComponentName() + " shutting down"));
        return;
    }

    // At this point we have a successful batch and can call the subclass's _onSuccessfulBatch.
    const auto& queryResponse = result.getValue();
    auto batchResult = _onSuccessfulBatch(queryResponse);
    if (!batchResult.isOK()) {
        // The stopReplProducer fail point expects this to return successfully. If another fail
        // point wants this to return unsuccessfully, it should use a different error code.
        if (batchResult.getStatus() == ErrorCodes::FailPointEnabled) {
            _finishCallback(Status::OK());
            return;
        }
        _finishCallback(batchResult.getStatus());
        return;
    }

    // No more data. Stop processing and return Status::OK.
    if (!getMoreBob) {
        _finishCallback(Status::OK());
        return;
    }

    // We have now processed the batch and should move forward our view of _lastFetched. Note that
    // the _lastFetched value will not be updated until the _onSuccessfulBatch function is
    // completed.
    const auto& documents = queryResponse.documents;
    if (documents.size() > 0) {
        auto lastDocRes = AbstractOplogFetcher::parseOpTimeWithHash(documents.back());
        if (!lastDocRes.isOK()) {
            _finishCallback(lastDocRes.getStatus());
            return;
        }
        auto lastDoc = lastDocRes.getValue();
        LOG(3) << _getComponentName()
               << " setting last fetched optime ahead after batch: " << lastDoc.opTime
               << "; hash: " << lastDoc.value;
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _lastFetched = lastDoc;
    }

    // Check for shutdown to save an unnecessary `getMore` request.
    if (_isShuttingDown()) {
        _finishCallback(
            Status(ErrorCodes::CallbackCanceled, _getComponentName() + " shutting down"));
        return;
    }

    // The _onSuccessfulBatch function returns the `getMore` command we want to send.
    getMoreBob->appendElements(batchResult.getValue());
}