/**
 * Asks for a new sync source candidate and schedules a fetcher that probes the first oplog entry
 * of that candidate.
 *
 * 'earliestOpTimeSeen' is forwarded to the probing fetcher. When no candidate is left and it is
 * non-null, the resolver finishes with OplogStartMissing and reports it in the response.
 *
 * Returns Status::OK() once the fetcher has been scheduled; otherwise returns whatever the
 * matching _finishCallback() overload returns.
 */
Status SyncSourceResolver::_chooseAndProbeNextSyncSource(OpTime earliestOpTimeSeen) {
    auto candidateResult = _chooseNewSyncSource();

    // Both a failed selection and "no candidate, and no remote oplog start seen so far" hand the
    // selection result to _finishCallback() unchanged.
    if (!candidateResult.isOK() ||
        (candidateResult.getValue().empty() && earliestOpTimeSeen.isNull())) {
        return _finishCallback(candidateResult);
    }

    if (candidateResult.getValue().empty()) {
        // No candidate left, but an earliest remote optime was recorded: report staleness.
        SyncSourceResolverResponse staleResponse;
        staleResponse.syncSourceStatus = {ErrorCodes::OplogStartMissing, "too stale to catch up"};
        staleResponse.earliestOpTimeSeen = earliestOpTimeSeen;
        return _finishCallback(staleResponse);
    }

    auto scheduleStatus = _scheduleFetcher(
        _makeFirstOplogEntryFetcher(candidateResult.getValue(), earliestOpTimeSeen));
    if (scheduleStatus.isOK()) {
        return Status::OK();
    }
    return _finishCallback(scheduleStatus);
}
/**
 * Executor callback that applies '_operations' through '_multiApply'.
 *
 * On any failure (bad callback status, failed or throwing apply) _finishCallback() receives the
 * error status together with '_operations'. On success it receives the timestamp of the optime
 * returned by '_multiApply', again together with '_operations'.
 */
void MultiApplier::_callback(const executor::TaskExecutor::CallbackArgs& cbd) {
    if (!cbd.status.isOK()) {
        _finishCallback(cbd.status, _operations);
        return;
    }

    invariant(!_operations.empty());

    StatusWith<OpTime> applyResult(ErrorCodes::InternalError, "not mutated");
    try {
        auto opCtx = cc().makeOperationContext();

        // Refer to multiSyncApply() and multiInitialSyncApply() in sync_tail.cpp.
        opCtx->setReplicatedWrites(false);

        // allow us to get through the magic barrier
        opCtx->lockState()->setIsBatchWriter(true);

        applyResult = _multiApply(opCtx.get(), _operations, _applyOperation);
    } catch (...) {
        // Any exception thrown while applying is reported as a status.
        applyResult = exceptionToStatus();
    }

    if (applyResult.isOK()) {
        _finishCallback(applyResult.getValue().getTimestamp(), _operations);
    } else {
        _finishCallback(applyResult.getStatus(), _operations);
    }
}
/**
 * Executor callback that applies every entry of '_operations' in order with '_applyOperation'.
 *
 * Stops at the first operation that fails (or throws) and reports the failure status together
 * with the operations from the failing one to the end. When all operations succeed, reports the
 * "ts" timestamp of the last operation and an empty operation list.
 */
void Applier::_callback(const ReplicationExecutor::CallbackData& cbd) {
    if (!cbd.status.isOK()) {
        _finishCallback(cbd.status, _operations);
        return;
    }

    invariant(cbd.txn);

    // Refer to multiSyncApply() and multiInitialSyncApply() in sync_tail.cpp.
    cbd.txn->setReplicatedWrites(false);

    // allow us to get through the magic barrier
    cbd.txn->lockState()->setIsBatchWriter(true);

    invariant(!_operations.empty());

    auto opIter = _operations.cbegin();
    while (opIter != _operations.cend()) {
        Status opStatus(ErrorCodes::InternalError, "not mutated");
        try {
            opStatus = _applyOperation(cbd.txn, *opIter);
        } catch (...) {
            opStatus = exceptionToStatus();
        }

        if (!opStatus.isOK()) {
            // 'opIter' refers to the first operation that was not applied; hand back everything
            // from there on.
            _finishCallback(opStatus, Operations(opIter, _operations.cend()));
            return;
        }
        ++opIter;
    }

    _finishCallback(_operations.back().getField("ts").timestamp(), Operations());
}
void CollectionCloner::_findCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult, Fetcher::NextAction* nextAction, BSONObjBuilder* getMoreBob) { if (!fetchResult.isOK()) { _finishCallback(nullptr, fetchResult.getStatus()); return; } auto batchData(fetchResult.getValue()); _documents = batchData.documents; bool lastBatch = *nextAction == Fetcher::NextAction::kNoAction; auto&& scheduleResult = _scheduleDbWorkFn(stdx::bind( &CollectionCloner::_insertDocumentsCallback, this, stdx::placeholders::_1, lastBatch)); if (!scheduleResult.isOK()) { _finishCallback(nullptr, scheduleResult.getStatus()); return; } if (*nextAction == Fetcher::NextAction::kGetMore) { invariant(getMoreBob); getMoreBob->append("getMore", batchData.cursorId); getMoreBob->append("collection", batchData.nss.coll()); } _dbWorkCallbackHandle = scheduleResult.getValue(); }
// TODO change the passed in function to be multiapply instead of apply inlock void MultiApplier::_callback(const ReplicationExecutor::CallbackArgs& cbd) { if (!cbd.status.isOK()) { _finishCallback(cbd.status, _operations); return; } invariant(cbd.txn); // Refer to multiSyncApply() and multiInitialSyncApply() in sync_tail.cpp. cbd.txn->setReplicatedWrites(false); // allow us to get through the magic barrier cbd.txn->lockState()->setIsBatchWriter(true); StatusWith<OpTime> applyStatus(ErrorCodes::InternalError, "not mutated"); invariant(!_operations.empty()); try { // TODO restructure to support moving _operations into this call. Can't do it today since // _finishCallback gets _operations on failure. applyStatus = _multiApply(cbd.txn, _operations, _applyOperation); } catch (...) { applyStatus = exceptionToStatus(); } if (!applyStatus.isOK()) { _finishCallback(applyStatus.getStatus(), _operations); return; } _finishCallback(applyStatus.getValue().getTimestamp(), Operations()); }
void SyncSourceResolver::_requiredOpTimeFetcherCallback( const StatusWith<Fetcher::QueryResponse>& queryResult, HostAndPort candidate, OpTime earliestOpTimeSeen, int rbid) { if (_isShuttingDown()) { _finishCallback(Status(ErrorCodes::CallbackCanceled, str::stream() << "sync source resolver shut down while looking for " "required optime " << _requiredOpTime.toString() << " in candidate's oplog: " << candidate)) .transitional_ignore(); return; } if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) { _finishCallback(queryResult.getStatus()).transitional_ignore(); return; } if (!queryResult.isOK()) { // We got an error. const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration; log() << "Blacklisting " << candidate << " due to required optime fetcher error: '" << queryResult.getStatus() << "' for " << kFetcherErrorBlacklistDuration << " until: " << until << ". required optime: " << _requiredOpTime; _syncSourceSelector->blacklistSyncSource(candidate, until); _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore(); return; } const auto& queryResponse = queryResult.getValue(); auto status = _compareRequiredOpTimeWithQueryResponse(queryResponse); if (!status.isOK()) { const auto until = _taskExecutor->now() + kNoRequiredOpTimeBlacklistDuration; warning() << "We cannot use " << candidate.toString() << " as a sync source because it does not contain the necessary " "operations for us to reach a consistent state: " << status << " last fetched optime: " << _lastOpTimeFetched << ". required optime: " << _requiredOpTime << ". Blacklisting this sync source for " << kNoRequiredOpTimeBlacklistDuration << " until: " << until; _syncSourceSelector->blacklistSyncSource(candidate, until); _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore(); return; } _finishCallback(candidate, rbid).ignore(); }
/**
 * Finishes the resolver with the chosen sync source.
 *
 * Wraps 'hostAndPort' in a SyncSourceResolverResponse. 'rbid' is copied into the response only
 * when it differs from ReplicationProcess::kUninitializedRollbackId.
 */
Status SyncSourceResolver::_finishCallback(HostAndPort hostAndPort, int rbid) {
    SyncSourceResolverResponse result;
    result.syncSourceStatus = std::move(hostAndPort);

    const bool rbidIsSet = (rbid != ReplicationProcess::kUninitializedRollbackId);
    if (rbidIsSet) {
        result.rbid = rbid;
    }

    return _finishCallback(result);
}
/**
 * Database-work callback that begins the destination collection through the storage interface
 * and then schedules the find fetcher.
 *
 * The first failing step is reported through _finishCallback(); nothing is reported on success.
 */
void CollectionCloner::_beginCollectionCallback(const ReplicationExecutor::CallbackArgs& cbd) {
    OperationContext* const opCtx = cbd.txn;

    if (!cbd.status.isOK()) {
        _finishCallback(opCtx, cbd.status);
        return;
    }

    Status beginStatus = _storageInterface->beginCollection(opCtx, _destNss, _options, _indexSpecs);
    if (!beginStatus.isOK()) {
        _finishCallback(opCtx, beginStatus);
        return;
    }

    Status fetcherStatus = _findFetcher.schedule();
    if (!fetcherStatus.isOK()) {
        _finishCallback(opCtx, fetcherStatus);
    }
}
void CollectionCloner::_findCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult, Fetcher::NextAction* nextAction, BSONObjBuilder* getMoreBob) { if (!fetchResult.isOK()) { Status newStatus{fetchResult.getStatus().code(), str::stream() << "While querying collection '" << _sourceNss.ns() << "' there was an error '" << fetchResult.getStatus().reason() << "'"}; // TODO: cancel active inserts? _finishCallback(newStatus); return; } auto batchData(fetchResult.getValue()); bool lastBatch = *nextAction == Fetcher::NextAction::kNoAction; if (batchData.documents.size() > 0) { LockGuard lk(_mutex); _documents.insert(_documents.end(), batchData.documents.begin(), batchData.documents.end()); } else if (!batchData.first) { warning() << "No documents returned in batch; ns: " << _sourceNss << ", cursorId:" << batchData.cursorId << ", isLastBatch:" << lastBatch; } auto&& scheduleResult = _scheduleDbWorkFn(stdx::bind( &CollectionCloner::_insertDocumentsCallback, this, stdx::placeholders::_1, lastBatch)); if (!scheduleResult.isOK()) { Status newStatus{scheduleResult.getStatus().code(), str::stream() << "While cloning collection '" << _sourceNss.ns() << "' there was an error '" << scheduleResult.getStatus().reason() << "'"}; _finishCallback(newStatus); return; } if (!lastBatch) { invariant(getMoreBob); getMoreBob->append("getMore", batchData.cursorId); getMoreBob->append("collection", batchData.nss.coll()); } }
/**
 * Database-work callback that inserts '_documents' into the destination collection.
 *
 * Failures are reported immediately through _finishCallback(). Success is reported only when
 * 'lastBatch' is true; for earlier batches the function returns without reporting.
 */
void CollectionCloner::_insertDocumentsCallback(const ReplicationExecutor::CallbackArgs& cbd,
                                                bool lastBatch) {
    OperationContext* const opCtx = cbd.txn;

    if (!cbd.status.isOK()) {
        _finishCallback(opCtx, cbd.status);
        return;
    }

    Status insertStatus = _storageInterface->insertDocuments(opCtx, _destNss, _documents);
    if (!insertStatus.isOK()) {
        _finishCallback(opCtx, insertStatus);
        return;
    }

    if (lastBatch) {
        _finishCallback(opCtx, Status::OK());
    }
}
/**
 * Database-work callback that drains '_documents' and hands the drained batch to '_collLoader'.
 *
 * '_mutex' is held while the buffer is swapped out, the stats and progress meter are updated
 * and the loader inserts the batch; it is released before _finishCallback() is invoked.
 * Success is reported only when 'lastBatch' is true.
 */
void CollectionCloner::_insertDocumentsCallback(const executor::TaskExecutor::CallbackArgs& cbd,
                                                bool lastBatch) {
    if (!cbd.status.isOK()) {
        _finishCallback(cbd.status);
        return;
    }

    std::vector<BSONObj> batch;
    UniqueLock lk(_mutex);

    if (_documents.size() == 0) {
        warning() << "_insertDocumentsCallback, but no documents to insert for ns:" << _destNss;

        if (lastBatch) {
            lk.unlock();
            _finishCallback(Status::OK());
        }
        return;
    }

    // Take ownership of the buffered documents, leaving '_documents' empty.
    _documents.swap(batch);
    _stats.documentsCopied += batch.size();
    ++_stats.fetchBatches;
    _progressMeter.hit(int(batch.size()));

    invariant(_collLoader);
    const auto insertStatus = _collLoader->insertDocuments(batch.cbegin(), batch.cend());
    lk.unlock();

    if (!insertStatus.isOK()) {
        _finishCallback(insertStatus);
        return;
    }

    // Only the last batch completes the cloner successfully.
    if (lastBatch) {
        _finishCallback(Status::OK());
    }
}
/**
 * Executor callback that builds the find command and metadata objects, creates '_fetcher' and
 * schedules it while holding '_mutex'.
 *
 * A bad callback status (as converted by _checkForShutdownAndConvertStatus()) or a scheduling
 * failure is reported through _finishCallback().
 */
void AbstractOplogFetcher::_makeAndScheduleFetcherCallback(
    const executor::TaskExecutor::CallbackArgs& args) {
    Status callbackStatus = _checkForShutdownAndConvertStatus(args, "error scheduling fetcher");
    if (!callbackStatus.isOK()) {
        _finishCallback(callbackStatus);
        return;
    }

    BSONObj findCmd = _makeFindCommandObject(_nss, _getLastOpTimeWithHashFetched().opTime);
    BSONObj metadata = _makeMetadataObject();

    Status fetcherStatus = Status::OK();
    {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        _fetcher = _makeFetcher(findCmd, metadata);
        fetcherStatus = _scheduleFetcher_inlock();
    }

    // Report the failure only after '_mutex' has been released.
    if (!fetcherStatus.isOK()) {
        _finishCallback(fetcherStatus);
    }
}
/**
 * Fetcher callback for one batch of the listIndexes query.
 *
 * Accumulates the returned index specs in '_indexSpecs'. While the fetcher requests more data,
 * fills 'getMoreBob'; after the final batch, schedules _beginCollectionCallback() as database
 * work and stores the resulting callback handle.
 */
void CollectionCloner::_listIndexesCallback(const Fetcher::QueryResponseStatus& fetchResult,
                                            Fetcher::NextAction* nextAction,
                                            BSONObjBuilder* getMoreBob) {
    if (!fetchResult.isOK()) {
        _finishCallback(nullptr, fetchResult.getStatus());
        return;
    }

    auto response = fetchResult.getValue();
    auto&& indexDocs = response.documents;

    if (indexDocs.empty()) {
        warning() << "No indexes found for collection " << _sourceNss.ns() << " while cloning from "
                  << _source;
    }

    // Several batches may arrive, so grow '_indexSpecs' rather than replacing it.
    _indexSpecs.reserve(_indexSpecs.size() + indexDocs.size());
    _indexSpecs.insert(_indexSpecs.end(), indexDocs.begin(), indexDocs.end());

    // The fetcher keeps calling with kGetMore until an error or the last batch.
    const bool moreBatchesExpected = (*nextAction == Fetcher::NextAction::kGetMore);
    if (moreBatchesExpected) {
        invariant(getMoreBob);
        getMoreBob->append("getMore", response.cursorId);
        getMoreBob->append("collection", response.nss.coll());
        return;
    }

    // All index specs have been collected; start cloning the collection data.
    auto&& dbWorkResult = _scheduleDbWorkFn(
        stdx::bind(&CollectionCloner::_beginCollectionCallback, this, stdx::placeholders::_1));
    if (!dbWorkResult.isOK()) {
        _finishCallback(nullptr, dbWorkResult.getStatus());
        return;
    }

    _dbWorkCallbackHandle = dbWorkResult.getValue();
}
void DatabaseCloner::_collectionClonerCallback(const Status& status, const NamespaceString& nss) { // Forward collection cloner result to caller. // Failure to clone a collection does not stop the database cloner // from cloning the rest of the collections in the listCollections result. _collectionWork(status, nss); _currentCollectionClonerIter++; LOG(1) << " cloning collection " << _currentCollectionClonerIter->getSourceNamespace(); if (_currentCollectionClonerIter != _collectionCloners.end()) { Status startStatus = _startCollectionCloner(*_currentCollectionClonerIter); if (!startStatus.isOK()) { LOG(1) << " failed to start collection cloning on " << _currentCollectionClonerIter->getSourceNamespace() << ": " << startStatus; _finishCallback(startStatus); return; } return; } _finishCallback(Status::OK()); }
/**
 * Database-work callback that creates the destination collection for bulk loading and then
 * schedules the find fetcher.
 *
 * '_mutex' is taken after the callback status check and is released explicitly before every
 * _finishCallback() call made while it is held. On success it stays held until the function
 * returns.
 */
void CollectionCloner::_beginCollectionCallback(const executor::TaskExecutor::CallbackArgs& cbd) {
    if (!cbd.status.isOK()) {
        _finishCallback(cbd.status);
        return;
    }

    UniqueLock lk(_mutex);

    const bool hasIdIndexSpec = !_idIndexSpec.isEmpty();
    if (hasIdIndexSpec && _options.autoIndexId == CollectionOptions::NO) {
        warning()
            << "Found the _id_ index spec but the collection specified autoIndexId of false on ns:"
            << this->_sourceNss;
    }

    auto loaderResult = _storageInterface->createCollectionForBulkLoading(
        _destNss, _options, _idIndexSpec, _indexSpecs);
    if (!loaderResult.isOK()) {
        lk.unlock();
        _finishCallback(loaderResult.getStatus());
        return;
    }

    // Count the secondary index specs, plus one for the _id index spec when present.
    _stats.indexes = _indexSpecs.size();
    if (hasIdIndexSpec) {
        ++_stats.indexes;
    }

    _collLoader = std::move(loaderResult.getValue());

    Status fetcherStatus = _findFetcher.schedule();
    if (!fetcherStatus.isOK()) {
        lk.unlock();
        _finishCallback(fetcherStatus);
    }
}
/**
 * Remote-command callback for the rollback id request sent to 'candidate'.
 *
 * - Cancellation finishes the resolver with the cancellation status.
 * - Any other failure to obtain the "rbid" field blacklists the candidate and probes the next
 *   sync source.
 * - With a non-null '_requiredOpTime', a fetcher looking for that optime is scheduled;
 *   otherwise the resolver finishes with 'candidate' and the received rbid.
 */
void SyncSourceResolver::_rbidRequestCallback(
    HostAndPort candidate,
    OpTime earliestOpTimeSeen,
    const executor::TaskExecutor::RemoteCommandCallbackArgs& rbidReply) {
    const auto& reply = rbidReply.response;

    if (reply.status == ErrorCodes::CallbackCanceled) {
        _finishCallback(reply.status).transitional_ignore();
        return;
    }

    int remoteRbid = ReplicationProcess::kUninitializedRollbackId;
    try {
        uassertStatusOK(reply.status);
        uassertStatusOK(getStatusFromCommandResult(reply.data));
        remoteRbid = reply.data["rbid"].Int();
    } catch (const DBException& ex) {
        const auto blacklistUntil = _taskExecutor->now() + kFetcherErrorBlacklistDuration;
        log() << "Blacklisting " << candidate << " due to error: '" << ex << "' for "
              << kFetcherErrorBlacklistDuration << " until: " << blacklistUntil;
        _syncSourceSelector->blacklistSyncSource(candidate, blacklistUntil);

        _chooseAndProbeNextSyncSource(earliestOpTimeSeen).transitional_ignore();
        return;
    }

    if (_requiredOpTime.isNull()) {
        _finishCallback(candidate, remoteRbid).ignore();
        return;
    }

    // Schedule fetcher to look for '_requiredOpTime' in the remote oplog.
    // Unittest requires that this kind of failure be handled specially.
    auto scheduleStatus =
        _scheduleFetcher(_makeRequiredOpTimeFetcher(candidate, earliestOpTimeSeen, remoteRbid));
    if (!scheduleStatus.isOK()) {
        _finishCallback(scheduleStatus).transitional_ignore();
    }
}
void CollectionCloner::_listIndexesCallback(const Fetcher::QueryResponseStatus& fetchResult, Fetcher::NextAction* nextAction, BSONObjBuilder* getMoreBob) { const bool collectionIsEmpty = fetchResult == ErrorCodes::NamespaceNotFound; if (collectionIsEmpty) { // Schedule collection creation and finish callback. auto&& scheduleResult = _scheduleDbWorkFn([this](const executor::TaskExecutor::CallbackArgs& cbd) { if (!cbd.status.isOK()) { _finishCallback(cbd.status); return; } auto txn = cbd.txn; txn->setReplicatedWrites(false); auto&& createStatus = _storageInterface->createCollection(txn, _destNss, _options); _finishCallback(createStatus); }); if (!scheduleResult.isOK()) { _finishCallback(scheduleResult.getStatus()); } return; }; if (!fetchResult.isOK()) { Status newStatus{fetchResult.getStatus().code(), str::stream() << "During listIndexes call on collection '" << _sourceNss.ns() << "' there was an error '" << fetchResult.getStatus().reason() << "'"}; _finishCallback(newStatus); return; } auto batchData(fetchResult.getValue()); auto&& documents = batchData.documents; if (documents.empty()) { warning() << "No indexes found for collection " << _sourceNss.ns() << " while cloning from " << _source; } UniqueLock lk(_mutex); // We may be called with multiple batches leading to a need to grow _indexSpecs. _indexSpecs.reserve(_indexSpecs.size() + documents.size()); for (auto&& doc : documents) { if (StringData("_id_") == doc["name"].str()) { _idIndexSpec = doc; continue; } _indexSpecs.push_back(doc); } lk.unlock(); // The fetcher will continue to call with kGetMore until an error or the last batch. if (*nextAction == Fetcher::NextAction::kGetMore) { invariant(getMoreBob); getMoreBob->append("getMore", batchData.cursorId); getMoreBob->append("collection", batchData.nss.coll()); return; } // We have all of the indexes now, so we can start cloning the collection data. 
auto&& scheduleResult = _scheduleDbWorkFn( stdx::bind(&CollectionCloner::_beginCollectionCallback, this, stdx::placeholders::_1)); if (!scheduleResult.isOK()) { _finishCallback(scheduleResult.getStatus()); return; } }
/**
 * Remote-command callback for the count command sent to the sync source.
 *
 * Validates the response (transport status, command status, presence and sign of the count
 * field), records the count in '_stats' and '_progressMeter' under '_mutex', and then schedules
 * the listIndexes fetcher. Every failure is reported through _finishCallback(); all but
 * cancellation and the final scheduling failure are re-worded with collection and source.
 */
void CollectionCloner::_countCallback(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& args) {
    const auto& reply = args.response;

    // No need to reword status reason in the case of cancellation.
    if (ErrorCodes::CallbackCanceled == reply.status) {
        _finishCallback(reply.status);
        return;
    }

    if (!reply.status.isOK()) {
        _finishCallback(Status(reply.status.code(),
                               str::stream() << "During count call on collection '"
                                             << _sourceNss.ns()
                                             << "' from "
                                             << _source.toString()
                                             << ", there was an error '"
                                             << reply.status.reason()
                                             << "'"));
        return;
    }

    Status cmdStatus = getStatusFromCommandResult(reply.data);
    if (!cmdStatus.isOK()) {
        _finishCallback(Status(cmdStatus.code(),
                               str::stream() << "During count call on collection '"
                                             << _sourceNss.ns()
                                             << "' from "
                                             << _source.toString()
                                             << ", there was a command error '"
                                             << cmdStatus.reason()
                                             << "'"));
        return;
    }

    long long docCount = 0;
    auto extractStatus =
        bsonExtractIntegerField(reply.data, kCountResponseDocumentCountFieldName, &docCount);
    if (!extractStatus.isOK()) {
        _finishCallback(
            Status(extractStatus.code(),
                   str::stream() << "There was an error parsing document count from count "
                                    "command result on collection "
                                 << _sourceNss.ns()
                                 << " from "
                                 << _source.toString()
                                 << ": "
                                 << extractStatus.reason()));
        return;
    }

    if (docCount < 0) {
        _finishCallback(Status(ErrorCodes::BadValue,
                               str::stream() << "Count call on collection " << _sourceNss.ns()
                                             << " from "
                                             << _source.toString()
                                             << " returned negative document count: "
                                             << docCount));
        return;
    }

    {
        LockGuard lk(_mutex);
        _stats.documentToCopy = docCount;
        _progressMeter.setTotalWhileRunning(static_cast<unsigned long long>(docCount));
    }

    auto listIndexesStatus = _listIndexesFetcher.schedule();
    if (!listIndexesStatus.isOK()) {
        _finishCallback(listIndexesStatus);
    }
}
void CollectionCloner::_establishCollectionCursorsCallback(const RemoteCommandCallbackArgs& rcbd, EstablishCursorsCommand cursorCommand) { if (_state == State::kShuttingDown) { Status shuttingDownStatus{ErrorCodes::CallbackCanceled, "Cloner shutting down."}; _finishCallback(shuttingDownStatus); return; } auto response = rcbd.response; if (!response.isOK()) { _finishCallback(response.status); return; } Status commandStatus = getStatusFromCommandResult(response.data); if (commandStatus == ErrorCodes::NamespaceNotFound) { _finishCallback(Status::OK()); return; } if (!commandStatus.isOK()) { _finishCallback(commandStatus.withContext( str::stream() << "Error querying collection '" << _sourceNss.ns() << "'")); return; } std::vector<CursorResponse> cursorResponses; Status parseResponseStatus = _parseCursorResponse(response.data, &cursorResponses, cursorCommand); if (!parseResponseStatus.isOK()) { _finishCallback(parseResponseStatus); return; } LOG(1) << "Collection cloner running with " << cursorResponses.size() << " cursors established."; // Initialize the 'AsyncResultsMerger'(ARM). std::vector<ClusterClientCursorParams::RemoteCursor> remoteCursors; for (auto&& cursorResponse : cursorResponses) { // A placeholder 'ShardId' is used until the ARM is made less sharding specific. remoteCursors.emplace_back( ShardId("CollectionClonerSyncSource"), _source, std::move(cursorResponse)); } // An empty list of authenticated users is passed into the cluster parameters // as user information is not used in the ARM in context of collection cloning. 
_clusterClientCursorParams = stdx::make_unique<ClusterClientCursorParams>(_sourceNss, UserNameIterator()); _clusterClientCursorParams->remotes = std::move(remoteCursors); if (_collectionCloningBatchSize > 0) _clusterClientCursorParams->batchSize = _collectionCloningBatchSize; Client::initThreadIfNotAlready(); _arm = stdx::make_unique<AsyncResultsMerger>( cc().getOperationContext(), _executor, _clusterClientCursorParams.get()); // This completion guard invokes _finishCallback on destruction. auto cancelRemainingWorkInLock = [this]() { _cancelRemainingWork_inlock(); }; auto finishCallbackFn = [this](const Status& status) { _finishCallback(status); }; auto onCompletionGuard = std::make_shared<OnCompletionGuard>(cancelRemainingWorkInLock, finishCallbackFn); // Lock guard must be declared after completion guard. If there is an error in this function // that will cause the destructor of the completion guard to run, the destructor must be run // outside the mutex. This is a necessary condition to invoke _finishCallback. stdx::lock_guard<stdx::mutex> lock(_mutex); Status scheduleStatus = _scheduleNextARMResultsCallback(onCompletionGuard); _arm->detachFromOperationContext(); if (!scheduleStatus.isOK()) { onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, scheduleStatus); return; } }
void CollectionCloner::_beginCollectionCallback(const executor::TaskExecutor::CallbackArgs& cbd) { if (!cbd.status.isOK()) { _finishCallback(cbd.status); return; } MONGO_FAIL_POINT_BLOCK(initialSyncHangCollectionClonerBeforeEstablishingCursor, nssData) { const BSONObj& data = nssData.getData(); auto nss = data["nss"].str(); // Only hang when cloning the specified collection, or if no collection was specified. if (nss.empty() || _destNss.toString() == nss) { while (MONGO_FAIL_POINT(initialSyncHangCollectionClonerBeforeEstablishingCursor) && !_isShuttingDown()) { log() << "initialSyncHangCollectionClonerBeforeEstablishingCursor fail point " "enabled for " << _destNss.toString() << ". Blocking until fail point is disabled."; mongo::sleepsecs(1); } } } if (!_idIndexSpec.isEmpty() && _options.autoIndexId == CollectionOptions::NO) { warning() << "Found the _id_ index spec but the collection specified autoIndexId of false on ns:" << this->_sourceNss; } auto collectionBulkLoader = _storageInterface->createCollectionForBulkLoading( _destNss, _options, _idIndexSpec, _indexSpecs); if (!collectionBulkLoader.isOK()) { _finishCallback(collectionBulkLoader.getStatus()); return; } _stats.indexes = _indexSpecs.size(); if (!_idIndexSpec.isEmpty()) { ++_stats.indexes; } _collLoader = std::move(collectionBulkLoader.getValue()); BSONObjBuilder cmdObj; EstablishCursorsCommand cursorCommand; // The 'find' command is used when the number of cloning cursors is 1 to ensure // the correctness of the collection cloning process until 'parallelCollectionScan' // can be tested more extensively in context of initial sync. if (_maxNumClonerCursors == 1) { cmdObj.appendElements( makeCommandWithUUIDorCollectionName("find", _options.uuid, _sourceNss)); cmdObj.append("noCursorTimeout", true); // Set batchSize to be 0 to establish the cursor without fetching any documents, // similar to the response format of 'parallelCollectionScan'. 
cmdObj.append("batchSize", 0); cursorCommand = Find; } else { cmdObj.appendElements(makeCommandWithUUIDorCollectionName( "parallelCollectionScan", _options.uuid, _sourceNss)); cmdObj.append("numCursors", _maxNumClonerCursors); cursorCommand = ParallelCollScan; } Client::initThreadIfNotAlready(); auto opCtx = cc().getOperationContext(); MONGO_FAIL_POINT_BLOCK(initialSyncHangBeforeCollectionClone, options) { const BSONObj& data = options.getData(); if (data["namespace"].String() == _destNss.ns()) { log() << "initial sync - initialSyncHangBeforeCollectionClone fail point " "enabled. Blocking until fail point is disabled."; while (MONGO_FAIL_POINT(initialSyncHangBeforeCollectionClone) && !_isShuttingDown()) { mongo::sleepsecs(1); } } } _establishCollectionCursorsScheduler = stdx::make_unique<RemoteCommandRetryScheduler>( _executor, RemoteCommandRequest(_source, _sourceNss.db().toString(), cmdObj.obj(), ReadPreferenceSetting::secondaryPreferredMetadata(), opCtx, RemoteCommandRequest::kNoTimeout), [=](const RemoteCommandCallbackArgs& rcbd) { _establishCollectionCursorsCallback(rcbd, cursorCommand); }, RemoteCommandRetryScheduler::makeRetryPolicy( numInitialSyncCollectionFindAttempts.load(), executor::RemoteCommandRequest::kNoTimeout, RemoteCommandRetryScheduler::kAllRetriableErrors)); auto scheduleStatus = _establishCollectionCursorsScheduler->startup(); LOG(1) << "Attempting to establish cursors with maxNumClonerCursors: " << _maxNumClonerCursors; if (!scheduleStatus.isOK()) { _establishCollectionCursorsScheduler.reset(); _finishCallback(scheduleStatus); return; } }
void AbstractOplogFetcher::_callback(const Fetcher::QueryResponseStatus& result, BSONObjBuilder* getMoreBob) { Status responseStatus = _checkForShutdownAndConvertStatus(result.getStatus(), "error in fetcher batch callback"); if (ErrorCodes::CallbackCanceled == responseStatus) { LOG(1) << _getComponentName() << " oplog query cancelled to " << _getSource() << ": " << redact(responseStatus); _finishCallback(responseStatus); return; } // If target cut connections between connecting and querying (for // example, because it stepped down) we might not have a cursor. if (!responseStatus.isOK()) { BSONObj findCommandObj = _makeFindCommandObject(_nss, _getLastOpTimeWithHashFetched().opTime); BSONObj metadataObj = _makeMetadataObject(); { stdx::lock_guard<stdx::mutex> lock(_mutex); if (_fetcherRestarts == _maxFetcherRestarts) { log() << "Error returned from oplog query (no more query restarts left): " << redact(responseStatus); } else { log() << "Restarting oplog query due to error: " << redact(responseStatus) << ". Last fetched optime (with hash): " << _lastFetched << ". Restarts remaining: " << (_maxFetcherRestarts - _fetcherRestarts); _fetcherRestarts++; // Destroying current instance in _shuttingDownFetcher will possibly block. _shuttingDownFetcher.reset(); // Move the old fetcher into the shutting down instance. _shuttingDownFetcher.swap(_fetcher); // Create and start fetcher with current term and new starting optime. _fetcher = _makeFetcher(findCommandObj, metadataObj); auto scheduleStatus = _scheduleFetcher_inlock(); if (scheduleStatus.isOK()) { log() << "Scheduled new oplog query " << _fetcher->toString(); return; } error() << "Error scheduling new oplog query: " << redact(scheduleStatus) << ". Returning current oplog query error: " << redact(responseStatus); } } _finishCallback(responseStatus); return; } // Reset fetcher restart counter on successful response. 
{ stdx::lock_guard<stdx::mutex> lock(_mutex); invariant(_isActive_inlock()); _fetcherRestarts = 0; } if (_isShuttingDown()) { _finishCallback( Status(ErrorCodes::CallbackCanceled, _getComponentName() + " shutting down")); return; } // At this point we have a successful batch and can call the subclass's _onSuccessfulBatch. const auto& queryResponse = result.getValue(); auto batchResult = _onSuccessfulBatch(queryResponse); if (!batchResult.isOK()) { // The stopReplProducer fail point expects this to return successfully. If another fail // point wants this to return unsuccessfully, it should use a different error code. if (batchResult.getStatus() == ErrorCodes::FailPointEnabled) { _finishCallback(Status::OK()); return; } _finishCallback(batchResult.getStatus()); return; } // No more data. Stop processing and return Status::OK. if (!getMoreBob) { _finishCallback(Status::OK()); return; } // We have now processed the batch and should move forward our view of _lastFetched. Note that // the _lastFetched value will not be updated until the _onSuccessfulBatch function is // completed. const auto& documents = queryResponse.documents; if (documents.size() > 0) { auto lastDocRes = AbstractOplogFetcher::parseOpTimeWithHash(documents.back()); if (!lastDocRes.isOK()) { _finishCallback(lastDocRes.getStatus()); return; } auto lastDoc = lastDocRes.getValue(); LOG(3) << _getComponentName() << " setting last fetched optime ahead after batch: " << lastDoc.opTime << "; hash: " << lastDoc.value; stdx::lock_guard<stdx::mutex> lock(_mutex); _lastFetched = lastDoc; } // Check for shutdown to save an unnecessary `getMore` request. if (_isShuttingDown()) { _finishCallback( Status(ErrorCodes::CallbackCanceled, _getComponentName() + " shutting down")); return; } // The _onSuccessfulBatch function returns the `getMore` command we want to send. getMoreBob->appendElements(batchResult.getValue()); }
/**
 * Calls _finishCallback(status) after making sure 'lk' no longer owns its lock.
 */
void DatabaseCloner::_finishCallback_inlock(UniqueLock& lk, const Status& status) {
    const bool stillLocked = lk.owns_lock();
    if (stillLocked) {
        // Release the lock before _finishCallback() is invoked.
        lk.unlock();
    }

    _finishCallback(status);
}
void DatabaseCloner::_listCollectionsCallback(const StatusWith<Fetcher::QueryResponse>& result, Fetcher::NextAction* nextAction, BSONObjBuilder* getMoreBob) { if (!result.isOK()) { _finishCallback(result.getStatus()); return; } auto batchData(result.getValue()); auto&& documents = batchData.documents; // We may be called with multiple batches leading to a need to grow _collectionInfos. _collectionInfos.reserve(_collectionInfos.size() + documents.size()); std::copy_if(documents.begin(), documents.end(), std::back_inserter(_collectionInfos), _listCollectionsPredicate); // The fetcher will continue to call with kGetMore until an error or the last batch. if (*nextAction == Fetcher::NextAction::kGetMore) { invariant(getMoreBob); getMoreBob->append("getMore", batchData.cursorId); getMoreBob->append("collection", batchData.nss.coll()); return; } // Nothing to do for an empty database. if (_collectionInfos.empty()) { _finishCallback(Status::OK()); return; } _collectionNamespaces.reserve(_collectionInfos.size()); std::set<std::string> seen; for (auto&& info : _collectionInfos) { BSONElement nameElement = info.getField(kNameFieldName); if (nameElement.eoo()) { _finishCallback(Status(ErrorCodes::FailedToParse, str::stream() << "collection info must contain '" << kNameFieldName << "' " << "field : " << info)); return; } if (nameElement.type() != mongol::String) { _finishCallback(Status(ErrorCodes::TypeMismatch, str::stream() << "'" << kNameFieldName << "' field must be a string: " << info)); return; } const std::string collectionName = nameElement.String(); if (seen.find(collectionName) != seen.end()) { _finishCallback(Status(ErrorCodes::DuplicateKey, str::stream() << "collection info contains duplicate collection name " << "'" << collectionName << "': " << info)); return; } BSONElement optionsElement = info.getField(kOptionsFieldName); if (optionsElement.eoo()) { _finishCallback(Status(ErrorCodes::FailedToParse, str::stream() << "collection info must contain '" << 
kOptionsFieldName << "' " << "field : " << info)); return; } if (!optionsElement.isABSONObj()) { _finishCallback(Status(ErrorCodes::TypeMismatch, str::stream() << "'" << kOptionsFieldName << "' field must be an object: " << info)); return; } const BSONObj optionsObj = optionsElement.Obj(); CollectionOptions options; Status parseStatus = options.parse(optionsObj); if (!parseStatus.isOK()) { _finishCallback(parseStatus); return; } seen.insert(collectionName); _collectionNamespaces.emplace_back(_dbname, collectionName); auto&& nss = *_collectionNamespaces.crbegin(); try { _collectionCloners.emplace_back( _executor, _source, nss, options, stdx::bind( &DatabaseCloner::_collectionClonerCallback, this, stdx::placeholders::_1, nss), _storageInterface); } catch (const UserException& ex) { _finishCallback(ex.toStatus()); return; } } for (auto&& collectionCloner : _collectionCloners) { collectionCloner.setScheduleDbWorkFn(_scheduleDbWorkFn); } // Start first collection cloner. _currentCollectionClonerIter = _collectionCloners.begin(); LOG(1) << " cloning collection " << _currentCollectionClonerIter->getSourceNamespace(); Status startStatus = _startCollectionCloner(*_currentCollectionClonerIter); if (!startStatus.isOK()) { LOG(1) << " failed to start collection cloning on " << _currentCollectionClonerIter->getSourceNamespace() << ": " << startStatus; _finishCallback(startStatus); return; } }
/**
 * Finishes the resolver with an error.
 *
 * 'status' must not be OK (enforced by invariant). It is moved into a
 * SyncSourceResolverResponse that is forwarded to the response-taking overload.
 */
Status SyncSourceResolver::_finishCallback(Status status) {
    invariant(!status.isOK());

    SyncSourceResolverResponse errorResponse;
    errorResponse.syncSourceStatus = std::move(status);

    return _finishCallback(errorResponse);
}
void SyncSourceResolver::_firstOplogEntryFetcherCallback( const StatusWith<Fetcher::QueryResponse>& queryResult, HostAndPort candidate, OpTime earliestOpTimeSeen) { if (_isShuttingDown()) { _finishCallback(Status(ErrorCodes::CallbackCanceled, str::stream() << "sync source resolver shut down while probing candidate: " << candidate)); return; } if (ErrorCodes::CallbackCanceled == queryResult.getStatus()) { _finishCallback(queryResult.getStatus()); return; } if (!queryResult.isOK()) { // We got an error. const auto until = _taskExecutor->now() + kFetcherErrorBlacklistDuration; log() << "Blacklisting " << candidate << " due to error: '" << queryResult.getStatus() << "' for " << kFetcherErrorBlacklistDuration << " until: " << until; _syncSourceSelector->blacklistSyncSource(candidate, until); _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } const auto& queryResponse = queryResult.getValue(); const auto remoteEarliestOpTime = _parseRemoteEarliestOpTime(candidate, queryResponse); if (remoteEarliestOpTime.isNull()) { _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } // remoteEarliestOpTime may come from a very old config, so we cannot compare their terms. if (_lastOpTimeFetched.getTimestamp() < remoteEarliestOpTime.getTimestamp()) { // We're too stale to use this sync source. const auto blacklistDuration = kTooStaleBlacklistDuration; const auto until = _taskExecutor->now() + Minutes(1); log() << "We are too stale to use " << candidate << " as a sync source. " << "Blacklisting this sync source" << " because our last fetched timestamp: " << _lastOpTimeFetched.getTimestamp() << " is before their earliest timestamp: " << remoteEarliestOpTime.getTimestamp() << " for " << blacklistDuration << " until: " << until; _syncSourceSelector->blacklistSyncSource(candidate, until); // If all the viable sync sources are too far ahead of us (i.e. 
we are "too stale" relative // each sync source), we will want to return the starting timestamp of the sync source // candidate that is closest to us. See SyncSourceResolverResponse::earliestOpTimeSeen. // We use "earliestOpTimeSeen" to keep track of the current minimum starting timestamp. if (earliestOpTimeSeen.isNull() || earliestOpTimeSeen.getTimestamp() > remoteEarliestOpTime.getTimestamp()) { earliestOpTimeSeen = remoteEarliestOpTime; } _chooseAndProbeNextSyncSource(earliestOpTimeSeen); return; } // Schedules fetcher to look for '_requiredOpTime' in the remote oplog. if (!_requiredOpTime.isNull()) { auto status = _scheduleFetcher(_makeRequiredOpTimeFetcher(candidate, earliestOpTimeSeen)); if (!status.isOK()) { _finishCallback(status); } return; } _finishCallback(candidate); }
// Remote command callback for the count issued against the source collection.
//
// On success the returned document count is stored in the cloner's stats and progress meter, and
// the listIndexes fetcher is scheduled. Any failure (transport error, command error, unparsable
// or negative count, scheduling error) is reported through _finishCallback().
void CollectionCloner::_countCallback(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& args) {
    const auto& responseStatus = args.response.status;

    // A cancellation is passed through untouched; there is no need to reword its reason.
    if (ErrorCodes::CallbackCanceled == responseStatus) {
        _finishCallback(responseStatus);
        return;
    }

    if (!responseStatus.isOK()) {
        _finishCallback(responseStatus.withContext(
            str::stream() << "Count call failed on collection '" << _sourceNss.ns() << "' from "
                          << _source.toString()));
        return;
    }

    long long count = 0;
    Status commandStatus = getStatusFromCommandResult(args.response.data);

    // Querying a non-existing collection by UUID returns an error. Treat it the same as a find
    // by namespace would behave and leave count == 0.
    const bool missingCollectionByUuid =
        commandStatus == ErrorCodes::NamespaceNotFound && _options.uuid;

    if (!missingCollectionByUuid) {
        if (!commandStatus.isOK()) {
            _finishCallback(commandStatus.withContext(
                str::stream() << "Count call failed on collection '" << _sourceNss.ns()
                              << "' from "
                              << _source.toString()));
            return;
        }

        auto extractStatus = bsonExtractIntegerField(
            args.response.data, kCountResponseDocumentCountFieldName, &count);
        if (!extractStatus.isOK()) {
            _finishCallback(extractStatus.withContext(
                str::stream() << "There was an error parsing document count from count "
                                 "command result on collection "
                              << _sourceNss.ns()
                              << " from "
                              << _source.toString()));
            return;
        }
    }

    if (count < 0) {
        _finishCallback({ErrorCodes::BadValue,
                         str::stream() << "Count call on collection " << _sourceNss.ns() << " from "
                                       << _source.toString()
                                       << " returned negative document count: "
                                       << count});
        return;
    }

    // Record the expected number of documents while holding the mutex.
    {
        LockGuard lk(_mutex);
        _stats.documentToCopy = count;
        _progressMeter.setTotalWhileRunning(static_cast<unsigned long long>(count));
    }

    // Next step: list the indexes of the source collection.
    auto listIndexesStatus = _listIndexesFetcher.schedule();
    if (!listIndexesStatus.isOK()) {
        _finishCallback(listIndexesStatus);
    }
}
// Finishes the resolver with 'result', which holds either the chosen sync source or an error.
// The result is wrapped in a SyncSourceResolverResponse and forwarded to the response-taking
// overload, whose result is returned.
Status SyncSourceResolver::_finishCallback(StatusWith<HostAndPort> result) {
    SyncSourceResolverResponse wrapped;
    wrapped.syncSourceStatus = std::move(result);

    return _finishCallback(wrapped);
}
// Fetcher callback that receives one batch of index specs for the source collection.
//
// A NamespaceNotFound result is treated as an empty collection: database work is scheduled to
// create the destination collection and then finish. Otherwise each index spec in the batch has
// its 'ns' field rewritten and is stored (the "_id_" index separately from the rest). While the
// fetcher reports more batches, only the getMore request is filled in; after the last batch the
// collection data clone is scheduled. Failures are reported through _finishCallback().
void CollectionCloner::_listIndexesCallback(const Fetcher::QueryResponseStatus& fetchResult,
                                            Fetcher::NextAction* nextAction,
                                            BSONObjBuilder* getMoreBob) {
    if (fetchResult == ErrorCodes::NamespaceNotFound) {
        // Empty collection: schedule collection creation and finish from that task.
        auto&& createScheduled =
            _scheduleDbWorkFn([this](const executor::TaskExecutor::CallbackArgs& cbd) {
                if (!cbd.status.isOK()) {
                    _finishCallback(cbd.status);
                    return;
                }
                auto opCtx = cbd.opCtx;
                UnreplicatedWritesBlock uwb(opCtx);
                _finishCallback(_storageInterface->createCollection(opCtx, _destNss, _options));
            });
        if (!createScheduled.isOK()) {
            _finishCallback(createScheduled.getStatus());
        }
        return;
    }

    if (!fetchResult.isOK()) {
        _finishCallback(fetchResult.getStatus().withContext(
            str::stream() << "listIndexes call failed on collection '" << _sourceNss.ns() << "'"));
        return;
    }

    auto response(fetchResult.getValue());
    auto&& indexDocs = response.documents;

    if (indexDocs.empty()) {
        warning() << "No indexes found for collection " << _sourceNss.ns() << " while cloning from "
                  << _source;
    }

    {
        UniqueLock lk(_mutex);

        // When listing indexes by UUID, the sync source may use a different name for the
        // collection as result of renaming or two-phase drop. As the index spec also includes a
        // 'ns' field, this must be rewritten.
        BSONObjBuilder nsReplacementBuilder;
        nsReplacementBuilder.append("ns", _sourceNss.ns());
        BSONElement nsReplacement = nsReplacementBuilder.done().firstElement();

        // This callback may run once per batch, so grow _indexSpecs rather than replace it.
        _indexSpecs.reserve(_indexSpecs.size() + indexDocs.size());
        for (auto&& spec : indexDocs) {
            // addField() replaces the 'ns' field with the corrected namespace built above.
            if (StringData("_id_") == spec["name"].str()) {
                _idIndexSpec = spec.addField(nsReplacement);
            } else {
                _indexSpecs.push_back(spec.addField(nsReplacement));
            }
        }
    }

    // More batches are pending: request the next one and wait to be called again.
    if (*nextAction == Fetcher::NextAction::kGetMore) {
        invariant(getMoreBob);
        getMoreBob->append("getMore", response.cursorId);
        getMoreBob->append("collection", response.nss.coll());
        return;
    }

    // All indexes have been received, so the collection data clone can begin.
    auto&& beginScheduled = _scheduleDbWorkFn(
        [this](const executor::TaskExecutor::CallbackArgs& cbd) { _beginCollectionCallback(cbd); });
    if (!beginScheduled.isOK()) {
        _finishCallback(beginScheduled.getStatus());
    }
}