void AbstractOplogFetcher::_makeAndScheduleFetcherCallback( const executor::TaskExecutor::CallbackArgs& args) { Status responseStatus = _checkForShutdownAndConvertStatus(args, "error scheduling fetcher"); if (!responseStatus.isOK()) { _finishCallback(responseStatus); return; } BSONObj findCommandObj = _makeFindCommandObject(_nss, _getLastOpTimeWithHashFetched().opTime); BSONObj metadataObj = _makeMetadataObject(); Status scheduleStatus = Status::OK(); { stdx::lock_guard<stdx::mutex> lock(_mutex); _fetcher = _makeFetcher(findCommandObj, metadataObj); scheduleStatus = _scheduleFetcher_inlock(); } if (!scheduleStatus.isOK()) { _finishCallback(scheduleStatus); return; } }
StatusWith<BSONObj> OplogFetcher::_onSuccessfulBatch(const Fetcher::QueryResponse& queryResponse) { // Stop fetching and return on fail point. // This fail point makes the oplog fetcher ignore the downloaded batch of operations and not // error out. The FailPointEnabled error will be caught by the AbstractOplogFetcher. if (MONGO_FAIL_POINT(stopReplProducer)) { return Status(ErrorCodes::FailPointEnabled, "stopReplProducer fail point is enabled"); } const auto& documents = queryResponse.documents; auto firstDocToApply = documents.cbegin(); if (!documents.empty()) { LOG(2) << "oplog fetcher read " << documents.size() << " operations from remote oplog starting at " << documents.front()["ts"] << " and ending at " << documents.back()["ts"]; } else { LOG(2) << "oplog fetcher read 0 operations from remote oplog"; } auto oqMetadataResult = parseOplogQueryMetadata(queryResponse); if (!oqMetadataResult.isOK()) { error() << "invalid oplog query metadata from sync source " << _getSource() << ": " << oqMetadataResult.getStatus() << ": " << queryResponse.otherFields.metadata; return oqMetadataResult.getStatus(); } auto oqMetadata = oqMetadataResult.getValue(); // This lastFetched value is the last OpTime from the previous batch. auto lastFetched = _getLastOpTimeWithHashFetched(); // Check start of remote oplog and, if necessary, stop fetcher to execute rollback. if (queryResponse.first) { auto remoteRBID = oqMetadata ? boost::make_optional(oqMetadata->getRBID()) : boost::none; auto remoteLastApplied = oqMetadata ? boost::make_optional(oqMetadata->getLastOpApplied()) : boost::none; auto status = checkRemoteOplogStart(documents, lastFetched, remoteLastApplied, _requiredRBID, remoteRBID, _requireFresherSyncSource); if (!status.isOK()) { // Stop oplog fetcher and execute rollback if necessary. return status; } LOG(1) << "oplog fetcher successfully fetched from " << _getSource(); // If this is the first batch and no rollback is needed, skip the first document. firstDocToApply++; } auto validateResult = OplogFetcher::validateDocuments( documents, queryResponse.first, lastFetched.opTime.getTimestamp()); if (!validateResult.isOK()) { return validateResult.getStatus(); } auto info = validateResult.getValue(); // Process replset metadata. It is important that this happen after we've validated the // first batch, so we don't progress our knowledge of the commit point from a // response that triggers a rollback. rpc::ReplSetMetadata replSetMetadata; bool receivedReplMetadata = queryResponse.otherFields.metadata.hasElement(rpc::kReplSetMetadataFieldName); if (receivedReplMetadata) { const auto& metadataObj = queryResponse.otherFields.metadata; auto metadataResult = rpc::ReplSetMetadata::readFromMetadata(metadataObj); if (!metadataResult.isOK()) { error() << "invalid replication metadata from sync source " << _getSource() << ": " << metadataResult.getStatus() << ": " << metadataObj; return metadataResult.getStatus(); } replSetMetadata = metadataResult.getValue(); // We will only ever have OplogQueryMetadata if we have ReplSetMetadata, so it is safe // to call processMetadata() in this if block. _dataReplicatorExternalState->processMetadata(replSetMetadata, oqMetadata); } // Increment stats. We read all of the docs in the query. opsReadStats.increment(info.networkDocumentCount); networkByteStats.increment(info.networkDocumentBytes); // Record time for each batch. getmoreReplStats.recordMillis(durationCount<Milliseconds>(queryResponse.elapsedMillis)); // TODO: back pressure handling will be added in SERVER-23499. auto status = _enqueueDocumentsFn(firstDocToApply, documents.cend(), info); if (!status.isOK()) { return status; } if (_dataReplicatorExternalState->shouldStopFetching( _getSource(), replSetMetadata, oqMetadata)) { str::stream errMsg; errMsg << "sync source " << _getSource().toString(); errMsg << " (config version: " << replSetMetadata.getConfigVersion(); // If OplogQueryMetadata was provided, its values were used to determine if we should // stop fetching from this sync source. if (oqMetadata) { errMsg << "; last applied optime: " << oqMetadata->getLastOpApplied().toString(); errMsg << "; sync source index: " << oqMetadata->getSyncSourceIndex(); errMsg << "; primary index: " << oqMetadata->getPrimaryIndex(); } else { errMsg << "; last visible optime: " << replSetMetadata.getLastOpVisible().toString(); errMsg << "; sync source index: " << replSetMetadata.getSyncSourceIndex(); errMsg << "; primary index: " << replSetMetadata.getPrimaryIndex(); } errMsg << ") is no longer valid"; return Status(ErrorCodes::InvalidSyncSource, errMsg); } auto lastCommittedWithCurrentTerm = _dataReplicatorExternalState->getCurrentTermAndLastCommittedOpTime(); return makeGetMoreCommandObject(queryResponse.nss, queryResponse.cursorId, lastCommittedWithCurrentTerm, _getGetMoreMaxTime(), _batchSize); }
void AbstractOplogFetcher::_callback(const Fetcher::QueryResponseStatus& result, BSONObjBuilder* getMoreBob) { Status responseStatus = _checkForShutdownAndConvertStatus(result.getStatus(), "error in fetcher batch callback"); if (ErrorCodes::CallbackCanceled == responseStatus) { LOG(1) << _getComponentName() << " oplog query cancelled to " << _getSource() << ": " << redact(responseStatus); _finishCallback(responseStatus); return; } // If target cut connections between connecting and querying (for // example, because it stepped down) we might not have a cursor. if (!responseStatus.isOK()) { BSONObj findCommandObj = _makeFindCommandObject(_nss, _getLastOpTimeWithHashFetched().opTime); BSONObj metadataObj = _makeMetadataObject(); { stdx::lock_guard<stdx::mutex> lock(_mutex); if (_fetcherRestarts == _maxFetcherRestarts) { log() << "Error returned from oplog query (no more query restarts left): " << redact(responseStatus); } else { log() << "Restarting oplog query due to error: " << redact(responseStatus) << ". Last fetched optime (with hash): " << _lastFetched << ". Restarts remaining: " << (_maxFetcherRestarts - _fetcherRestarts); _fetcherRestarts++; // Destroying current instance in _shuttingDownFetcher will possibly block. _shuttingDownFetcher.reset(); // Move the old fetcher into the shutting down instance. _shuttingDownFetcher.swap(_fetcher); // Create and start fetcher with current term and new starting optime. _fetcher = _makeFetcher(findCommandObj, metadataObj); auto scheduleStatus = _scheduleFetcher_inlock(); if (scheduleStatus.isOK()) { log() << "Scheduled new oplog query " << _fetcher->toString(); return; } error() << "Error scheduling new oplog query: " << redact(scheduleStatus) << ". Returning current oplog query error: " << redact(responseStatus); } } _finishCallback(responseStatus); return; } // Reset fetcher restart counter on successful response. { stdx::lock_guard<stdx::mutex> lock(_mutex); invariant(_isActive_inlock()); _fetcherRestarts = 0; } if (_isShuttingDown()) { _finishCallback( Status(ErrorCodes::CallbackCanceled, _getComponentName() + " shutting down")); return; } // At this point we have a successful batch and can call the subclass's _onSuccessfulBatch. const auto& queryResponse = result.getValue(); auto batchResult = _onSuccessfulBatch(queryResponse); if (!batchResult.isOK()) { // The stopReplProducer fail point expects this to return successfully. If another fail // point wants this to return unsuccessfully, it should use a different error code. if (batchResult.getStatus() == ErrorCodes::FailPointEnabled) { _finishCallback(Status::OK()); return; } _finishCallback(batchResult.getStatus()); return; } // No more data. Stop processing and return Status::OK. if (!getMoreBob) { _finishCallback(Status::OK()); return; } // We have now processed the batch and should move forward our view of _lastFetched. Note that // the _lastFetched value will not be updated until the _onSuccessfulBatch function is // completed. const auto& documents = queryResponse.documents; if (documents.size() > 0) { auto lastDocRes = AbstractOplogFetcher::parseOpTimeWithHash(documents.back()); if (!lastDocRes.isOK()) { _finishCallback(lastDocRes.getStatus()); return; } auto lastDoc = lastDocRes.getValue(); LOG(3) << _getComponentName() << " setting last fetched optime ahead after batch: " << lastDoc.opTime << "; hash: " << lastDoc.value; stdx::lock_guard<stdx::mutex> lock(_mutex); _lastFetched = lastDoc; } // Check for shutdown to save an unnecessary `getMore` request. if (_isShuttingDown()) { _finishCallback( Status(ErrorCodes::CallbackCanceled, _getComponentName() + " shutting down")); return; } // The _onSuccessfulBatch function returns the `getMore` command we want to send. getMoreBob->appendElements(batchResult.getValue()); }
BSONObj AbstractOplogFetcher::getFindQuery_forTest() const { return _makeFindCommandObject(_nss, _getLastOpTimeWithHashFetched().opTime); }
OpTimeWithHash AbstractOplogFetcher::getLastOpTimeWithHashFetched_forTest() const { return _getLastOpTimeWithHashFetched(); }