StatusWith<executor::TaskExecutor::EventHandle> AsyncResultsMerger::nextEvent() {
    stdx::lock_guard<stdx::mutex> lk(_mutex);

    if (_lifecycleState != kAlive) {
        // Can't schedule further network operations if the ARM is being killed.
        return Status(ErrorCodes::IllegalOperation,
                      "nextEvent() called on a killed AsyncResultsMerger");
    }

    if (_currentEvent.isValid()) {
        // We can't make a new event if there's still an unsignaled one, as every event must
        // eventually be signaled.
        return Status(ErrorCodes::IllegalOperation,
                      "nextEvent() called before an outstanding event was signaled");
    }

    // Schedule remote work on hosts for which we need more results.
    for (size_t i = 0; i < _remotes.size(); ++i) {
        auto& remote = _remotes[i];

        // It is illegal to call this method if there is an error received from any shard.
        invariant(remote.status.isOK());

        if (!remote.hasNext() && !remote.exhausted() && !remote.cbHandle.isValid()) {
            // If we already have established a cursor with this remote, and there is no
            // outstanding request for which we have a valid callback handle, then schedule work
            // to retrieve the next batch.
            auto nextBatchStatus = askForNextBatch_inlock(i);
            if (!nextBatchStatus.isOK()) {
                return nextBatchStatus;
            }
        }
    }

    auto eventStatus = _executor->makeEvent();
    if (!eventStatus.isOK()) {
        return eventStatus;
    }
    auto eventToReturn = eventStatus.getValue();
    _currentEvent = eventToReturn;

    // It's possible that after we told the caller we had no ready results but before the call to
    // this method, new results became available. In this case we have to signal the event right
    // away so that the caller will not block.
    signalCurrentEventIfReady_inlock();

    return eventToReturn;
}
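// Illustrative sketch (not part of the file above): the consumer loop that nextEvent() is
// designed for. It assumes the merger also exposes ready() and nextReady(), which is how the
// surrounding code base drives this class; treat the exact names and signatures as assumptions,
// since they vary between versions.
//
//     while (!merger.ready()) {
//         auto eventStatus = merger.nextEvent();
//         if (!eventStatus.isOK()) {
//             return eventStatus.getStatus();  // e.g. the merger was killed
//         }
//         // Block until handleBatchResponse() signals that new results (or an error) are
//         // available, then re-check ready().
//         executor->waitForEvent(eventStatus.getValue());
//     }
//     auto next = merger.nextReady();  // consume the next merged result without blocking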
void AsyncResultsMerger::handleBatchResponse(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& cbData, size_t remoteIndex) {
    stdx::lock_guard<stdx::mutex> lk(_mutex);

    auto& remote = _remotes[remoteIndex];

    // Clear the callback handle. This indicates that we are no longer waiting on a response from
    // 'remote'.
    remote.cbHandle = executor::TaskExecutor::CallbackHandle();

    // If we're in the process of shutting down then there's no need to process the batch.
    if (_lifecycleState != kAlive) {
        invariant(_lifecycleState == kKillStarted);

        // Make sure to wake up anyone waiting on '_currentEvent' if we're shutting down.
        signalCurrentEventIfReady_inlock();

        // Make a best effort to parse the response and retrieve the cursor id. We need the
        // cursor id in order to issue a killCursors command against it.
        if (cbData.response.isOK()) {
            auto cursorResponse = parseCursorResponse(cbData.response.getValue().data, remote);
            if (cursorResponse.isOK()) {
                remote.cursorId = cursorResponse.getValue().getCursorId();
            }
        }

        // If we're killed and we're not waiting on any more batches to come back, then we are
        // ready to kill the cursors on the remote hosts and clean up this cursor. Schedule the
        // killCursors command and signal that this cursor is now safe to destroy. We have to
        // promise not to touch any members of this class because 'this' could become invalid as
        // soon as we signal the event.
        if (!haveOutstandingBatchRequests_inlock()) {
            // If the event handle is invalid, then the executor is in the middle of shutting
            // down, and we can't schedule any more work for it to complete.
            if (_killCursorsScheduledEvent.isValid()) {
                scheduleKillCursors_inlock();
                _executor->signalEvent(_killCursorsScheduledEvent);
            }

            _lifecycleState = kKillComplete;
        }

        return;
    }

    // On any early return from this point on, the ScopeGuard signals anyone waiting on an event,
    // if ready() is true.
    ScopeGuard signaller = MakeGuard(&AsyncResultsMerger::signalCurrentEventIfReady_inlock, this);

    StatusWith<CursorResponse> cursorResponseStatus(
        cbData.response.isOK() ? parseCursorResponse(cbData.response.getValue().data, remote)
                               : cbData.response.getStatus());

    if (!cursorResponseStatus.isOK()) {
        auto shard = remote.getShard();
        if (!shard) {
            remote.status = Status(cursorResponseStatus.getStatus().code(),
                                   str::stream() << "Could not find shard " << *remote.shardId
                                                 << " containing host "
                                                 << remote.getTargetHost().toString());
        } else {
            shard->updateReplSetMonitor(remote.getTargetHost(), cursorResponseStatus.getStatus());

            // Retry initial cursor establishment if possible. Never retry getMores to avoid
            // accidentally skipping results.
            if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
                shard->isRetriableError(cursorResponseStatus.getStatus().code(),
                                        Shard::RetryPolicy::kIdempotent)) {
                invariant(remote.shardId);
                LOG(1) << "Initial cursor establishment failed with retriable error and will be "
                          "retried"
                       << causedBy(redact(cursorResponseStatus.getStatus()));

                ++remote.retryCount;

                // Since we potentially updated the targeter that the last host it chose might be
                // faulty, the call below may end up getting a different host.
                remote.status = askForNextBatch_inlock(remoteIndex);
                if (remote.status.isOK()) {
                    return;
                }

                // If we end up here, it means we failed to schedule the retry request, which is
                // a more severe error that should not be retried. Just pass through to the error
                // handling logic below.
            } else {
                remote.status = cursorResponseStatus.getStatus();
            }
        }

        // Unreachable host errors are swallowed if the 'allowPartialResults' option is set. We
        // remove the unreachable host entirely from consideration by marking it as exhausted.
        if (_params.isAllowPartialResults) {
            remote.status = Status::OK();

            // Clear the results buffer and cursor id.
            std::queue<BSONObj> emptyBuffer;
            std::swap(remote.docBuffer, emptyBuffer);
            remote.cursorId = 0;
        }

        return;
    }

    // Cursor id successfully established.
    auto cursorResponse = std::move(cursorResponseStatus.getValue());
    remote.cursorId = cursorResponse.getCursorId();
    remote.initialCmdObj = boost::none;

    for (const auto& obj : cursorResponse.getBatch()) {
        // If there's a sort, we're expecting the remote node to give us back a sort key.
        if (!_params.sort.isEmpty() &&
            obj[ClusterClientCursorParams::kSortKeyField].type() != BSONType::Object) {
            remote.status = Status(ErrorCodes::InternalError,
                                   str::stream() << "Missing field '"
                                                 << ClusterClientCursorParams::kSortKeyField
                                                 << "' in document: " << obj);
            return;
        }

        remote.docBuffer.push(obj);
        ++remote.fetchedCount;
    }

    // If we're doing a sorted merge, then we have to make sure to put this remote onto the
    // merge queue.
    if (!_params.sort.isEmpty() && !cursorResponse.getBatch().empty()) {
        _mergeQueue.push(remoteIndex);
    }

    // If the cursor is tailable and we just received an empty batch, the next return value
    // should be boost::none in order to indicate the end of the batch.
    if (_params.isTailable && !remote.hasNext()) {
        _eofNext = true;
    }

    // If even after receiving this batch we still don't have anything buffered (i.e. the
    // batchSize was zero), then we can schedule work to retrieve the next batch right away.
    //
    // We do not ask for the next batch if the cursor is tailable, as batches received from
    // remote tailable cursors should be passed through to the client without asking for more
    // batches.
    if (!_params.isTailable && !remote.hasNext() && !remote.exhausted()) {
        remote.status = askForNextBatch_inlock(remoteIndex);
        if (!remote.status.isOK()) {
            return;
        }
    }

    // ScopeGuard requires dismissal on success, but we want the waiter to be signalled on
    // success as well as failure.
    signaller.Dismiss();
    signalCurrentEventIfReady_inlock();
}
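// Illustrative sketch of the dismiss-able scope guard idiom used above (a simplified stand-in,
// not MongoDB's util/scopeguard.h). The guard invokes its callback on every exit path unless
// Dismiss() is called; handleBatchResponse() relies on this so that every early return still
// signals '_currentEvent', while the success path dismisses the guard and signals explicitly.
#include <functional>
#include <utility>

class ScopeGuardSketch {
public:
    explicit ScopeGuardSketch(std::function<void()> onExit) : _onExit(std::move(onExit)) {}

    ScopeGuardSketch(const ScopeGuardSketch&) = delete;
    ScopeGuardSketch& operator=(const ScopeGuardSketch&) = delete;

    ~ScopeGuardSketch() {
        if (_active) {
            _onExit();  // runs on early returns and on exceptions
        }
    }

    void Dismiss() {
        _active = false;  // suppress the callback; the caller signals on its own terms
    }

private:
    std::function<void()> _onExit;
    bool _active = true;
};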
void AsyncResultsMerger::handleBatchResponse(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& cbData,
    OperationContext* opCtx,
    size_t remoteIndex) {
    stdx::lock_guard<stdx::mutex> lk(_mutex);

    auto& remote = _remotes[remoteIndex];

    // Clear the callback handle. This indicates that we are no longer waiting on a response from
    // 'remote'.
    remote.cbHandle = executor::TaskExecutor::CallbackHandle();

    // If we're in the process of shutting down then there's no need to process the batch.
    if (_lifecycleState != kAlive) {
        invariant(_lifecycleState == kKillStarted);

        // Make sure to wake up anyone waiting on '_currentEvent' if we're shutting down.
        signalCurrentEventIfReady_inlock();

        // If we're killed and we're not waiting on any more batches to come back, then we are
        // ready to kill the cursors on the remote hosts and clean up this cursor. Schedule the
        // killCursors command and signal that this cursor is now safe to destroy. We have to
        // promise not to touch any members of this class because 'this' could become invalid as
        // soon as we signal the event.
        if (!haveOutstandingBatchRequests_inlock()) {
            // If the event handle is invalid, then the executor is in the middle of shutting
            // down, and we can't schedule any more work for it to complete.
            if (_killCursorsScheduledEvent.isValid()) {
                scheduleKillCursors_inlock(opCtx);
                _executor->signalEvent(_killCursorsScheduledEvent);
            }

            _lifecycleState = kKillComplete;
        }

        return;
    }

    // On any early return from this point on, the ScopeGuard signals anyone waiting on an event,
    // if ready() is true.
    ScopeGuard signaller = MakeGuard(&AsyncResultsMerger::signalCurrentEventIfReady_inlock, this);

    StatusWith<CursorResponse> cursorResponseStatus(
        cbData.response.isOK() ? parseCursorResponse(cbData.response.data, remote)
                               : cbData.response.status);

    if (!cursorResponseStatus.isOK()) {
        auto shard = remote.getShard();
        if (!shard) {
            remote.status = Status(cursorResponseStatus.getStatus().code(),
                                   str::stream() << "Could not find shard containing host "
                                                 << remote.getTargetHost().toString());
        } else {
            shard->updateReplSetMonitor(remote.getTargetHost(), cursorResponseStatus.getStatus());
            remote.status = cursorResponseStatus.getStatus();
        }

        // Unreachable host errors are swallowed if the 'allowPartialResults' option is set. We
        // remove the unreachable host entirely from consideration by marking it as exhausted.
        if (_params->isAllowPartialResults) {
            remote.status = Status::OK();

            // Clear the results buffer and cursor id.
            std::queue<ClusterQueryResult> emptyBuffer;
            std::swap(remote.docBuffer, emptyBuffer);
            remote.cursorId = 0;
        }

        return;
    }

    // Response successfully received.
    auto cursorResponse = std::move(cursorResponseStatus.getValue());

    // Update the cursorId; it is sent as '0' when the cursor has been exhausted on the shard.
    remote.cursorId = cursorResponse.getCursorId();

    // Save the batch in the remote's buffer.
    if (!addBatchToBuffer(remoteIndex, cursorResponse.getBatch())) {
        return;
    }

    // If the cursor is tailable and we just received an empty batch, the next return value
    // should be boost::none in order to indicate the end of the batch. (Note: tailable cursors
    // are only valid on unsharded collections, so the end of the batch from one shard means the
    // end of the overall batch.)
    if (_params->isTailable && !remote.hasNext()) {
        _eofNext = true;
    }

    // If even after receiving this batch we still don't have anything buffered (i.e. the
    // batchSize was zero), then we can schedule work to retrieve the next batch right away.
    //
    // We do not ask for the next batch if the cursor is tailable, as batches received from
    // remote tailable cursors should be passed through to the client without asking for more
    // batches.
    if (!_params->isTailable && !remote.hasNext() && !remote.exhausted()) {
        remote.status = askForNextBatch_inlock(opCtx, remoteIndex);
        if (!remote.status.isOK()) {
            return;
        }
    }

    // ScopeGuard requires dismissal on success, but we want the waiter to be signalled on
    // success as well as failure.
    signaller.Dismiss();
    signalCurrentEventIfReady_inlock();
}
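// A plausible shape for the addBatchToBuffer() helper called above, reconstructed from its call
// site and from the inlined batch-handling logic in the other versions of handleBatchResponse()
// in this section. This is a hedged sketch, not the actual helper: the parameter types, the
// buffered element type (ClusterQueryResult vs. BSONObj), and the error reporting may differ.
//
// bool AsyncResultsMerger::addBatchToBuffer(size_t remoteIndex,
//                                           const std::vector<BSONObj>& batch) {
//     auto& remote = _remotes[remoteIndex];
//     for (const auto& obj : batch) {
//         // If there's a sort, we're expecting the remote node to give us back a sort key.
//         if (!_params->sort.isEmpty() &&
//             obj[ClusterClientCursorParams::kSortKeyField].type() != BSONType::Object) {
//             remote.status = Status(ErrorCodes::InternalError,
//                                    str::stream() << "Missing field '"
//                                                  << ClusterClientCursorParams::kSortKeyField
//                                                  << "' in document: " << obj);
//             return false;
//         }
//
//         remote.docBuffer.push(obj);
//         ++remote.fetchedCount;
//     }
//
//     // If we're doing a sorted merge, put this remote onto the merge queue.
//     if (!_params->sort.isEmpty() && !batch.empty()) {
//         _mergeQueue.push(remoteIndex);
//     }
//     return true;
// }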
void AsyncResultsMerger::handleBatchResponse(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& cbData, size_t remoteIndex) {
    stdx::lock_guard<stdx::mutex> lk(_mutex);

    auto& remote = _remotes[remoteIndex];

    // Clear the callback handle. This indicates that we are no longer waiting on a response from
    // 'remote'.
    remote.cbHandle = executor::TaskExecutor::CallbackHandle();

    // If we're in the process of shutting down then there's no need to process the batch.
    if (_lifecycleState != kAlive) {
        invariant(_lifecycleState == kKillStarted);

        // Make sure to wake up anyone waiting on '_currentEvent' if we're shutting down.
        signalCurrentEventIfReady_inlock();

        // If we're killed and we're not waiting on any more batches to come back, then we are
        // ready to kill the cursors on the remote hosts and clean up this cursor. Schedule the
        // killCursors command and signal that this cursor is now safe to destroy. We have to
        // promise not to touch any members of this class because 'this' could become invalid as
        // soon as we signal the event.
        if (!haveOutstandingBatchRequests_inlock()) {
            // If the event handle is invalid, then the executor is in the middle of shutting
            // down, and we can't schedule any more work for it to complete.
            if (_killCursorsScheduledEvent.isValid()) {
                scheduleKillCursors_inlock();
                _executor->signalEvent(_killCursorsScheduledEvent);
            }

            _lifecycleState = kKillComplete;
        }

        return;
    }

    // On any early return from this point on, the ScopeGuard signals anyone waiting on an event,
    // if ready() is true.
    ScopeGuard signaller = MakeGuard(&AsyncResultsMerger::signalCurrentEventIfReady_inlock, this);

    if (!cbData.response.isOK()) {
        remote.status = cbData.response.getStatus();

        // If we failed to retrieve the batch because we couldn't contact the remote, we notify
        // the targeter that the host is unreachable. The caller can then retry on a new host.
        if (remote.status == ErrorCodes::HostUnreachable && remote.shardId) {
            auto shard = _params.shardRegistry->getShard(_params.txn, *remote.shardId);
            if (!shard) {
                remote.status = Status(ErrorCodes::HostUnreachable,
                                       str::stream() << "Could not find shard " << *remote.shardId
                                                     << " containing host "
                                                     << remote.hostAndPort.toString());
            } else {
                shard->getTargeter()->markHostUnreachable(remote.hostAndPort);
            }
        }

        return;
    }

    auto getMoreParseStatus = CursorResponse::parseFromBSON(cbData.response.getValue().data);
    if (!getMoreParseStatus.isOK()) {
        remote.status = getMoreParseStatus.getStatus();
        return;
    }

    auto cursorResponse = getMoreParseStatus.getValue();

    // If we have a cursor established, and we get a non-zero cursorid that is not equal to the
    // established cursorid, we will fail the operation.
    if (remote.cursorId && cursorResponse.cursorId != 0 &&
        *remote.cursorId != cursorResponse.cursorId) {
        remote.status = Status(ErrorCodes::BadValue,
                               str::stream() << "Expected cursorid " << *remote.cursorId
                                             << " but received " << cursorResponse.cursorId);
        return;
    }

    remote.cursorId = cursorResponse.cursorId;
    remote.cmdObj = boost::none;

    for (const auto& obj : cursorResponse.batch) {
        // If there's a sort, we're expecting the remote node to give us back a sort key.
        if (!_params.sort.isEmpty() &&
            obj[ClusterClientCursorParams::kSortKeyField].type() != BSONType::Object) {
            remote.status = Status(ErrorCodes::InternalError,
                                   str::stream() << "Missing field '"
                                                 << ClusterClientCursorParams::kSortKeyField
                                                 << "' in document: " << obj);
            return;
        }

        remote.docBuffer.push(obj);
        ++remote.fetchedCount;
    }

    // If we're doing a sorted merge, then we have to make sure to put this remote onto the
    // merge queue.
    if (!_params.sort.isEmpty() && !cursorResponse.batch.empty()) {
        _mergeQueue.push(remoteIndex);
    }

    // If the cursor is tailable and we just received an empty batch, the next return value
    // should be boost::none in order to indicate the end of the batch.
    if (_params.isTailable && !remote.hasNext()) {
        _eofNext = true;
    }

    // If even after receiving this batch we still don't have anything buffered (i.e. the
    // batchSize was zero), then we can schedule work to retrieve the next batch right away.
    //
    // We do not ask for the next batch if the cursor is tailable, as batches received from
    // remote tailable cursors should be passed through to the client without asking for more
    // batches.
    if (!_params.isTailable && !remote.hasNext() && !remote.exhausted()) {
        auto nextBatchStatus = askForNextBatch_inlock(remoteIndex);
        if (!nextBatchStatus.isOK()) {
            remote.status = nextBatchStatus;
            return;
        }
    }

    // ScopeGuard requires dismissal on success, but we want the waiter to be signalled on
    // success as well as failure.
    signaller.Dismiss();
    signalCurrentEventIfReady_inlock();
}
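// Illustrative sketch (simplified stand-in types, not the actual MergingComparator): how a
// sorted merge over per-remote buffers can be driven by a priority queue of remote indexes, as
// '_mergeQueue.push(remoteIndex)' above suggests. The queue orders remotes by the document at
// the front of each remote's buffer, so popping always yields the remote whose next document
// sorts first. The real comparator compares BSON sort keys; std::string is used here only to
// keep the sketch self-contained. As in the code above, only remotes with non-empty buffers may
// be pushed, since the comparator inspects docBuffer.front().
#include <cstddef>
#include <queue>
#include <string>
#include <vector>

struct RemoteSketch {
    std::queue<std::string> docBuffer;  // stand-in for a buffer of BSON documents
};

class MergingComparatorSketch {
public:
    explicit MergingComparatorSketch(const std::vector<RemoteSketch>* remotes)
        : _remotes(remotes) {}

    // Returns true if 'lhs' should sort after 'rhs', so the priority queue surfaces the remote
    // with the smallest front document first (an ascending merge).
    bool operator()(size_t lhs, size_t rhs) const {
        return (*_remotes)[lhs].docBuffer.front() > (*_remotes)[rhs].docBuffer.front();
    }

private:
    const std::vector<RemoteSketch>* _remotes;
};

using MergeQueueSketch =
    std::priority_queue<size_t, std::vector<size_t>, MergingComparatorSketch>;

// Usage: MergeQueueSketch mergeQueue{MergingComparatorSketch(&remotes)};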