Code example #1
StatusWith<executor::TaskExecutor::EventHandle> AsyncResultsMerger::nextEvent() {
    stdx::lock_guard<stdx::mutex> lk(_mutex);

    if (_lifecycleState != kAlive) {
        // Can't schedule further network operations if the ARM is being killed.
        return Status(ErrorCodes::IllegalOperation,
                      "nextEvent() called on a killed AsyncResultsMerger");
    }

    if (_currentEvent.isValid()) {
        // We can't make a new event if there's still an unsignaled one, as every event must
        // eventually be signaled.
        return Status(ErrorCodes::IllegalOperation,
                      "nextEvent() called before an outstanding event was signaled");
    }

    // Schedule remote work on hosts for which we need more results.
    for (size_t i = 0; i < _remotes.size(); ++i) {
        auto& remote = _remotes[i];

        // It is illegal to call this method if there is an error received from any shard.
        invariant(remote.status.isOK());

        if (!remote.hasNext() && !remote.exhausted() && !remote.cbHandle.isValid()) {
            // If we already have established a cursor with this remote, and there is no outstanding
            // request for which we have a valid callback handle, then schedule work to retrieve the
            // next batch.
            auto nextBatchStatus = askForNextBatch_inlock(i);
            if (!nextBatchStatus.isOK()) {
                return nextBatchStatus;
            }
        }
    }

    auto eventStatus = _executor->makeEvent();
    if (!eventStatus.isOK()) {
        return eventStatus;
    }
    auto eventToReturn = eventStatus.getValue();
    _currentEvent = eventToReturn;

    // It's possible that after we told the caller we had no ready results but before the call to
    // this method, new results became available. In this case we have to signal the event right
    // away so that the caller will not block.
    signalCurrentEventIfReady_inlock();

    return eventToReturn;
}
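
The subtle step in nextEvent() is the final call: results may have arrived between the caller's last readiness check and the creation of the new event, so the event must be signaled immediately if the merger is already ready, or the caller could block forever. Below is a minimal, self-contained C++ sketch of that pattern. It is not MongoDB code: MiniMerger, pushResult(), and the use of std::promise in place of a TaskExecutor event handle are illustrative assumptions.

// A minimal, self-contained sketch of the "signal the new event immediately if results are
// already buffered" pattern used above. Not MongoDB code: std::promise stands in for the
// TaskExecutor event handle, and the names below are illustrative.
#include <future>
#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <utility>

class MiniMerger {
public:
    // Producer side: buffer a result and signal the outstanding event, if there is one.
    void pushResult(std::string doc) {
        std::lock_guard<std::mutex> lk(_mutex);
        _buffer.push(std::move(doc));
        signalCurrentEventIfReady_inlock();
    }

    // Consumer side: returns a handle the caller can wait on until a result is buffered.
    std::shared_future<void> nextEvent() {
        std::lock_guard<std::mutex> lk(_mutex);
        _currentEvent = std::promise<void>();
        _eventSignaled = false;
        auto handle = _currentEvent.get_future().share();
        // New results may have arrived after the caller last checked readiness; signal right
        // away in that case so the caller will not block.
        signalCurrentEventIfReady_inlock();
        return handle;
    }

    std::string nextReady() {
        std::lock_guard<std::mutex> lk(_mutex);
        auto doc = std::move(_buffer.front());
        _buffer.pop();
        return doc;
    }

private:
    // Must be called while holding '_mutex'.
    void signalCurrentEventIfReady_inlock() {
        if (!_eventSignaled && !_buffer.empty()) {
            _currentEvent.set_value();
            _eventSignaled = true;
        }
    }

    std::mutex _mutex;
    std::queue<std::string> _buffer;
    std::promise<void> _currentEvent;
    bool _eventSignaled = true;  // no outstanding event yet
};

int main() {
    MiniMerger merger;
    std::thread producer([&merger] { merger.pushResult("{a: 1}"); });

    auto event = merger.nextEvent();  // signaled immediately if the result already arrived
    event.wait();                     // otherwise blocks until pushResult() signals it
    std::cout << merger.nextReady() << std::endl;

    producer.join();
    return 0;
}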
Code example #2
void AsyncResultsMerger::handleBatchResponse(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& cbData, size_t remoteIndex) {
    stdx::lock_guard<stdx::mutex> lk(_mutex);

    auto& remote = _remotes[remoteIndex];

    // Clear the callback handle. This indicates that we are no longer waiting on a response from
    // 'remote'.
    remote.cbHandle = executor::TaskExecutor::CallbackHandle();

    // If we're in the process of shutting down then there's no need to process the batch.
    if (_lifecycleState != kAlive) {
        invariant(_lifecycleState == kKillStarted);

        // Make sure to wake up anyone waiting on '_currentEvent' if we're shutting down.
        signalCurrentEventIfReady_inlock();

        // Make a best effort to parse the response and retrieve the cursor id. We need the cursor
        // id in order to issue a killCursors command against it.
        if (cbData.response.isOK()) {
            auto cursorResponse = parseCursorResponse(cbData.response.getValue().data, remote);
            if (cursorResponse.isOK()) {
                remote.cursorId = cursorResponse.getValue().getCursorId();
            }
        }

        // If we're killed and we're not waiting on any more batches to come back, then we are ready
        // to kill the cursors on the remote hosts and clean up this cursor. Schedule the
        // killCursors command and signal that this cursor is now safe to destroy. We have to
        // promise not to touch any members of this class because 'this' could become invalid as
        // soon as we signal the event.
        if (!haveOutstandingBatchRequests_inlock()) {
            // If the event handle is invalid, then the executor is in the middle of shutting down,
            // and we can't schedule any more work for it to complete.
            if (_killCursorsScheduledEvent.isValid()) {
                scheduleKillCursors_inlock();
                _executor->signalEvent(_killCursorsScheduledEvent);
            }

            _lifecycleState = kKillComplete;
        }

        return;
    }

    // Any early return from this point on will signal anyone waiting on an event, if ready() is true.
    ScopeGuard signaller = MakeGuard(&AsyncResultsMerger::signalCurrentEventIfReady_inlock, this);

    StatusWith<CursorResponse> cursorResponseStatus(
        cbData.response.isOK() ? parseCursorResponse(cbData.response.getValue().data, remote)
                               : cbData.response.getStatus());

    if (!cursorResponseStatus.isOK()) {
        auto shard = remote.getShard();
        if (!shard) {
            remote.status = Status(cursorResponseStatus.getStatus().code(),
                                   str::stream() << "Could not find shard " << *remote.shardId
                                                 << " containing host "
                                                 << remote.getTargetHost().toString());
        } else {
            shard->updateReplSetMonitor(remote.getTargetHost(), cursorResponseStatus.getStatus());

            // Retry initial cursor establishment if possible.  Never retry getMores to avoid
            // accidentally skipping results.
            if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
                shard->isRetriableError(cursorResponseStatus.getStatus().code(),
                                        Shard::RetryPolicy::kIdempotent)) {
                invariant(remote.shardId);
                LOG(1) << "Initial cursor establishment failed with retriable error and will be "
                          "retried"
                       << causedBy(redact(cursorResponseStatus.getStatus()));

                ++remote.retryCount;

                // Since we potentially updated the targeter that the last host it chose might be
                // faulty, the call below may end up getting a different host.
                remote.status = askForNextBatch_inlock(remoteIndex);
                if (remote.status.isOK()) {
                    return;
                }

                // If we end up here, it means we failed to schedule the retry request, which is a
                // more severe error that should not be retried. Just pass through to the error
                // handling logic below.
            } else {
                remote.status = cursorResponseStatus.getStatus();
            }
        }

        // Unreachable host errors are swallowed if the 'allowPartialResults' option is set. We
        // remove the unreachable host entirely from consideration by marking it as exhausted.
        if (_params.isAllowPartialResults) {
            remote.status = Status::OK();

            // Clear the results buffer and cursor id.
            std::queue<BSONObj> emptyBuffer;
            std::swap(remote.docBuffer, emptyBuffer);
            remote.cursorId = 0;
        }

        return;
    }

    // Cursor id successfully established.
    auto cursorResponse = std::move(cursorResponseStatus.getValue());
    remote.cursorId = cursorResponse.getCursorId();
    remote.initialCmdObj = boost::none;

    for (const auto& obj : cursorResponse.getBatch()) {
        // If there's a sort, we're expecting the remote node to give us back a sort key.
        if (!_params.sort.isEmpty() &&
            obj[ClusterClientCursorParams::kSortKeyField].type() != BSONType::Object) {
            remote.status = Status(ErrorCodes::InternalError,
                                   str::stream() << "Missing field '"
                                                 << ClusterClientCursorParams::kSortKeyField
                                                 << "' in document: "
                                                 << obj);
            return;
        }

        remote.docBuffer.push(obj);
        ++remote.fetchedCount;
    }

    // If we're doing a sorted merge, then we have to make sure to put this remote onto the
    // merge queue.
    if (!_params.sort.isEmpty() && !cursorResponse.getBatch().empty()) {
        _mergeQueue.push(remoteIndex);
    }

    // If the cursor is tailable and we just received an empty batch, the next return value should
    // be boost::none in order to indicate the end of the batch.
    if (_params.isTailable && !remote.hasNext()) {
        _eofNext = true;
    }

    // If even after receiving this batch we still don't have anything buffered (i.e. the batchSize
    // was zero), then we can schedule work to retrieve the next batch right away.
    //
    // We do not ask for the next batch if the cursor is tailable, as batches received from remote
    // tailable cursors should be passed through to the client without asking for more batches.
    if (!_params.isTailable && !remote.hasNext() && !remote.exhausted()) {
        remote.status = askForNextBatch_inlock(remoteIndex);
        if (!remote.status.isOK()) {
            return;
        }
    }

    // ScopeGuard requires dismiss on success, but we want the waiter to be signalled on success as
    // well as on failure.
    signaller.Dismiss();
    signalCurrentEventIfReady_inlock();
}
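
Code example #2's error handling leans on the ScopeGuard declared near the top: every early return signals anyone waiting on the current event, and the success path dismisses the guard and then signals explicitly. The following is a small, self-contained sketch of that signal-on-early-return idiom. It is not MongoDB's MakeGuard; the guard class, signalWaiters(), and processBatch() are illustrative assumptions.

// A small, self-contained sketch of the signal-on-early-return idiom above. Not MongoDB's
// MakeGuard; the guard class and the functions below are illustrative.
#include <functional>
#include <iostream>
#include <utility>

class ScopeGuard {
public:
    explicit ScopeGuard(std::function<void()> onExit) : _onExit(std::move(onExit)) {}
    ~ScopeGuard() {
        if (!_dismissed) {
            _onExit();  // runs on every exit path unless dismissed
        }
    }
    void dismiss() {
        _dismissed = true;
    }

private:
    std::function<void()> _onExit;
    bool _dismissed = false;
};

void signalWaiters() {
    std::cout << "waiters signaled" << std::endl;
}

bool processBatch(bool parseError, bool scheduleError) {
    // Every early return below signals waiters via the guard, mirroring 'signaller' above.
    ScopeGuard signaller(signalWaiters);

    if (parseError) {
        return false;  // guard fires here
    }
    if (scheduleError) {
        return false;  // and here
    }

    // Success path: dismiss the guard and signal explicitly, mirroring signaller.Dismiss()
    // followed by signalCurrentEventIfReady_inlock() in the code above.
    signaller.dismiss();
    signalWaiters();
    return true;
}

int main() {
    processBatch(true, false);   // early-return path, still signals
    processBatch(false, false);  // success path, signals after dismissal
    return 0;
}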
Code example #3
void AsyncResultsMerger::handleBatchResponse(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& cbData,
    OperationContext* opCtx,
    size_t remoteIndex) {
    stdx::lock_guard<stdx::mutex> lk(_mutex);

    auto& remote = _remotes[remoteIndex];

    // Clear the callback handle. This indicates that we are no longer waiting on a response from
    // 'remote'.
    remote.cbHandle = executor::TaskExecutor::CallbackHandle();

    // If we're in the process of shutting down then there's no need to process the batch.
    if (_lifecycleState != kAlive) {
        invariant(_lifecycleState == kKillStarted);

        // Make sure to wake up anyone waiting on '_currentEvent' if we're shutting down.
        signalCurrentEventIfReady_inlock();

        // If we're killed and we're not waiting on any more batches to come back, then we are ready
        // to kill the cursors on the remote hosts and clean up this cursor. Schedule the
        // killCursors command and signal that this cursor is now safe to destroy. We have to
        // promise not to touch any members of this class because 'this' could become invalid as
        // soon as we signal the event.
        if (!haveOutstandingBatchRequests_inlock()) {
            // If the event handle is invalid, then the executor is in the middle of shutting down,
            // and we can't schedule any more work for it to complete.
            if (_killCursorsScheduledEvent.isValid()) {
                scheduleKillCursors_inlock(opCtx);
                _executor->signalEvent(_killCursorsScheduledEvent);
            }

            _lifecycleState = kKillComplete;
        }

        return;
    }

    // Any early return from this point on will signal anyone waiting on an event, if ready() is true.
    ScopeGuard signaller = MakeGuard(&AsyncResultsMerger::signalCurrentEventIfReady_inlock, this);

    StatusWith<CursorResponse> cursorResponseStatus(
        cbData.response.isOK() ? parseCursorResponse(cbData.response.data, remote)
                               : cbData.response.status);

    if (!cursorResponseStatus.isOK()) {
        auto shard = remote.getShard();
        if (!shard) {
            remote.status = Status(cursorResponseStatus.getStatus().code(),
                                   str::stream() << "Could not find shard containing host "
                                                 << remote.getTargetHost().toString());
        } else {
            shard->updateReplSetMonitor(remote.getTargetHost(), cursorResponseStatus.getStatus());
            remote.status = cursorResponseStatus.getStatus();
        }

        // Unreachable host errors are swallowed if the 'allowPartialResults' option is set. We
        // remove the unreachable host entirely from consideration by marking it as exhausted.
        if (_params->isAllowPartialResults) {
            remote.status = Status::OK();

            // Clear the results buffer and cursor id.
            std::queue<ClusterQueryResult> emptyBuffer;
            std::swap(remote.docBuffer, emptyBuffer);
            remote.cursorId = 0;
        }

        return;
    }

    // Response successfully received.

    auto cursorResponse = std::move(cursorResponseStatus.getValue());

    // Update the cursorId; it is sent as '0' when the cursor has been exhausted on the shard.
    remote.cursorId = cursorResponse.getCursorId();

    // Save the batch in the remote's buffer.
    if (!addBatchToBuffer(remoteIndex, cursorResponse.getBatch())) {
        return;
    }

    // If the cursor is tailable and we just received an empty batch, the next return value should
    // be boost::none in order to indicate the end of the batch.
    // (Note: tailable cursors are only valid on unsharded collections, so the end of the batch from
    // one shard means the end of the overall batch).
    if (_params->isTailable && !remote.hasNext()) {
        _eofNext = true;
    }

    // If even after receiving this batch we still don't have anything buffered (i.e. the batchSize
    // was zero), then we can schedule work to retrieve the next batch right away.
    //
    // We do not ask for the next batch if the cursor is tailable, as batches received from remote
    // tailable cursors should be passed through to the client without asking for more batches.
    if (!_params->isTailable && !remote.hasNext() && !remote.exhausted()) {
        remote.status = askForNextBatch_inlock(opCtx, remoteIndex);
        if (!remote.status.isOK()) {
            return;
        }
    }

    // ScopeGuard requires dismiss on success, but we want the waiter to be signalled on success as
    // well as on failure.
    signaller.Dismiss();
    signalCurrentEventIfReady_inlock();
}
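
A small idiom worth noting in the allowPartialResults branch above: std::queue has no clear() member, so the buffered results are dropped by swapping the buffer with a freshly constructed empty queue. A minimal, self-contained sketch follows, with an int payload standing in for ClusterQueryResult.

// A minimal, self-contained sketch of the buffer-clearing idiom in the allowPartialResults
// branch above; the int payload stands in for ClusterQueryResult.
#include <iostream>
#include <queue>
#include <utility>

int main() {
    std::queue<int> docBuffer;
    docBuffer.push(1);
    docBuffer.push(2);

    // std::queue has no clear() member; swap with a freshly constructed empty queue instead.
    // The old contents are destroyed when 'emptyBuffer' goes out of scope.
    std::queue<int> emptyBuffer;
    std::swap(docBuffer, emptyBuffer);

    std::cout << docBuffer.size() << std::endl;  // prints 0
    return 0;
}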
Code example #4
File: async_results_merger.cpp  Project: pypwxh/mongo
void AsyncResultsMerger::handleBatchResponse(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& cbData, size_t remoteIndex) {
    stdx::lock_guard<stdx::mutex> lk(_mutex);

    auto& remote = _remotes[remoteIndex];

    // Clear the callback handle. This indicates that we are no longer waiting on a response from
    // 'remote'.
    remote.cbHandle = executor::TaskExecutor::CallbackHandle();

    // If we're in the process of shutting down then there's no need to process the batch.
    if (_lifecycleState != kAlive) {
        invariant(_lifecycleState == kKillStarted);

        // Make sure to wake up anyone waiting on '_currentEvent' if we're shutting down.
        signalCurrentEventIfReady_inlock();

        // If we're killed and we're not waiting on any more batches to come back, then we are ready
        // to kill the cursors on the remote hosts and clean up this cursor. Schedule the
        // killCursors command and signal that this cursor is now safe to destroy. We have to
        // promise not to touch any members of this class because 'this' could become invalid as
        // soon as we signal the event.
        if (!haveOutstandingBatchRequests_inlock()) {
            // If the event handle is invalid, then the executor is in the middle of shutting down,
            // and we can't schedule any more work for it to complete.
            if (_killCursorsScheduledEvent.isValid()) {
                scheduleKillCursors_inlock();
                _executor->signalEvent(_killCursorsScheduledEvent);
            }

            _lifecycleState = kKillComplete;
        }
        return;
    }

    // Any early return from this point on will signal anyone waiting on an event, if ready() is true.
    ScopeGuard signaller = MakeGuard(&AsyncResultsMerger::signalCurrentEventIfReady_inlock, this);

    if (!cbData.response.isOK()) {
        remote.status = cbData.response.getStatus();

        // If we failed to retrieve the batch because we couldn't contact the remote, we notify the
        // targeter that the host is unreachable. The caller can then retry on a new host.
        if (remote.status == ErrorCodes::HostUnreachable && remote.shardId) {
            auto shard = _params.shardRegistry->getShard(_params.txn, *remote.shardId);
            if (!shard) {
                remote.status =
                    Status(ErrorCodes::HostUnreachable,
                           str::stream() << "Could not find shard " << *remote.shardId
                                         << " containing host " << remote.hostAndPort.toString());
            } else {
                shard->getTargeter()->markHostUnreachable(remote.hostAndPort);
            }
        }

        return;
    }

    auto getMoreParseStatus = CursorResponse::parseFromBSON(cbData.response.getValue().data);
    if (!getMoreParseStatus.isOK()) {
        remote.status = getMoreParseStatus.getStatus();
        return;
    }

    auto cursorResponse = getMoreParseStatus.getValue();

    // If we have a cursor established, and we get a non-zero cursorid that is not equal to the
    // established cursorid, we will fail the operation.
    if (remote.cursorId && cursorResponse.cursorId != 0 &&
        *remote.cursorId != cursorResponse.cursorId) {
        remote.status = Status(ErrorCodes::BadValue,
                               str::stream() << "Expected cursorid " << *remote.cursorId
                                             << " but received " << cursorResponse.cursorId);
        return;
    }

    remote.cursorId = cursorResponse.cursorId;
    remote.cmdObj = boost::none;

    for (const auto& obj : cursorResponse.batch) {
        // If there's a sort, we're expecting the remote node to give us back a sort key.
        if (!_params.sort.isEmpty() &&
            obj[ClusterClientCursorParams::kSortKeyField].type() != BSONType::Object) {
            remote.status = Status(ErrorCodes::InternalError,
                                   str::stream() << "Missing field '"
                                                 << ClusterClientCursorParams::kSortKeyField
                                                 << "' in document: " << obj);
            return;
        }

        remote.docBuffer.push(obj);
        ++remote.fetchedCount;
    }

    // If we're doing a sorted merge, then we have to make sure to put this remote onto the
    // merge queue.
    if (!_params.sort.isEmpty() && !cursorResponse.batch.empty()) {
        _mergeQueue.push(remoteIndex);
    }

    // If the cursor is tailable and we just received an empty batch, the next return value should
    // be boost::none in order to indicate the end of the batch.
    if (_params.isTailable && !remote.hasNext()) {
        _eofNext = true;
    }

    // If even after receiving this batch we still don't have anything buffered (i.e. the batchSize
    // was zero), then we can schedule work to retrieve the next batch right away.
    //
    // We do not ask for the next batch if the cursor is tailable, as batches received from remote
    // tailable cursors should be passed through to the client without asking for more batches.
    if (!_params.isTailable && !remote.hasNext() && !remote.exhausted()) {
        auto nextBatchStatus = askForNextBatch_inlock(remoteIndex);
        if (!nextBatchStatus.isOK()) {
            remote.status = nextBatchStatus;
            return;
        }
    }

    // ScopeGuard requires dismiss on success, but we want the waiter to be signalled on success as
    // well as on failure.
    signaller.Dismiss();
    signalCurrentEventIfReady_inlock();
}
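
A detail that examples #2 and #4 depend on: remote.cursorId behaves like an optional value, so "not set yet" (no cursor established) is distinct from a stored 0 (cursor exhausted on the shard). That distinction is what makes checks such as !remote.cursorId and *remote.cursorId != cursorResponse.cursorId meaningful. Below is a minimal, self-contained sketch of the three-state distinction; RemoteCursorData here is an illustrative stand-in (using std::optional so the sketch is dependency-free), not the real struct.

// An illustrative stand-in for the per-remote state: an unset cursorId means the initial
// command has not succeeded yet; a stored 0 means the shard reported the cursor as
// exhausted; any other value is a live cursor.
#include <cstdint>
#include <iostream>
#include <optional>

using CursorId = std::int64_t;

struct RemoteCursorData {
    std::optional<CursorId> cursorId;

    bool established() const { return cursorId.has_value(); }
    bool exhausted() const { return cursorId && *cursorId == 0; }
};

int main() {
    RemoteCursorData remote;
    std::cout << remote.established() << remote.exhausted() << '\n';  // 00: no cursor yet

    remote.cursorId = 12345;  // first response arrived with a live cursor id
    std::cout << remote.established() << remote.exhausted() << '\n';  // 10: live cursor

    remote.cursorId = 0;  // the shard reports the cursor as exhausted
    std::cout << remote.established() << remote.exhausted() << '\n';  // 11: established and exhausted
    return 0;
}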