예제 #1
0
Status NetworkInterfaceASIO::startCommand(const TaskExecutor::CallbackHandle& cbHandle,
                                          RemoteCommandRequest& request,
                                          const RemoteCommandCompletionFn& onFinish) {
    MONGO_ASIO_INVARIANT(onFinish, "Invalid completion function");
    {
        stdx::lock_guard<stdx::mutex> lk(_inProgressMutex);
        const auto insertResult = _inGetConnection.emplace(cbHandle);
        // We should never see the same CallbackHandle added twice
        MONGO_ASIO_INVARIANT_INLOCK(insertResult.second, "Same CallbackHandle added twice");
    }

    if (inShutdown()) {
        return {ErrorCodes::ShutdownInProgress, "NetworkInterfaceASIO shutdown in progress"};
    }

    LOG(2) << "startCommand: " << redact(request.toString());

    auto getConnectionStartTime = now();

    auto statusMetadata = attachMetadataIfNeeded(request, _metadataHook.get());
    if (!statusMetadata.isOK()) {
        return statusMetadata;
    }

    auto nextStep = [this, getConnectionStartTime, cbHandle, request, onFinish](
        StatusWith<ConnectionPool::ConnectionHandle> swConn) {

        if (!swConn.isOK()) {
            LOG(2) << "Failed to get connection from pool for request " << request.id << ": "
                   << swConn.getStatus();

            bool wasPreviouslyCanceled = false;
            {
                stdx::lock_guard<stdx::mutex> lk(_inProgressMutex);
                wasPreviouslyCanceled = _inGetConnection.erase(cbHandle) == 0;
            }

            Status status = wasPreviouslyCanceled
                ? Status(ErrorCodes::CallbackCanceled, "Callback canceled")
                : swConn.getStatus();
            if (status.code() == ErrorCodes::NetworkInterfaceExceededTimeLimit) {
                status = Status(ErrorCodes::ExceededTimeLimit, status.reason());
            }
            if (status.code() == ErrorCodes::ExceededTimeLimit) {
                _numTimedOutOps.fetchAndAdd(1);
            }
            if (status.code() != ErrorCodes::CallbackCanceled) {
                _numFailedOps.fetchAndAdd(1);
            }

            onFinish({status, now() - getConnectionStartTime});
            signalWorkAvailable();
            return;
        }

        auto conn = static_cast<connection_pool_asio::ASIOConnection*>(swConn.getValue().get());

        AsyncOp* op = nullptr;

        stdx::unique_lock<stdx::mutex> lk(_inProgressMutex);

        const auto eraseCount = _inGetConnection.erase(cbHandle);

        // If we didn't find the request, we've been canceled
        if (eraseCount == 0) {
            lk.unlock();

            onFinish({ErrorCodes::CallbackCanceled,
                      "Callback canceled",
                      now() - getConnectionStartTime});

            // Though we were canceled, we know that the stream is fine, so indicate success.
            conn->indicateSuccess();

            signalWorkAvailable();

            return;
        }

        // We can't release the AsyncOp until we know we were not canceled.
        auto ownedOp = conn->releaseAsyncOp();
        op = ownedOp.get();

        // This AsyncOp may be recycled. We expect timeout and canceled to be clean.
        // If this op was most recently used to connect, its state transitions won't have been
        // reset, so we do that here.
        MONGO_ASIO_INVARIANT_INLOCK(!op->canceled(), "AsyncOp has dirty canceled flag", op);
        MONGO_ASIO_INVARIANT_INLOCK(!op->timedOut(), "AsyncOp has dirty timeout flag", op);
        op->clearStateTransitions();

        // Now that we're inProgress, an external cancel can touch our op, but
        // not until we release the inProgressMutex.
        _inProgress.emplace(op, std::move(ownedOp));

        op->_cbHandle = std::move(cbHandle);
        op->_request = std::move(request);
        op->_onFinish = std::move(onFinish);
        op->_connectionPoolHandle = std::move(swConn.getValue());
        op->startProgress(getConnectionStartTime);

        // This ditches the lock and gets us onto the strand (so we're
        // threadsafe)
        op->_strand.post([this, op, getConnectionStartTime] {
            const auto timeout = op->_request.timeout;

            // Set timeout now that we have the correct request object
            if (timeout != RemoteCommandRequest::kNoTimeout) {
                // Subtract the time it took to get the connection from the pool from the request
                // timeout.
                auto getConnectionDuration = now() - getConnectionStartTime;
                if (getConnectionDuration >= timeout) {
                    // We only assume that the request timer is guaranteed to fire *after* the
                    // timeout duration - but make no stronger assumption. It is thus possible that
                    // we have already exceeded the timeout. In this case we timeout the operation
                    // manually.
                    std::stringstream msg;
                    msg << "Remote command timed out while waiting to get a connection from the "
                        << "pool, took " << getConnectionDuration << ", timeout was set to "
                        << timeout;
                    auto rs = ResponseStatus(ErrorCodes::NetworkInterfaceExceededTimeLimit,
                                             msg.str(),
                                             getConnectionDuration);
                    return _completeOperation(op, rs);
                }

                // The above conditional guarantees that the adjusted timeout will never underflow.
                MONGO_ASIO_INVARIANT(timeout > getConnectionDuration, "timeout underflowed", op);
                const auto adjustedTimeout = timeout - getConnectionDuration;
                const auto requestId = op->_request.id;

                try {
                    op->_timeoutAlarm =
                        op->_owner->_timerFactory->make(&op->_strand, adjustedTimeout);
                } catch (std::system_error& e) {
                    severe() << "Failed to construct timer for AsyncOp: " << e.what();
                    fassertFailed(40334);
                }

                std::shared_ptr<AsyncOp::AccessControl> access;
                std::size_t generation;
                {
                    stdx::lock_guard<stdx::mutex> lk(op->_access->mutex);
                    access = op->_access;
                    generation = access->id;
                }

                op->_timeoutAlarm->asyncWait(
                    [this, op, access, generation, requestId, adjustedTimeout](std::error_code ec) {
                        // We must pass a check for safe access before using op inside the
                        // callback or we may attempt access on an invalid pointer.
                        stdx::lock_guard<stdx::mutex> lk(access->mutex);
                        if (generation != access->id) {
                            // The operation has been cleaned up, do not access.
                            return;
                        }

                        if (!ec) {
                            LOG(2) << "Request " << requestId << " timed out"
                                   << ", adjusted timeout after getting connection from pool was "
                                   << adjustedTimeout << ", op was " << redact(op->toString());

                            op->timeOut_inlock();
                        } else {
                            LOG(2) << "Failed to time request " << requestId
                                   << "out: " << ec.message() << ", op was "
                                   << redact(op->toString());
                        }
                    });
            }

            _beginCommunication(op);
        });
    };

    _connectionPool.get(request.target, request.timeout, nextStep);
    return Status::OK();
}
예제 #2
0
void NetworkInterfaceASIO::startCommand(const TaskExecutor::CallbackHandle& cbHandle,
                                        const RemoteCommandRequest& request,
                                        const RemoteCommandCompletionFn& onFinish) {
    invariant(onFinish);
    {
        stdx::lock_guard<stdx::mutex> lk(_inProgressMutex);
        const auto insertResult = _inGetConnection.emplace(cbHandle);
        // We should never see the same CallbackHandle added twice
        invariant(insertResult.second);
    }

    LOG(2) << "startCommand: " << request.toString();

    auto startTime = now();

    auto nextStep = [this, startTime, cbHandle, request, onFinish](
        StatusWith<ConnectionPool::ConnectionHandle> swConn) {

        if (!swConn.isOK()) {
            LOG(2) << "Failed to get connection from pool: " << swConn.getStatus();

            bool wasPreviouslyCanceled = false;
            {
                stdx::lock_guard<stdx::mutex> lk(_inProgressMutex);
                wasPreviouslyCanceled = _inGetConnection.erase(cbHandle) == 0;
            }

            onFinish(wasPreviouslyCanceled
                         ? Status(ErrorCodes::CallbackCanceled, "Callback canceled")
                         : swConn.getStatus());
            signalWorkAvailable();
            return;
        }

        auto conn = static_cast<connection_pool_asio::ASIOConnection*>(swConn.getValue().get());

        AsyncOp* op = nullptr;

        stdx::unique_lock<stdx::mutex> lk(_inProgressMutex);

        const auto eraseCount = _inGetConnection.erase(cbHandle);

        // If we didn't find the request, we've been canceled
        if (eraseCount == 0) {
            lk.unlock();

            onFinish({ErrorCodes::CallbackCanceled, "Callback canceled"});

            // Though we were canceled, we know that the stream is fine, so indicate success.
            conn->indicateSuccess();

            signalWorkAvailable();

            return;
        }

        // We can't release the AsyncOp until we know we were not canceled.
        auto ownedOp = conn->releaseAsyncOp();
        op = ownedOp.get();

        // Sanity check that we are getting a clean AsyncOp.
        invariant(!op->canceled());
        invariant(!op->timedOut());

        // Now that we're inProgress, an external cancel can touch our op, but
        // not until we release the inProgressMutex.
        _inProgress.emplace(op, std::move(ownedOp));

        op->_cbHandle = std::move(cbHandle);
        op->_request = std::move(request);
        op->_onFinish = std::move(onFinish);
        op->_connectionPoolHandle = std::move(swConn.getValue());
        op->_start = startTime;

        // This ditches the lock and gets us onto the strand (so we're
        // threadsafe)
        op->_strand.post([this, op] {
            // Set timeout now that we have the correct request object
            if (op->_request.timeout != RemoteCommandRequest::kNoTimeout) {
                op->_timeoutAlarm =
                    op->_owner->_timerFactory->make(&op->_strand, op->_request.timeout);

                std::shared_ptr<AsyncOp::AccessControl> access;
                std::size_t generation;
                {
                    stdx::lock_guard<stdx::mutex> lk(op->_access->mutex);
                    access = op->_access;
                    generation = access->id;
                }

                op->_timeoutAlarm->asyncWait([this, op, access, generation](std::error_code ec) {
                    if (!ec) {
                        // We must pass a check for safe access before using op inside the
                        // callback or we may attempt access on an invalid pointer.
                        stdx::lock_guard<stdx::mutex> lk(access->mutex);
                        if (generation != access->id) {
                            // The operation has been cleaned up, do not access.
                            return;
                        }

                        LOG(2) << "Operation timed out: " << op->request().toString();

                        // An operation may be in mid-flight when it times out, so we
                        // cancel any in-progress async calls but do not complete the operation now.
                        op->_timedOut = 1;
                        if (op->_connection) {
                            op->_connection->cancel();
                        }
                    } else {
                        LOG(4) << "failed to time operation out: " << ec.message();
                    }
                });
            }

            _beginCommunication(op);
        });
    };

    // TODO: thread some higher level timeout through, rather than 5 minutes,
    // once we make timeouts pervasive in this api.
    _connectionPool.get(request.target, Minutes(5), nextStep);
}