示例#1
0
文件: mm_ep.c 项目: brminich/ucx
/**
 * Send a signal to the remote interface over the Unix-domain socket.
 *
 * @param ep   Endpoint whose cached signal socket address is the destination.
 * @param sig  Signal value to transmit.
 *
 * @return UCS_OK if the signal was sent, and also if the send failed with
 *         EAGAIN (a CONNECT signal is then registered for a retry from the
 *         worker's slow-path progress); UCS_ERR_IO_ERROR on any other send
 *         failure.
 */
static ucs_status_t
uct_mm_ep_signal_remote(uct_mm_ep_t *ep, uct_mm_iface_conn_signal_t sig)
{
    uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t);
    const struct sockaddr *peer_addr =
                    (const struct sockaddr*)&ep->cached_signal_sockaddr;
    int nsent;

    /* Send the signal message to the remote interface */
    nsent = sendto(iface->signal_fd, &sig, sizeof(sig), 0, peer_addr,
                   ep->cached_signal_addrlen);

    if (nsent < 0) {
        if (errno == EAGAIN) {
            /* The receiver may have a limited backlog; on Linux systems it is
             * net.unix.max_dgram_qlen (10 by default).
             */
            ucs_debug("Failed to send connect from socket %d to %p", iface->signal_fd,
                      peer_addr);

            /* Do not spin here retrying: looping could deadlock, because it
             * would prevent reading incoming messages, which in turn blocks
             * the remote sender. Instead, schedule the next attempt as a
             * slow-path progress callback (only for a CONNECT signal, and only
             * if one is not already registered).
             */
            if ((sig == UCT_MM_IFACE_SIGNAL_CONNECT) && !ep->cbq_elem_on) {
                ep->cbq_elem.cb = uct_mm_ep_signal_remote_slow_path_callback;
                uct_worker_slowpath_progress_register(iface->super.worker,
                                                      &ep->cbq_elem);
                ep->cbq_elem_on = 1;
            }

            /* Report success although nothing was sent, so that the calling
             * flow releases the lock and incoming Connect messages can be
             * read.
             */
            return UCS_OK;
        }

        if (errno == ECONNREFUSED) {
            ucs_debug("failed to send connect signal: connection refused");
        } else {
            ucs_error("failed to send connect signal: %m");
        }
        return UCS_ERR_IO_ERROR;
    }

    /* The message went out */
    ucs_assert(nsent == sizeof(sig));
    ucs_debug("Sent connect from socket %d to %p", iface->signal_fd,
              peer_addr);

    /* A retry callback may still be registered from an earlier EAGAIN */
    if (ep->cbq_elem_on) {
        uct_mm_ep_remove_slow_path_callback(iface, ep);
    }

    /* point the ep->fifo_ctl to the remote fifo */
    uct_mm_ep_connected(ep);

    return UCS_OK;
}
示例#2
0
文件: ucp_ep.c 项目: alex--m/ucx
/**
 * Check whether all lanes of a flush request have been flushed and, if so,
 * register the "flushed" slow-path callback for the request.
 *
 * @param req  Flush request to examine.
 *
 * @return 1 if the completion counter is zero and the callback was
 *         registered, 0 if the counter is still non-zero.
 */
static int ucp_flush_check_completion(ucp_request_t *req)
{
    ucp_ep_h ep = req->send.ep;

    if (req->send.uct_comp.count == 0) {
        /* All lanes are flushed */
        ucs_trace("adding slow-path callback to destroy ep %p", ep);

        /* Call the slow-path remove helper first, then install the "flushed"
         * callback on the request's callback element and register it.
         */
        ucp_ep_flush_slow_path_remove(req);
        req->send.flush.cbq_elem.cb = ucp_ep_flushed_slow_path_callback;
        req->send.flush.cbq_elem_on = 1;
        uct_worker_slowpath_progress_register(ep->worker->uct,
                                              &req->send.flush.cbq_elem);
        return 1;
    }

    return 0;
}
示例#3
0
文件: ucp_ep.c 项目: alex--m/ucx
/**
 * Pending-request handler that flushes the lane stored in a flush request and
 * then advances the overall flush operation.
 *
 * @param self  Pending request embedded in a ucp_request_t (send.uct member).
 *
 * @return UCS_ERR_NO_RESOURCE if uct_ep_flush() returned it; UCS_OK otherwise
 *         (for UCS_OK / UCS_INPROGRESS the request's lane is reset to
 *         UCP_NULL_LANE, any other status is passed to ucp_ep_flush_error()).
 */
static ucs_status_t ucp_ep_flush_progress_pending(uct_pending_req_t *self)
{
    ucp_request_t *req    = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_ep_h ep           = req->send.ep;
    ucp_lane_index_t lane = req->send.lane;
    ucs_status_t status;

    ucs_assert(!(req->flags & UCP_REQUEST_FLAG_COMPLETED));

    status = uct_ep_flush(ep->uct_eps[lane], 0, &req->send.uct_comp);
    ucs_trace("flushing ep %p lane[%d]: %s", ep, lane,
              ucs_status_string(status));
    if (status == UCS_OK) {
        /* UCT endpoint is flushed */
        req->send.uct_comp.count--;
    }

    /* req->send.lane is reset to UCP_NULL_LANE only at the end of this
     * function; per the original note, while the lane is still set this call
     * will not put anything on pending.
     */
    ucp_ep_flush_progress(req);

    if (!ucp_flush_check_completion(req)) {
        /* Not completed yet: if lanes remain and no slow-path element is
         * registered, add slow-path progress to resume the flush.
         */
        if (req->send.flush.lanes && !req->send.flush.cbq_elem_on) {
            ucs_trace("ep %p: adding slow-path callback to resume flush", ep);
            req->send.flush.cbq_elem.cb = ucp_ep_flush_resume_slow_path_callback;
            req->send.flush.cbq_elem_on = 1;
            uct_worker_slowpath_progress_register(ep->worker->uct,
                                                  &req->send.flush.cbq_elem);
        }
    }

    if ((status == UCS_OK) || (status == UCS_INPROGRESS)) {
        req->send.lane = UCP_NULL_LANE;
        return UCS_OK;
    }

    if (status != UCS_ERR_NO_RESOURCE) {
        ucp_ep_flush_error(req, status);
        return UCS_OK;
    }

    return UCS_ERR_NO_RESOURCE;
}