Exemple #1
0
static unsigned uct_tcp_ep_send(uct_tcp_ep_t *ep)
{
    uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_tcp_iface_t);
    size_t send_length;
    ucs_status_t status;

    send_length = ep->length - ep->offset;
    ucs_assert(send_length > 0);

    status = uct_tcp_send(ep->fd, ep->buf + ep->offset, &send_length);
    if (status < 0) {
        return 0;
    }

    ucs_trace_data("tcp_ep %p: sent %zu bytes", ep, send_length);

    iface->outstanding -= send_length;
    ep->offset         += send_length;
    if (ep->offset == ep->length) {
        ep->offset = 0;
        ep->length = 0;
    }

    return send_length > 0;
}
Exemple #2
0
/* send a signal to remote interface using Unix-domain socket */
static ucs_status_t
uct_mm_ep_signal_remote(uct_mm_ep_t *ep, uct_mm_iface_conn_signal_t sig)
{
    uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t);
    int ret;

    /**
     * Send connect message to remote interface
     */
    for (;;) {
        ret = sendto(iface->signal_fd, &sig, sizeof(sig), 0,
                     (const struct sockaddr*)&ep->cached_signal_sockaddr,
                     ep->cached_signal_addrlen);
        if (ret >= 0) {
            ucs_assert(ret == sizeof(sig));
            return UCS_OK;
        } else if (errno == EAGAIN) {
            /* If sending a signal has failed, retry.
             * Note the by default the receiver might have a limited backlog,
             * on Linux systems it is net.unix.max_dgram_qlen (10 by default).
             */
            uct_mm_iface_recv_messages(iface);
        } else {
            if ((sig == UCT_MM_IFACE_SIGNAL_DISCONNECT) && (errno == ECONNREFUSED)) {
                ucs_debug("failed to send disconnect signal: connection refused");
            } else {
                ucs_error("failed to send %sconnect signal: %m",
                          (sig == UCT_MM_IFACE_SIGNAL_CONNECT) ? "" : "dis");
            }
            return UCS_ERR_IO_ERROR;
        }
    }
}
Exemple #3
0
/**
 * Register the memory on all PDs, except maybe for alloc_pd.
 * In case alloc_pd != NULL, alloc_pd_memh will hold the memory key obtained from
 * allocation. It will be put in the array of keys in the proper index.
 */
static ucs_status_t ucp_memh_reg_pds(ucp_context_h context, ucp_mem_h memh,
                                     uct_mem_h alloc_pd_memh)
{
    uct_mem_h dummy_pd_memh;
    unsigned uct_memh_count;
    ucs_status_t status;
    unsigned pd_index;

    memh->pd_map   = 0;
    uct_memh_count = 0;

    /* Register on all transports (except the one we used to allocate) */
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        if (context->pds[pd_index] == memh->alloc_pd) {
            /* Add the memory handle we got from allocation */
            ucs_assert(memh->alloc_method == UCT_ALLOC_METHOD_PD);
            memh->pd_map |= UCS_BIT(pd_index);
            memh->uct[uct_memh_count++] = alloc_pd_memh;
        } else if (context->pd_attrs[pd_index].cap.flags & UCT_PD_FLAG_REG) {
            /* If the PD supports registration, register on it as well */
            status = uct_pd_mem_reg(context->pds[pd_index], memh->address,
                                    memh->length, &memh->uct[uct_memh_count]);
            if (status != UCS_OK) {
                ucp_memh_dereg_pds(context, memh, &dummy_pd_memh);
                return status;
            }

            memh->pd_map |= UCS_BIT(pd_index);
            ++uct_memh_count;
        }
    }
    return UCS_OK;
}
Exemple #4
0
ucs_status_t uct_rc_verbs_ep_get_bcopy(uct_ep_h tl_ep,
                                       uct_unpack_callback_t unpack_cb,
                                       void *arg, size_t length,
                                       uint64_t remote_addr, uct_rkey_t rkey,
                                       uct_completion_t *comp)
{
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t);
    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_send_wr wr;
    struct ibv_sge sge;

    UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size, "get_bcopy");
    UCT_RC_VERBS_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->super.tx.mp, desc);

    ucs_assert(length <= iface->super.super.config.seg_size);

    desc->super.handler     = (comp == NULL) ?
                                uct_rc_ep_get_bcopy_handler_no_completion :
                                uct_rc_ep_get_bcopy_handler;
    desc->super.unpack_arg  = arg;
    desc->super.user_comp   = comp;
    desc->super.length      = length;
    desc->unpack_cb         = unpack_cb;

    uct_rc_verbs_fill_rdma_wr(&wr, IBV_WR_RDMA_READ, &sge, length, remote_addr,
                              rkey);

    UCT_TL_EP_STAT_OP(&ep->super.super, GET, BCOPY, length);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED);
    return UCS_INPROGRESS;
}
Exemple #5
0
static ucs_status_t uct_knem_rkey_release(uct_md_component_t *mdc, uct_rkey_t rkey,
                                          void *handle)
{
    ucs_assert(NULL == handle);
    ucs_free((void *)rkey);
    return UCS_OK;
}
Exemple #6
0
static size_t ucp_tag_pack_eager_first_generic(void *dest, void *arg)
{
    ucp_eager_first_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t max_length, length;

    ucs_assert(req->send.state.offset == 0);

    max_length           = ucp_ep_config(req->send.ep)->max_am_bcopy - sizeof(*hdr);
    hdr->super.super.tag = req->send.tag;
    hdr->total_len       = req->send.length;

    ucs_assert(req->send.length > max_length);
    length = ucp_request_generic_dt_pack(req, hdr + 1, max_length);
    return sizeof(*hdr) + length;
}
Exemple #7
0
ucs_status_t uct_rc_verbs_ep_fc_ctrl(uct_rc_ep_t *rc_ep)
{
    uct_rc_verbs_iface_t *iface = ucs_derived_of(rc_ep->super.super.iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_verbs_ep_t *ep = ucs_derived_of(rc_ep, uct_rc_verbs_ep_t);
    uct_rc_hdr_t hdr;
    struct ibv_send_wr fc_wr;

    /* Do not check FC WND here to avoid head-to-head deadlock.
     * Credits grant should be sent regardless of FC wnd state. */
    ucs_assert(sizeof(hdr) <= iface->verbs_common.config.max_inline);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    hdr.am_id                     = UCT_RC_EP_FC_PURE_GRANT;

    fc_wr.sg_list                 = iface->verbs_common.inl_sge;
    fc_wr.num_sge                 = 1;
    fc_wr.opcode                  = IBV_WR_SEND;
    fc_wr.next                    = NULL;

    iface->verbs_common.inl_sge[0].addr        = (uintptr_t)&hdr;
    iface->verbs_common.inl_sge[0].length      = sizeof(hdr);

    uct_rc_verbs_ep_post_send(iface, ep, &fc_wr, IBV_SEND_INLINE);
    return UCS_OK;
}
Exemple #8
0
static size_t ucp_tag_rndv_pack_rkey(ucp_request_t *sreq,
                                     ucp_rndv_rts_hdr_t *rndv_rts_hdr)
{
    ucp_ep_h ep = sreq->send.ep;
    size_t packed_rkey = 0;
    ucs_status_t status;

    ucs_assert(UCP_DT_IS_CONTIG(sreq->send.datatype));

    /* Check if the sender needs to register the send buffer -
     * is its datatype contiguous and does the receive side need it */
    if ((ucp_ep_is_rndv_lane_present(ep)) &&
        (ucp_ep_rndv_md_flags(ep) & UCT_MD_FLAG_NEED_RKEY)) {
        status = ucp_request_send_buffer_reg(sreq, ucp_ep_get_rndv_get_lane(ep));
        ucs_assert_always(status == UCS_OK);

        /* if the send buffer was registered, send the rkey */
        UCS_PROFILE_CALL(uct_md_mkey_pack, ucp_ep_md(ep, ucp_ep_get_rndv_get_lane(ep)),
                         sreq->send.state.dt.contig.memh, rndv_rts_hdr + 1);
        rndv_rts_hdr->flags |= UCP_RNDV_RTS_FLAG_PACKED_RKEY;
        packed_rkey = ucp_ep_md_attr(ep, ucp_ep_get_rndv_get_lane(ep))->rkey_packed_size;
    }

    return packed_rkey;
}
Exemple #9
0
static size_t ucp_tag_pack_eager_sync_only_dt(void *dest, void *arg)
{
    ucp_eager_sync_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t length;

    hdr->super.super.tag = req->send.tag.tag;
    hdr->req.sender_uuid = req->send.ep->worker->uuid;
    hdr->req.reqptr      = (uintptr_t)req;

    ucs_assert(req->send.state.dt.offset == 0);
    length = ucp_dt_pack(req->send.datatype, hdr + 1, req->send.buffer,
                         &req->send.state.dt, req->send.length);
    ucs_assert(length == req->send.length);
    return sizeof(*hdr) + length;
}
Exemple #10
0
static UCS_F_ALWAYS_INLINE 
ucs_status_t uct_ud_mlx5_iface_poll_rx(uct_ud_mlx5_iface_t *iface)
{
    struct mlx5_cqe64 *cqe;
    uint16_t ci;
    uct_ib_iface_recv_desc_t *desc;
    uint32_t len;
    void *packet;
    ucs_status_t status;

    ci     = iface->rx.wq.cq_wqe_counter & iface->rx.wq.mask;
    packet = (void *)ntohll(iface->rx.wq.wqes[ci].addr);
    ucs_prefetch(packet + UCT_IB_GRH_LEN);
    desc   = (uct_ib_iface_recv_desc_t *)(packet - iface->super.super.config.rx_hdr_offset);

    cqe = uct_ib_mlx5_get_cqe(&iface->rx.cq, UCT_IB_MLX5_CQE64_SIZE_LOG);
    if (cqe == NULL) {
        status = UCS_ERR_NO_PROGRESS;
        goto out;
    }
    uct_ib_mlx5_log_cqe(cqe);
    ucs_assert(0 == (cqe->op_own & 
               (MLX5_INLINE_SCATTER_32|MLX5_INLINE_SCATTER_64)));
    ucs_assert(ntohs(cqe->wqe_counter) == iface->rx.wq.cq_wqe_counter);

    iface->super.rx.available++;
    iface->rx.wq.cq_wqe_counter++;

    len = ntohl(cqe->byte_cnt);
    VALGRIND_MAKE_MEM_DEFINED(packet, len);

    uct_ud_ep_process_rx(&iface->super,
                         (uct_ud_neth_t *)(packet + UCT_IB_GRH_LEN),
                         len - UCT_IB_GRH_LEN,
                         (uct_ud_recv_skb_t *)desc);
    status = UCS_OK;

out:
    if (iface->super.rx.available >= iface->super.config.rx_max_batch) {
        /* we need to try to post buffers always. Otherwise it is possible
         * to run out of rx wqes if receiver is slow and there are always
         * cqe to process
         */
        uct_ud_mlx5_iface_post_recv(iface);
    }
    return status;
}
Exemple #11
0
static ucs_status_t get_nic_address(uct_ugni_device_t *dev_p)
{
    int             alps_addr = -1;
    int             alps_dev_id = -1;
    int             i;
    char           *token, *pmi_env;

    pmi_env = getenv("PMI_GNI_DEV_ID");
    if (NULL == pmi_env) {
        gni_return_t ugni_rc;
        ugni_rc = GNI_CdmGetNicAddress(dev_p->device_id, &dev_p->address,
                                       &dev_p->cpu_id);
        if (GNI_RC_SUCCESS != ugni_rc) {
            ucs_error("GNI_CdmGetNicAddress failed, device %d, Error status: %s %d",
                      dev_p->device_id, gni_err_str[ugni_rc], ugni_rc);
            return UCS_ERR_NO_DEVICE;
        }
        CPU_SET(dev_p->cpu_id, &(dev_p->cpu_mask));
        ucs_debug("(GNI) NIC address: %d", dev_p->address);
    } else {
        while ((token = strtok(pmi_env, ":")) != NULL) {
            alps_dev_id = atoi(token);
            if (alps_dev_id == dev_p->device_id) {
                break;
            }
            pmi_env = NULL;
        }
        ucs_assert(alps_dev_id != -1);

        pmi_env = getenv("PMI_GNI_LOC_ADDR");
        ucs_assert(NULL != pmi_env);
        i = 0;
        while ((token = strtok(pmi_env, ":")) != NULL) {
            if (i == alps_dev_id) {
                alps_addr = atoi(token);
                break;
            }
            pmi_env = NULL;
            ++i;
        }
        ucs_assert(alps_addr != -1);
        dev_p->address = alps_addr;
        ucs_debug("(PMI) NIC address: %d", dev_p->address);
    }
    return UCS_OK;
}
Exemple #12
0
static ucs_status_t
uct_ud_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
                        const void *buffer, unsigned length)
{
    uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t);
    uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_ud_mlx5_iface_t);
    struct mlx5_wqe_ctrl_seg *ctrl;
    struct mlx5_wqe_inl_data_seg *inl;
    uct_ud_am_short_hdr_t *am;
    uct_ud_neth_t *neth;
    unsigned wqe_size;
    uct_ud_send_skb_t *skb;

    /* data a written directly into tx wqe, so it is impossible to use
     * common ud am code
     */
    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(hdr) + length,
                    iface->super.config.max_inline, "am_short");

    uct_ud_enter(&iface->super);
    uct_ud_iface_progress_pending_tx(&iface->super);
    skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super);
    if (!skb) {
        uct_ud_leave(&iface->super);
        return UCS_ERR_NO_RESOURCE;
    }

    ctrl = iface->tx.wq.curr;
    /* Set inline segment which has AM id, AM header, and AM payload */
    inl = uct_ib_mlx5_get_next_seg(&iface->tx.wq, ctrl, UCT_UD_MLX5_WQE_SIZE);
    wqe_size = length + sizeof(*am) + sizeof(*neth);
    inl->byte_count = htonl(wqe_size | MLX5_INLINE_SEG);

    /* assume that neth and am header fit into one bb */
    ucs_assert(sizeof(*am) + sizeof(*neth) < MLX5_SEND_WQE_BB);
    neth = (void*)(inl + 1);
    uct_ud_am_set_neth(neth, &ep->super, id);

    am      = (void*)(neth + 1);
    am->hdr = hdr;
    uct_ib_mlx5_inline_copy(am + 1, buffer, length, &iface->tx.wq);

    wqe_size += UCT_UD_MLX5_WQE_SIZE + sizeof(*inl);
    UCT_CHECK_LENGTH(wqe_size, UCT_IB_MLX5_MAX_BB * MLX5_SEND_WQE_BB,
                     "am_short");
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth);
    uct_ud_mlx5_post_send(iface, ep, ctrl, wqe_size);

    skb->len = sizeof(*neth) + sizeof(*am);
    memcpy(skb->neth, neth, skb->len); 
    uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb,
                                 (char *)skb->neth + skb->len, buffer, length);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length);
    uct_ud_leave(&iface->super);
    return UCS_OK;
}
Exemple #13
0
/* Pack a string and return a pointer to storage right after the string */
static void* ucp_address_pack_string(const char *s, void *dest)
{
    size_t length = strlen(s);

    ucs_assert(length <= UINT8_MAX);
    *(uint8_t*)dest = length;
    memcpy(dest + 1, s, length);
    return dest + 1 + length;
}
Exemple #14
0
/* send a signal to remote interface using Unix-domain socket */
static ucs_status_t
uct_mm_ep_signal_remote(uct_mm_ep_t *ep, uct_mm_iface_conn_signal_t sig)
{
    uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t);
    int ret;

    /**
     * Send connect message to remote interface
     */
    ret = sendto(iface->signal_fd, &sig, sizeof(sig), 0,
                 (const struct sockaddr*)&ep->cached_signal_sockaddr,
                 ep->cached_signal_addrlen);
    if (ret >= 0) {
        ucs_assert(ret == sizeof(sig));
        ucs_debug("Sent connect from socket %d to %p", iface->signal_fd,
                  (const struct sockaddr*)&ep->cached_signal_sockaddr);

        if (ep->cbq_elem_on) {
            uct_mm_ep_remove_slow_path_callback(iface, ep);
        }

        /* point the ep->fifo_ctl to the remote fifo */
        uct_mm_ep_connected(ep);

        return UCS_OK;
    } else if (errno == EAGAIN) {
        /* If sending a signal has failed, retry.
         * Note that by default the receiver might have a limited backlog,
         * on Linux systems it is net.unix.max_dgram_qlen (10 by default).
         */
        ucs_debug("Failed to send connect from socket %d to %p", iface->signal_fd,
                  (const struct sockaddr*)&ep->cached_signal_sockaddr);

        /* If sending the Connect message failed with EAGAIN, try again later.
         * Don't keep trying now in a loop since this may cause a deadlock which
         * prevents the reading of incoming messages which blocks the remote sender.
         * Add the sending attempt as a callback to a slow progress.
         */
        if ((!ep->cbq_elem_on) && (sig == UCT_MM_IFACE_SIGNAL_CONNECT)) {
             ep->cbq_elem.cb = uct_mm_ep_signal_remote_slow_path_callback;
             uct_worker_slowpath_progress_register(iface->super.worker, &ep->cbq_elem);
             ep->cbq_elem_on = 1;
        }

        /* Return UCS_OK in this case even though couldn't send, so that the
         * calling flow would release the lock and allow the reading of incoming
         * Connect messages. */
        return UCS_OK;
    } else {
        if (errno == ECONNREFUSED) {
            ucs_debug("failed to send connect signal: connection refused");
        } else {
            ucs_error("failed to send connect signal: %m");
        }
        return UCS_ERR_IO_ERROR;
    }
}
Exemple #15
0
static size_t ucp_tag_pack_eager_first_dt(void *dest, void *arg)
{
    ucp_eager_first_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t length;

    length               = ucp_ep_config(req->send.ep)->am.max_bcopy -
                                         sizeof(*hdr);
    hdr->super.super.tag = req->send.tag;
    hdr->total_len       = req->send.length;

    ucs_debug("pack eager_first paylen %zu", length);
    ucs_assert(req->send.state.offset == 0);
    ucs_assert(req->send.length > length);
    return sizeof(*hdr) + ucp_dt_pack(req->send.datatype, hdr + 1,
                                      req->send.buffer, &req->send.state,
                                      length);
}
Exemple #16
0
static size_t ucp_tag_pack_eager_sync_first_contig(void *dest, void *arg)
{
    ucp_eager_sync_first_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t length;

    length                     = ucp_ep_config(req->send.ep)->max_am_bcopy - sizeof(*hdr);
    hdr->super.super.super.tag = req->send.tag;
    hdr->super.total_len       = req->send.length;
    hdr->req.sender_uuid       = req->send.ep->worker->uuid;
    hdr->req.reqptr            = (uintptr_t)req;

    ucs_debug("pack eager_sync_first paylen %zu", length);
    ucs_assert(req->send.state.offset == 0);
    ucs_assert(req->send.length > length);
    memcpy(hdr + 1, req->send.buffer, length);
    return sizeof(*hdr) + length;
}
Exemple #17
0
unsigned uct_tcp_ep_progress_rx(uct_tcp_ep_t *ep)
{
    uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                            uct_tcp_iface_t);
    uct_tcp_am_hdr_t *hdr;
    ucs_status_t status;
    size_t recv_length;
    ssize_t remainder;

    ucs_trace_func("ep=%p", ep);

    /* Receive next chunk of data */
    recv_length = iface->config.buf_size - ep->length;
    status = uct_tcp_recv(ep->fd, ep->buf + ep->length, &recv_length);
    if (status != UCS_OK) {
        if (status == UCS_ERR_CANCELED) {
            ucs_debug("tcp_ep %p: remote disconnected", ep);
            uct_tcp_ep_mod_events(ep, 0, EPOLLIN);
            uct_tcp_ep_destroy(&ep->super.super);
        }
        return 0;
    }

    ep->length += recv_length;
    ucs_trace_data("tcp_ep %p: recvd %zu bytes", ep, recv_length);

    /* Parse received active messages */
    while ((remainder = ep->length - ep->offset) >= sizeof(*hdr)) {
        hdr = ep->buf + ep->offset;
        if (remainder < sizeof(*hdr) + hdr->length) {
            break;
        }

        /* Full message was received */
        ep->offset += sizeof(*hdr) + hdr->length;

        if (hdr->am_id >= UCT_AM_ID_MAX) {
            ucs_error("invalid am id: %d", hdr->am_id);
            continue;
        }

        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV, hdr->am_id,
                           hdr + 1, hdr->length, "RECV fd %d", ep->fd);
        uct_iface_invoke_am(&iface->super, hdr->am_id, hdr + 1,
                            hdr->length, 0);
    }

    /* Move the remaining data to the beginning of the buffer
     * TODO avoid extra copy on partial receive
     */
    ucs_assert(remainder >= 0);
    memmove(ep->buf, ep->buf + ep->offset, remainder);
    ep->offset = 0;
    ep->length = remainder;

    return recv_length > 0;
}
Exemple #18
0
ucs_status_t ucp_request_check_status(void *request)
{
    ucp_request_t *req = (ucp_request_t*)request - 1;

    if (req->flags & UCP_REQUEST_FLAG_COMPLETED) {
        ucs_assert(req->status != UCS_INPROGRESS);
        return req->status;
    }
    return UCS_INPROGRESS;
}
Exemple #19
0
/* Unpack a string and return pointer to next storage byte */
static const void* ucp_address_unpack_string(const void *src, char *s, size_t max)
{
    size_t length, avail;

    ucs_assert(max >= 1);
    length   = *(const uint8_t*)src;
    avail    = ucs_min(length, max - 1);
    memcpy(s, src + 1, avail);
    s[avail]  = '\0';
    return src + length + 1;
}
Exemple #20
0
static ucs_status_t 
uct_ud_mlx5_ep_put_short(uct_ep_h tl_ep, const void *buffer, unsigned length,
                         uint64_t remote_addr, uct_rkey_t rkey)
{
    uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t);
    uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_ud_mlx5_iface_t);
    struct mlx5_wqe_ctrl_seg *ctrl;
    struct mlx5_wqe_inl_data_seg *inl;
    unsigned wqe_size;
    uct_ud_put_hdr_t *put_hdr;
    uct_ud_neth_t *neth;
    uct_ud_send_skb_t *skb;

    uct_ud_enter(&iface->super);
    uct_ud_iface_progress_pending_tx(&iface->super);
    skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super);
    if (!skb) {
        uct_ud_leave(&iface->super);
        return UCS_ERR_NO_RESOURCE;
    }

    ctrl = iface->tx.wq.curr;
    /* Set inline segment which has AM id, AM header, and AM payload */
    inl = uct_ib_mlx5_get_next_seg(&iface->tx.wq, ctrl, UCT_UD_MLX5_WQE_SIZE);
    wqe_size = length + sizeof(*put_hdr) + sizeof(*neth);
    inl->byte_count = htonl(wqe_size | MLX5_INLINE_SEG);

    /* assume that neth and am header fit into one bb */
    ucs_assert(sizeof(*put_hdr) + sizeof(*neth) < MLX5_SEND_WQE_BB);
    neth = (void*)(inl + 1);
    uct_ud_neth_init_data(&ep->super, neth);
    uct_ud_neth_set_type_put(&ep->super, neth);
    uct_ud_neth_ack_req(&ep->super, neth);

    put_hdr = (uct_ud_put_hdr_t *)(neth+1);
    put_hdr->rva = remote_addr;

    uct_ib_mlx5_inline_copy(put_hdr + 1, buffer, length, &iface->tx.wq);

    wqe_size += UCT_UD_MLX5_WQE_SIZE + sizeof(*inl);
    UCT_CHECK_LENGTH(wqe_size, UCT_IB_MLX5_MAX_BB * MLX5_SEND_WQE_BB,
                     "put_short");
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth);
    uct_ud_mlx5_post_send(iface, ep, ctrl, wqe_size);

    skb->len = sizeof(*neth) + sizeof(*put_hdr);
    memcpy(skb->neth, neth, skb->len); 
    uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb,
                                 (char *)skb->neth + skb->len, buffer, length);
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length);
    uct_ud_leave(&iface->super);
    return UCS_OK;
}
Exemple #21
0
static UCS_F_ALWAYS_INLINE ucs_status_ptr_t
ucp_tag_send_req(ucp_request_t *req, size_t count,
                 const ucp_ep_msg_config_t* msg_config,
                 size_t rndv_rma_thresh, size_t rndv_am_thresh,
                 ucp_send_callback_t cb, const ucp_proto_t *proto)
{
    size_t seg_size     = (msg_config->max_bcopy - proto->only_hdr_size);
    size_t rndv_thresh  = ucp_tag_get_rndv_threshold(req, count,
                                                     msg_config->max_iov,
                                                     rndv_rma_thresh,
                                                     rndv_am_thresh, seg_size);
    size_t zcopy_thresh = ucp_proto_get_zcopy_threshold(req, msg_config, count,
                                                        rndv_thresh);
    ssize_t max_short   = ucp_proto_get_short_max(req, msg_config);
    ucs_status_t status;

    ucs_trace_req("select tag request(%p) progress algorithm datatype=%lx "
                  "buffer=%p length=%zu max_short=%zd rndv_thresh=%zu "
                  "zcopy_thresh=%zu",
                  req, req->send.datatype, req->send.buffer, req->send.length,
                  max_short, rndv_thresh, zcopy_thresh);

    status = ucp_request_send_start(req, max_short, zcopy_thresh, seg_size,
                                    rndv_thresh, proto);
    if (ucs_unlikely(status != UCS_OK)) {
        if (status == UCS_ERR_NO_PROGRESS) {
             ucs_assert(req->send.length >= rndv_thresh);
            /* RMA/AM rendezvous */
            status = ucp_tag_send_start_rndv(req);
        }
        if (status != UCS_OK) {
            return UCS_STATUS_PTR(status);
        }
    }

    ucp_request_send_tag_stat(req);

    /*
     * Start the request.
     * If it is completed immediately, release the request and return the status.
     * Otherwise, return the request.
     */
    status = ucp_request_send(req);
    if (req->flags & UCP_REQUEST_FLAG_COMPLETED) {
        ucs_trace_req("releasing send request %p, returning status %s", req,
                      ucs_status_string(status));
        ucp_request_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucp_request_set_callback(req, send.cb, cb)
    ucs_trace_req("returning send request %p", req);
    return req + 1;
}
Exemple #22
0
static size_t ucp_tag_pack_eager_first_dt(void *dest, void *arg)
{
    ucp_eager_first_hdr_t *hdr = dest;
    ucp_request_t *req = arg;
    size_t length;

    ucs_assert(req->send.lane == ucp_ep_get_am_lane(req->send.ep));

    length               = ucp_ep_get_max_bcopy(req->send.ep, req->send.lane) -
                           sizeof(*hdr);
    hdr->super.super.tag = req->send.tag.tag;
    hdr->total_len       = req->send.length;
    hdr->msg_id          = req->send.tag.message_id;

    ucs_assert(req->send.state.dt.offset == 0);
    ucs_assert(req->send.length > length);
    return sizeof(*hdr) + ucp_dt_pack(req->send.datatype, hdr + 1,
                                      req->send.buffer, &req->send.state.dt,
                                      length); 
}
Exemple #23
0
void ucp_ep_destroy(ucp_ep_h ep)
{
    ucs_debug("destroy ep %p", ep);
    ucp_wireup_stop(ep);
    if (ep->state & UCP_EP_STATE_READY_TO_SEND) {
        ucp_ep_destroy_uct_ep_safe(ep, ep->uct_ep);
    } else {
        ucs_assert(ep->uct_ep == NULL);
    }
    ucs_free(ep);
}
Exemple #24
0
ucs_status_t ucp_stream_recv_request_test(void *request, size_t *length_p)
{
    ucp_request_t *req   = (ucp_request_t*)request - 1;
    ucs_status_t  status = ucp_request_check_status(request);

    if (status != UCS_INPROGRESS) {
        ucs_assert(req->flags & UCP_REQUEST_FLAG_STREAM_RECV);
        *length_p = req->recv.stream.length;
    }

    return status;
}
Exemple #25
0
ucs_status_t ucp_tag_recv_request_test(void *request, ucp_tag_recv_info_t *info)
{
    ucp_request_t *req   = (ucp_request_t*)request - 1;
    ucs_status_t  status = ucp_request_check_status(request);

    if (status != UCS_INPROGRESS) {
        ucs_assert(req->flags & UCP_REQUEST_FLAG_RECV);
        *info = req->recv.tag.info;
    }

    return status;
}
Exemple #26
0
static ucs_status_t uct_perf_test_alloc_mem(ucx_perf_context_t *perf,
                                            ucx_perf_params_t *params)
{
    ucs_status_t status;

    /* TODO use params->alignment  */

    status = uct_iface_mem_alloc(perf->uct.iface, 
                                 params->message_size * params->thread_count, 0,
                                 "perftest", &perf->uct.send_mem);
    if (status != UCS_OK) {
        ucs_error("Failed allocate send buffer: %s", ucs_status_string(status));
        goto err;
    }

    ucs_assert(perf->uct.send_mem.md == perf->uct.md);
    perf->send_buffer = perf->uct.send_mem.address;

    status = uct_iface_mem_alloc(perf->uct.iface, 
                                 params->message_size * params->thread_count, 0,
                                 "perftest", &perf->uct.recv_mem);
    if (status != UCS_OK) {
        ucs_error("Failed allocate receive buffer: %s", ucs_status_string(status));
        goto err_free_send;
    }

    ucs_assert(perf->uct.recv_mem.md == perf->uct.md);
    perf->recv_buffer = perf->uct.recv_mem.address;

    perf->offset = 0;

    ucs_debug("allocated memory. Send buffer %p, Recv buffer %p",
              perf->send_buffer, perf->recv_buffer);
    return UCS_OK;

err_free_send:
    uct_iface_mem_free(&perf->uct.send_mem);
err:
    return status;
}
Exemple #27
0
static inline ucs_status_t uct_knem_rma(uct_ep_h tl_ep, const void *buffer,
                                        size_t length,  uint64_t remote_addr,
                                        uct_knem_key_t *key, int write)
{
    struct knem_cmd_inline_copy icopy;
    struct knem_cmd_param_iovec knem_iov[1];
    uct_knem_iface_t *knem_iface = ucs_derived_of(tl_ep->iface, uct_knem_iface_t);
    int knem_fd = knem_iface->knem_pd->knem_fd;
    int rc;

    UCT_KNEM_ZERO_LENGTH_POST(length);

    knem_iov[0].base = (uintptr_t)buffer;
    knem_iov[0].len = length;

    icopy.local_iovec_array = (uintptr_t) &knem_iov[0];
    icopy.local_iovec_nr = 1;
    icopy.remote_cookie = key->cookie;
    ucs_assert(remote_addr >= key->address);
    icopy.remote_offset = remote_addr - key->address;

    icopy.write = write; /* if 0 then, READ from the remote region into my local segments
                          * if 1 then, WRITE to the remote region from my local segment */
    icopy.flags = 0;     /* TBD: add check and support for KNEM_FLAG_DMA */
    icopy.current_status = 0;
    icopy.async_status_index = 0;
    icopy.pad = 0;

    ucs_assert(knem_fd > -1);
    rc = ioctl(knem_fd, KNEM_CMD_INLINE_COPY, &icopy);
    if (rc < 0) {
        ucs_error("KNEM inline copy failed, err = %d %m", rc);
        return UCS_ERR_IO_ERROR;
    }

    uct_knem_trace_data(remote_addr, (uintptr_t)key, "%s [length %zu]",
                        write?"PUT_ZCOPY":"GET_ZCOPY",
                        length);
    return UCS_OK;
}
Exemple #28
0
ucs_status_t uct_tcp_ep_flush(uct_ep_h tl_ep, unsigned flags,
                              uct_completion_t *comp)
{
    uct_tcp_ep_t *ep = ucs_derived_of(tl_ep, uct_tcp_ep_t);

    if (!uct_tcp_ep_can_send(ep)) {
        return UCS_ERR_NO_RESOURCE;
    }

    ucs_assert(ucs_queue_is_empty(&ep->pending_q));
    UCT_TL_EP_STAT_FLUSH(&ep->super);
    return UCS_OK;
}
Exemple #29
0
static int ucp_tag_rndv_is_get_op_possible(ucp_ep_h ep, uct_rkey_t rkey)
{
    uint64_t md_flags;

    ucs_assert(!ucp_ep_is_stub(ep));
    if (ucp_ep_is_rndv_lane_present(ep)) {
        md_flags = ucp_ep_rndv_md_flags(ep);
        return (((rkey != UCT_INVALID_RKEY) && (md_flags & UCT_MD_FLAG_REG)) ||
                !(md_flags & UCT_MD_FLAG_NEED_RKEY));
    } else {
        return 0;
    }
}
Exemple #30
0
static void ucp_ep_flush_completion(uct_completion_t *self, ucs_status_t status)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct_comp);

    ucs_assert(!(req->flags & UCP_REQUEST_FLAG_COMPLETED));

    if (status == UCS_OK) {
        req->status = status;
    }

    ucp_ep_flush_progress(req);
    ucp_flush_check_completion(req);
}