/*
 * Buffered-copy PUT on an RC mlx5 endpoint.
 *
 * Takes a TX descriptor, lets pack_cb fill the area right after the
 * descriptor header, and posts the packed bytes as an RDMA write to
 * remote_addr/rkey with a CQ update requested.
 *
 * Returns the number of bytes reported by pack_cb.
 * NOTE(review): the resource-check / descriptor macros are defined elsewhere;
 * presumably they return early on resource shortage - confirm at the macros.
 */
ssize_t uct_rc_mlx5_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                                 void *arg, uint64_t remote_addr,
                                 uct_rkey_t rkey)
{
    uct_rc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;
    size_t packed_len;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, &iface->super.tx.mp, desc);

    /* Completion handler for this descriptor is ucs_mpool_put */
    desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;

    /* User payload is packed immediately after the descriptor header */
    packed_len = pack_cb(desc + 1, arg);

    uct_rc_mlx5_ep_bcopy_post(ep, MLX5_OPCODE_RDMA_WRITE, packed_len,
                              0, NULL, 0,
                              remote_addr, rkey,
                              MLX5_WQE_CTRL_CQ_UPDATE, desc);

    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, BCOPY, packed_len);
    return packed_len;
}
/*
 * Buffered-copy PUT on an RC verbs endpoint.
 *
 * Obtains a TX descriptor filled through pack_cb (done by the
 * UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC macro, which also sets 'packed_len'),
 * builds an IBV_WR_RDMA_WRITE work request and posts it signaled.
 *
 * Returns the packed length.
 */
ssize_t uct_rc_verbs_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                                  void *arg, uint64_t remote_addr,
                                  uct_rkey_t rkey)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;
    size_t packed_len;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC(&iface->super, &iface->super.tx.mp,
                                       desc, pack_cb, arg, packed_len);

    UCT_RC_VERBS_FILL_RDMA_WR(wr, wr.opcode, IBV_WR_RDMA_WRITE, sge,
                              packed_len, remote_addr, rkey);

    /* Statistics are recorded before the work request is posted */
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, BCOPY, packed_len);

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED);
    return packed_len;
}
/*
 * Buffered-copy active message on a DC mlx5 endpoint.
 *
 * Validates the AM id, checks send resources and flow control, obtains a TX
 * descriptor whose payload is produced by pack_cb, and posts it as a raw SEND
 * of sizeof(uct_rc_hdr_t) + packed length bytes. The flow-control window is
 * updated after the post.
 *
 * Returns the length reported through the descriptor macro (payload only,
 * without the RC header).
 *
 * The AM id is validated up front so that this function agrees with the other
 * am_bcopy implementations, which all invoke UCT_CHECK_AM_ID before touching
 * any resources.
 */
ssize_t uct_dc_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                uct_pack_callback_t pack_cb, void *arg)
{
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_rc_iface_send_desc_t *desc;
    size_t length;

    UCT_CHECK_AM_ID(id);
    UCT_DC_CHECK_RES_AND_FC(&iface->super, &ep->super);

    UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC(&iface->super.super,
                                      &iface->super.super.tx.mp, desc,
                                      id, pack_cb, arg, &length);

    uct_dc_mlx5_iface_bcopy_post(iface, ep,
                                 MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_RAW,
                                 sizeof(uct_rc_hdr_t) + length,
                                 0, NULL, 0, 0, 0, desc);

    UCT_RC_UPDATE_FC_WND(&iface->super.super, &ep->super.fc);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length);
    return length;
}
/*
 * Buffered-copy GET on a DC mlx5 endpoint.
 *
 * The requested length is checked against the interface segment size, a TX
 * descriptor carrying unpack_cb/arg/comp is obtained, and an RDMA read of
 * 'length' bytes from remote_addr/rkey is posted.
 *
 * Returns UCS_INPROGRESS once the read has been posted.
 */
ucs_status_t uct_dc_mlx5_ep_get_bcopy(uct_ep_h tl_ep,
                                      uct_unpack_callback_t unpack_cb,
                                      void *arg, size_t length,
                                      uint64_t remote_addr, uct_rkey_t rkey,
                                      uct_completion_t *comp)
{
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;

    UCT_CHECK_LENGTH(length, iface->super.super.super.config.seg_size,
                     "get_bcopy");
    UCT_DC_CHECK_RES(&iface->super, &ep->super);

    UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC(&iface->super.super,
                                       &iface->super.super.tx.mp, desc,
                                       unpack_cb, comp, arg, length);

    uct_dc_mlx5_iface_bcopy_post(iface, ep, MLX5_OPCODE_RDMA_READ, length,
                                 0, NULL, 0,
                                 remote_addr, rkey, desc);

    UCT_TL_EP_STAT_OP(&ep->super.super, GET, BCOPY, length);
    return UCS_INPROGRESS;
}
/*
 * Zero-copy PUT on a DC mlx5 endpoint.
 *
 * Validates the iov count against the interface maximum and the total iov
 * length against UCT_IB_MAX_MESSAGE_SIZE, checks send resources, and posts an
 * RDMA write of the iov list to remote_addr/rkey with 'comp' attached.
 *
 * Returns UCS_INPROGRESS once the write has been posted.
 */
ucs_status_t uct_dc_mlx5_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov,
                                      size_t iovcnt, uint64_t remote_addr,
                                      uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);

    /* Parameter validation */
    UCT_CHECK_IOV_SIZE(iovcnt,
                       uct_ib_iface_get_max_iov(&iface->super.super.super),
                       "uct_dc_mlx5_ep_put_zcopy");
    UCT_CHECK_LENGTH(uct_iov_total_length(iov, iovcnt),
                     UCT_IB_MAX_MESSAGE_SIZE, "put_zcopy");

    UCT_DC_CHECK_RES(&iface->super, &ep->super);

    uct_dc_mlx5_iface_zcopy_post(iface, ep, MLX5_OPCODE_RDMA_WRITE,
                                 iov, iovcnt,
                                 0, NULL, 0,
                                 remote_addr, rkey, comp);

    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, ZCOPY,
                      uct_iov_total_length(iov, iovcnt));
    return UCS_INPROGRESS;
}
/*
 * Zero-copy GET on a uGNI RDMA endpoint.
 *
 * UCT_CHECK_PARAM_IOV extracts 'buffer', 'length' and 'memh' from the iov
 * arguments. Zero-length requests are handled by UCT_SKIP_ZERO_LENGTH, and
 * the length rounded up to UGNI_GET_ALIGN is checked against the configured
 * RDMA maximum.
 *
 * If the alignment test below fires (local buffer on Gemini devices, remote
 * address, or length), the request is delegated to
 * uct_ugni_ep_get_composed(). Otherwise a descriptor is taken from
 * iface->free_desc, formatted as GNI_POST_RDMA_GET and posted.
 *
 * Returns the result of the composed path or of uct_ugni_post_rdma(), or
 * UCS_ERR_NO_RESOURCE when no descriptor is available.
 */
ucs_status_t uct_ugni_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov,
                                   size_t iovcnt, uint64_t remote_addr,
                                   uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *rdma;

    UCT_CHECK_PARAM_IOV(iov, iovcnt, buffer, length, memh);
    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(ucs_align_up_pow2(length, UGNI_GET_ALIGN), 0,
                     iface->config.rdma_max_size, "get_zcopy");

    /* Dedicated flow for unaligned data */
    if (ucs_unlikely((GNI_DEVICE_GEMINI == iface->super.dev->type &&
                      ucs_check_if_align_pow2((uintptr_t)buffer,
                                              UGNI_GET_ALIGN)) ||
                     ucs_check_if_align_pow2(remote_addr, UGNI_GET_ALIGN) ||
                     ucs_check_if_align_pow2(length, UGNI_GET_ALIGN))) {
        return uct_ugni_ep_get_composed(tl_ep, buffer, length, memh,
                                        remote_addr, rkey, comp);
    }

    /* Fully aligned request: a single RDMA GET is enough */
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, rdma,
                             return UCS_ERR_NO_RESOURCE);

    /* Fill in the post descriptor, including the completion */
    uct_ugni_format_rdma(rdma, GNI_POST_RDMA_GET, buffer, remote_addr, memh,
                         rkey, ucs_align_up_pow2(length, UGNI_GET_ALIGN), ep,
                         iface->super.local_cq, comp);

    ucs_trace_data("Posting GET ZCOPY, GNI_PostRdma of size %"PRIx64" (%lu) "
                   "from %p to %p, with [%"PRIx64" %"PRIx64"]",
                   rdma->desc.length, length,
                   (void *)rdma->desc.local_addr,
                   (void *)rdma->desc.remote_addr,
                   rdma->desc.remote_mem_hndl.qword1,
                   rdma->desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY,
                      length);
    return uct_ugni_post_rdma(iface, ep, rdma);
}
/*
 * Short (inline) active message on an RC mlx5 endpoint.
 *
 * After the AM-short parameter check, the send-resource check and the
 * flow-control window check, the 64-bit header and the payload are posted as
 * an inline SEND on the endpoint's QP. The FC window is updated afterwards.
 *
 * Returns UCS_OK after the inline post.
 */
ucs_status_t uct_rc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
                                     const void *payload, unsigned length)
{
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);

    UCT_RC_MLX5_CHECK_AM_SHORT(id, length, 0);
    UCT_RC_CHECK_RES(iface, &ep->super);
    UCT_RC_CHECK_FC_WND(iface, &ep->super, id);

    uct_rc_mlx5_txqp_inline_post(iface, IBV_QPT_RC,
                                 &ep->super.txqp, &ep->tx.wq,
                                 MLX5_OPCODE_SEND,
                                 payload, length,
                                 id, hdr,
                                 0, 0,
                                 NULL, 0);

    /* Accounted size covers the 64-bit header plus the payload */
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length);
    UCT_RC_UPDATE_FC_WND(iface, &ep->super, id);
    return UCS_OK;
}
/*
 * Zero-copy active message on a DC mlx5 endpoint.
 *
 * Validates the iov count against UCT_IB_MLX5_AM_ZCOPY_MAX_IOV and the
 * id/header/payload sizes via UCT_RC_MLX5_CHECK_AM_ZCOPY, checks send
 * resources, then posts a SEND carrying the header and the iov list with
 * 'comp' attached.
 *
 * Returns UCS_INPROGRESS once the send has been posted.
 */
ucs_status_t uct_dc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id,
                                     const void *header,
                                     unsigned header_length,
                                     const uct_iov_t *iov, size_t iovcnt,
                                     uct_completion_t *comp)
{
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);

    UCT_CHECK_IOV_SIZE(iovcnt, UCT_IB_MLX5_AM_ZCOPY_MAX_IOV,
                       "uct_dc_mlx5_ep_am_zcopy");
    UCT_RC_MLX5_CHECK_AM_ZCOPY(id, header_length,
                               uct_iov_total_length(iov, iovcnt),
                               iface->super.super.super.config.seg_size,
                               UCT_IB_MLX5_AV_FULL_SIZE);

    UCT_DC_CHECK_RES(&iface->super, &ep->super);

    uct_dc_mlx5_iface_zcopy_post(iface, ep, MLX5_OPCODE_SEND,
                                 iov, iovcnt,
                                 id, header, header_length,
                                 0, 0, comp);

    UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY,
                      header_length + uct_iov_total_length(iov, iovcnt));
    return UCS_INPROGRESS;
}
/*
 * Short (inline) active message on an RC verbs endpoint.
 *
 * Builds an on-stack AM short header (AM id + 64-bit user header) and points
 * the interface's two inline SGEs at that header and at the user buffer, then
 * posts the interface's pre-built inline AM work request with
 * IBV_SEND_INLINE.
 *
 * The combined size sizeof(am) + length is checked against
 * iface->config.max_inline. Returns UCS_OK after the post.
 */
ucs_status_t uct_rc_verbs_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
                                      const void *buffer, unsigned length)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_am_short_hdr_t am;

    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(sizeof(am) + length, iface->config.max_inline,
                     "am_short");
    UCT_RC_VERBS_CHECK_RES(iface, ep);

    /* Header contents */
    am.am_hdr       = hdr;
    am.rc_hdr.am_id = id;

    /* First inline SGE: the header; second inline SGE: the user payload */
    iface->inl_sge[0].addr   = (uintptr_t)&am;
    iface->inl_sge[0].length = sizeof(am);
    iface->inl_sge[1].addr   = (uintptr_t)buffer;
    iface->inl_sge[1].length = length;

    UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length);
    uct_rc_verbs_ep_post_send(iface, ep, &iface->inl_am_wr, IBV_SEND_INLINE);
    return UCS_OK;
}
/*
 * Short (inline) active message on a DC mlx5 endpoint.
 *
 * After parameter and resource checks, obtains the TX QP / work queue for
 * this endpoint through UCT_DC_MLX5_IFACE_TXQP_GET and posts the header and
 * payload as an inline SEND addressed with the endpoint's address vector.
 *
 * Returns UCS_OK after the inline post.
 */
ucs_status_t uct_dc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
                                     const void *buffer, unsigned length)
{
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

    UCT_RC_MLX5_CHECK_AM_SHORT(id, length, UCT_IB_MLX5_AV_FULL_SIZE);
    UCT_DC_CHECK_RES(&iface->super, &ep->super);
    UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq);

    uct_rc_mlx5_txqp_inline_post(&iface->super.super, IBV_EXP_QPT_DC_INI,
                                 txqp, txwq,
                                 MLX5_OPCODE_SEND,
                                 buffer, length,
                                 id, hdr,
                                 0, 0,
                                 &ep->av, uct_ib_mlx5_wqe_av_size(&ep->av));

    /* Accounted size covers the 64-bit header plus the payload */
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length);
    return UCS_OK;
}
/*
 * Buffered-copy active message on an RC verbs endpoint.
 *
 * Validates the AM id, checks send resources and the flow-control window,
 * obtains a TX descriptor packed through pack_cb, fills a SEND work request
 * for it and posts the descriptor. The FC window is updated after the post.
 *
 * Returns the packed payload length.
 */
ssize_t uct_rc_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                 uct_pack_callback_t pack_cb, void *arg)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;
    size_t length;

    UCT_CHECK_AM_ID(id);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_CHECK_FC_WND(&iface->super, &ep->super, id);

    UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC(&iface->super, &iface->super.tx.mp,
                                      desc, id, pack_cb, arg, &length);
    UCT_RC_VERBS_FILL_AM_BCOPY_WR(wr, sge, length, wr.opcode);

    /* Statistics are recorded before the work request is posted */
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length);

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, 0);
    UCT_RC_UPDATE_FC_WND(&iface->super, &ep->super, id);
    return length;
}
/*
 * Zero-copy active message on an RC mlx5 endpoint.
 *
 * Validates the iov count and the AM zcopy sizes, checks the flow-control
 * window, and delegates the actual post to uct_rc_mlx5_ep_zcopy_post().
 * Statistics and the FC window are only updated when the post returns a
 * non-negative status.
 *
 * Returns the status produced by uct_rc_mlx5_ep_zcopy_post().
 */
ucs_status_t uct_rc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id,
                                     const void *header,
                                     unsigned header_length,
                                     const uct_iov_t *iov, size_t iovcnt,
                                     uct_completion_t *comp)
{
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    ucs_status_t status;

    UCT_CHECK_IOV_SIZE(iovcnt, UCT_IB_MLX5_AM_ZCOPY_MAX_IOV,
                       "uct_rc_mlx5_ep_am_zcopy");
    UCT_RC_MLX5_CHECK_AM_ZCOPY(id, header_length,
                               uct_iov_total_length(iov, iovcnt),
                               iface->super.config.seg_size, 0);
    UCT_RC_CHECK_FC_WND(iface, &ep->super, id);

    status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_SEND, iov, iovcnt,
                                       id, header, header_length,
                                       0, 0, 0, comp);
    if (ucs_unlikely(status < 0)) {
        /* Post failed: leave statistics and the FC window untouched */
        return status;
    }

    UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY,
                      header_length + uct_iov_total_length(iov, iovcnt));
    UCT_RC_UPDATE_FC_WND(iface, &ep->super, id);
    return status;
}
/*
 * Short (inline) PUT on a DC mlx5 endpoint.
 *
 * After the PUT-short parameter check and the send-resource check, obtains
 * the TX QP / work queue for this endpoint and posts the buffer as an inline
 * RDMA write to remote_addr, using the direct rkey derived from 'rkey' and
 * the endpoint's address vector.
 *
 * Returns UCS_OK after the inline post.
 */
ucs_status_t uct_dc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                      unsigned length, uint64_t remote_addr,
                                      uct_rkey_t rkey)
{
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

    UCT_RC_MLX5_CHECK_PUT_SHORT(length, UCT_IB_MLX5_AV_FULL_SIZE);
    UCT_DC_CHECK_RES(&iface->super, &ep->super);
    UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq);

    uct_rc_mlx5_txqp_inline_post(&iface->super.super, IBV_EXP_QPT_DC_INI,
                                 txqp, txwq,
                                 MLX5_OPCODE_RDMA_WRITE,
                                 buffer, length,
                                 0, 0,
                                 remote_addr, uct_ib_md_direct_rkey(rkey),
                                 &ep->av, uct_ib_mlx5_wqe_av_size(&ep->av));

    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length);
    return UCS_OK;
}
/*
 * Buffered-copy active message on a DC mlx5 endpoint.
 *
 * Validates the AM id, checks send resources and flow control, and obtains a
 * TX descriptor whose header is filled via uct_rc_mlx5_am_hdr_fill and whose
 * payload is produced by pack_cb. The descriptor is posted as a solicited
 * SEND of sizeof(uct_rc_mlx5_hdr_t) + packed length bytes, after which the
 * FC window is updated.
 *
 * The 'flags' argument is not read by this function.
 *
 * Returns the packed payload length.
 */
ssize_t uct_dc_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                uct_pack_callback_t pack_cb, void *arg,
                                unsigned flags)
{
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;
    size_t length;

    UCT_CHECK_AM_ID(id);
    UCT_DC_CHECK_RES_AND_FC(iface, ep);

    UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC(&iface->super.super,
                                      &iface->super.super.tx.mp, desc, id,
                                      uct_rc_mlx5_am_hdr_fill,
                                      uct_rc_mlx5_hdr_t,
                                      pack_cb, arg, &length);

    uct_dc_mlx5_iface_bcopy_post(iface, ep, MLX5_OPCODE_SEND,
                                 sizeof(uct_rc_mlx5_hdr_t) + length,
                                 0, 0, desc,
                                 MLX5_WQE_CTRL_SOLICITED, 0,
                                 desc + 1, NULL);

    UCT_RC_UPDATE_FC_WND(&iface->super.super, &ep->fc);
    UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, length);
    return length;
}
/*
 * Short active message on a DC mlx5 endpoint, with an optional device-memory
 * (DM) path.
 *
 * Without HAVE_IBV_EXP_DM the call is forwarded unconditionally to
 * uct_dc_mlx5_ep_am_short_inline(). With HAVE_IBV_EXP_DM the inline path is
 * still used when the header plus payload fits the inline short limit, or
 * when the interface has no DM object; otherwise the message is sent through
 * uct_dc_mlx5_ep_short_dm() after id, length and resource/FC checks.
 *
 * Returns the inline path's status, an error from the DM post, or UCS_OK.
 */
ucs_status_t uct_dc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
                                     const void *buffer, unsigned length)
{
#if HAVE_IBV_EXP_DM
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    ucs_status_t status;
    uct_rc_mlx5_dm_copy_data_t cache;

    if (ucs_likely((sizeof(uct_rc_mlx5_am_short_hdr_t) + length <=
                    UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) ||
                   !iface->super.dm.dm)) {
        /* Fits the inline limit, or no device memory: use the inline path */
        return uct_dc_mlx5_ep_am_short_inline(tl_ep, id, hdr, buffer, length);
    }

    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(length + sizeof(uct_rc_mlx5_am_short_hdr_t), 0,
                     iface->super.dm.seg_len, "am_short");
    UCT_DC_CHECK_RES_AND_FC(iface, ep);

    /* Stage the AM header in the DM copy cache */
    uct_rc_mlx5_am_hdr_fill(&cache.am_hdr.rc_hdr, id);
    cache.am_hdr.am_hdr = hdr;

    status = uct_dc_mlx5_ep_short_dm(ep, &cache, sizeof(cache.am_hdr),
                                     buffer, length, MLX5_OPCODE_SEND,
                                     MLX5_WQE_CTRL_SOLICITED |
                                     MLX5_WQE_CTRL_CQ_UPDATE,
                                     0, 0);
    if (UCS_STATUS_IS_ERR(status)) {
        return status;
    }

    UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, sizeof(cache.am_hdr) + length);
    UCT_RC_UPDATE_FC_WND(&iface->super.super, &ep->fc);
    return UCS_OK;
#else
    return uct_dc_mlx5_ep_am_short_inline(tl_ep, id, hdr, buffer, length);
#endif
}
/*
 * Zero-copy active message on an RC verbs endpoint.
 *
 * One SGE is reserved for the AM header, so the caller may pass at most
 * (max iov - 1) entries. After size, resource and flow-control checks, a
 * short descriptor holding the header is obtained, the user iov is converted
 * into SGEs starting at sge[1], and the work request is posted solicited.
 * Flow control is updated after the post.
 *
 * The 'flags' argument is not read by this function.
 *
 * Returns UCS_INPROGRESS once the send has been posted.
 */
ucs_status_t uct_rc_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id,
                                      const void *header,
                                      unsigned header_length,
                                      const uct_iov_t *iov, size_t iovcnt,
                                      unsigned flags, uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep          = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface    = ucs_derived_of(tl_ep->iface,
                                                    uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc = NULL;
    struct ibv_sge sge[UCT_IB_MAX_IOV]; /* sge[0] carries the header */
    struct ibv_send_wr wr;
    size_t sge_cnt;
    int send_flags;

    UCT_CHECK_IOV_SIZE(iovcnt,
                       uct_ib_iface_get_max_iov(&iface->super.super) - 1,
                       "uct_rc_verbs_ep_am_zcopy");
    UCT_RC_CHECK_AM_ZCOPY(id, header_length,
                          uct_iov_total_length(iov, iovcnt),
                          iface->config.short_desc_size,
                          iface->super.super.config.seg_size);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_CHECK_FC(&iface->super, &ep->super, id);

    UCT_RC_IFACE_GET_TX_AM_ZCOPY_DESC(&iface->super, &iface->short_desc_mp,
                                      desc, id, header, header_length, comp,
                                      &send_flags);

    /* Header SGE length; user data SGEs follow it */
    sge[0].length = sizeof(uct_rc_hdr_t) + header_length;
    sge_cnt       = uct_ib_verbs_sge_fill_iov(sge + 1, iov, iovcnt);

    UCT_RC_VERBS_FILL_AM_ZCOPY_WR_IOV(wr, sge, (sge_cnt + 1), wr.opcode);

    UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY,
                      (header_length + uct_iov_total_length(iov, iovcnt)));

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc,
                                   send_flags | IBV_SEND_SOLICITED,
                                   UCT_IB_MAX_ZCOPY_LOG_SGE(&iface->super.super));
    UCT_RC_UPDATE_FC(&iface->super, &ep->super, id);
    return UCS_INPROGRESS;
}
/*
 * Buffered-copy PUT on an RC verbs endpoint (caller-supplied length).
 *
 * The length is checked against the interface segment size, zero-length
 * requests are handled by UCT_RC_VERBS_ZERO_LENGTH_POST, and send resources
 * are checked. pack_cb then copies 'length' bytes into the area right after
 * the TX descriptor header, and an IBV_WR_RDMA_WRITE work request is posted
 * signaled.
 *
 * Returns UCS_OK after the post.
 */
ucs_status_t uct_rc_verbs_ep_put_bcopy(uct_ep_h tl_ep,
                                       uct_pack_callback_t pack_cb,
                                       void *arg, size_t length,
                                       uint64_t remote_addr, uct_rkey_t rkey)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;

    UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size, "put_bcopy");
    UCT_RC_VERBS_ZERO_LENGTH_POST(length);
    UCT_RC_VERBS_CHECK_RES(iface, ep);

    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->super.tx.mp, desc);

    /* Completion handler for this descriptor is ucs_mpool_put */
    desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;

    /* Copy the user data into the descriptor payload area */
    pack_cb(desc + 1, arg, length);

    uct_rc_verbs_fill_rdma_wr(&wr, IBV_WR_RDMA_WRITE, &sge, length,
                              remote_addr, rkey);

    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, BCOPY, length);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED);
    return UCS_OK;
}
/*
 * Buffered-copy GET on an RC verbs endpoint.
 *
 * The length is checked against the interface segment size, send resources
 * are checked, and a TX descriptor carrying unpack_cb/arg/comp is obtained.
 * An IBV_WR_RDMA_READ work request for 'length' bytes at remote_addr/rkey is
 * then posted signaled.
 *
 * Returns UCS_INPROGRESS once the read has been posted.
 */
ucs_status_t uct_rc_verbs_ep_get_bcopy(uct_ep_h tl_ep,
                                       uct_unpack_callback_t unpack_cb,
                                       void *arg, size_t length,
                                       uint64_t remote_addr, uct_rkey_t rkey,
                                       uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;

    UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size, "get_bcopy");
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC(&iface->super, &iface->super.tx.mp,
                                       desc, unpack_cb, comp, arg, length);

    UCT_RC_VERBS_FILL_RDMA_WR(wr, wr.opcode, IBV_WR_RDMA_READ, sge, length,
                              remote_addr, rkey);

    UCT_TL_EP_STAT_OP(&ep->super.super, GET, BCOPY, length);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED);
    return UCS_INPROGRESS;
}
/*
 * Buffered-copy active message on an RC mlx5 endpoint.
 *
 * Validates the AM id, checks send resources and takes a TX descriptor. The
 * RC header (holding the AM id) is written right after the descriptor, and
 * pack_cb writes the payload right after that header. The whole thing is
 * posted as a raw SEND of sizeof(RC header) + packed length bytes.
 *
 * Returns the packed payload length (without the RC header).
 */
ssize_t uct_rc_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                uct_pack_callback_t pack_cb, void *arg)
{
    uct_rc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;
    uct_rc_hdr_t *rch;
    size_t packed_len;

    UCT_CHECK_AM_ID(id);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, &iface->super.tx.mp, desc);

    /* Completion handler for this descriptor is ucs_mpool_put */
    desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;

    /* Layout after the descriptor: [RC header][packed payload] */
    rch        = (void*)(desc + 1);
    rch->am_id = id;
    packed_len = pack_cb(rch + 1, arg);

    uct_rc_mlx5_ep_bcopy_post(ep,
                              MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_RAW,
                              sizeof(*rch) + packed_len,
                              0, NULL, 0, 0, 0, 0, desc);

    UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, packed_len);
    return packed_len;
}
/*
 * Short PUT on a uGNI RDMA endpoint, implemented as an FMA put.
 *
 * Zero-length requests are handled by UCT_SKIP_ZERO_LENGTH and the length is
 * checked against the configured FMA segment size. A descriptor is taken
 * from iface->free_desc, formatted as GNI_POST_FMA_PUT with no completion,
 * and posted.
 *
 * Returns the result of uct_ugni_post_fma(), or UCS_ERR_NO_RESOURCE when no
 * descriptor is available.
 */
ucs_status_t uct_ugni_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                   unsigned length, uint64_t remote_addr,
                                   uct_rkey_t rkey)
{
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *fma;

    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(length, 0, iface->config.fma_seg_size, "put_short");

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, fma,
                             return UCS_ERR_NO_RESOURCE);

    uct_ugni_format_fma(fma, GNI_POST_FMA_PUT, buffer, remote_addr, rkey,
                        length, ep, NULL, NULL);

    ucs_trace_data("Posting PUT Short, GNI_PostFma of size %"PRIx64" from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   fma->desc.length,
                   (void *)fma->desc.local_addr,
                   (void *)fma->desc.remote_addr,
                   fma->desc.remote_mem_hndl.qword1,
                   fma->desc.remote_mem_hndl.qword2);

    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT,
                      length);
    return uct_ugni_post_fma(iface, ep, fma, UCS_OK);
}
/*
 * Buffered-copy active message over the IB CM transport, sent as a SIDR
 * request.
 *
 * The message (uct_cm_hdr_t followed by the packed payload) is built in a
 * temporary heap buffer of IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE bytes and passed
 * as the SIDR request's private data. A temporary CM id is created per
 * message and stored in iface->outstanding on success.
 *
 * Returns the packed payload length on success, or:
 *   UCS_ERR_NO_RESOURCE  - too many outstanding requests
 *   UCS_ERR_NO_MEMORY    - temporary buffer allocation failed
 *   UCS_ERR_IO_ERROR     - CM id creation or SIDR send failed
 *   (or the status of uct_cm_ep_fill_path_rec() on failure)
 *
 * The interface is entered with uct_cm_enter() and left with uct_cm_leave()
 * on every path.
 */
ssize_t uct_cm_ep_am_bcopy(uct_ep_h tl_ep, uint8_t am_id,
                           uct_pack_callback_t pack_cb, void *arg)
{
    uct_cm_ep_t *ep       = ucs_derived_of(tl_ep, uct_cm_ep_t);
    uct_cm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cm_iface_t);
    struct ib_cm_sidr_req_param req;
    struct ibv_sa_path_rec path;
    struct ib_cm_id *id;
    uct_cm_hdr_t *hdr;
    ucs_status_t status;
    size_t packed_len;
    size_t msg_len;
    int rc;

    UCT_CHECK_AM_ID(am_id);

    uct_cm_enter(iface);

    if (iface->num_outstanding >= iface->config.max_outstanding) {
        status = UCS_ERR_NO_RESOURCE;
        goto err;
    }

    /* Temporary contiguous buffer for header + payload */
    hdr = ucs_malloc(IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, "cm_send_buf");
    if (hdr == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    packed_len  = pack_cb(hdr + 1, arg);
    hdr->am_id  = am_id;
    hdr->length = packed_len;
    msg_len     = sizeof(*hdr) + packed_len;

    status = uct_cm_ep_fill_path_rec(ep, &path);
    if (status != UCS_OK) {
        goto err_free;
    }

    /* Build the SIDR request */
    memset(&req, 0, sizeof(req));
    req.path             = &path;
    req.service_id       = ep->dest_addr.id;
    req.timeout_ms       = iface->config.timeout_ms;
    req.private_data     = hdr;
    req.private_data_len = msg_len;
    req.max_cm_retries   = iface->config.retry_count;

    /* Per-message temporary id; released when the REP arrives */
    rc = ib_cm_create_id(iface->cmdev, &id, NULL);
    if (rc) {
        ucs_error("ib_cm_create_id() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free;
    }

    uct_cm_dump_path(&path);

    rc = ib_cm_send_sidr_req(id, &req);
    if (rc) {
        ucs_error("ib_cm_send_sidr_req() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_id;
    }

    iface->outstanding[iface->num_outstanding++] = id;
    UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, packed_len);
    uct_cm_leave(iface);

    /* The buffer is still needed for the trace; free it afterwards */
    uct_cm_iface_trace_data(iface, UCT_AM_TRACE_TYPE_SEND, hdr,
                            "TX: SIDR_REQ [dlid %d svc 0x%"PRIx64"]",
                            ntohs(path.dlid), req.service_id);
    ucs_free(hdr);
    return packed_len;

err_destroy_id:
    ib_cm_destroy_id(id);
err_free:
    ucs_free(hdr);
err:
    uct_cm_leave(iface);
    return status;
}
/*
 * Common uGNI UDT active-message send path, shared by AM short and AM bcopy.
 *
 *   is_short != 0 : copy 'header' followed by 'length' bytes of 'payload'
 *                   into the send payload area.
 *   is_short == 0 : let pack_cb(arg) fill the send payload area.
 *
 * Only one datagram may be posted per endpoint at a time: if
 * ep->posted_desc is already set, UCS_ERR_NO_RESOURCE is returned. The
 * datagram is posted with GNI_EpPostDataWId() under uct_ugni_global_lock.
 *
 * Returns UCS_OK for the short path, the packed length for the bcopy path,
 * or UCS_ERR_NO_RESOURCE when no descriptor is available.
 * NOTE(review): UCT_UGNI_UDT_CHECK_RC is defined elsewhere; if it returns on
 * a failed post, 'desc' does not appear to be returned to the pool here -
 * confirm at the macro definition.
 */
static UCS_F_ALWAYS_INLINE ssize_t
uct_ugni_udt_ep_am_common_send(const unsigned is_short, uct_ugni_udt_ep_t *ep,
                               uct_ugni_udt_iface_t *iface, uint8_t am_id,
                               unsigned length, uint64_t header,
                               const void *payload,
                               uct_pack_callback_t pack_cb, void *arg)
{
    gni_return_t ugni_rc;
    uint16_t msg_length;
    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *sheader, *rheader;
    ssize_t packed_length = 0;

    UCT_CHECK_AM_ID(am_id);

    /* A previous datagram is still posted on this endpoint */
    if (ucs_unlikely(NULL != ep->posted_desc)) {
        UCT_TL_IFACE_STAT_TX_NO_DESC(&iface->super.super);
        return UCS_ERR_NO_RESOURCE;
    }

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, desc,
                             return UCS_ERR_NO_RESOURCE);

    rheader       = uct_ugni_udt_get_rheader(desc, iface);
    rheader->type = UCT_UGNI_UDT_EMPTY;
    sheader       = uct_ugni_udt_get_sheader(desc, iface);

    if (is_short) {
        uint64_t *hdr = (uint64_t *)uct_ugni_udt_get_spayload(desc, iface);

        *hdr = header;
        memcpy((void*)(hdr + 1), payload, length);
        sheader->length = length + sizeof(header);
        msg_length      = sheader->length + sizeof(*sheader);
        UCT_TL_EP_STAT_OP(ucs_derived_of(ep, uct_base_ep_t), AM, SHORT,
                          sizeof(header) + length);
    } else {
        packed_length   = pack_cb((void *)uct_ugni_udt_get_spayload(desc,
                                                                    iface),
                                  arg);
        sheader->length = packed_length;
        msg_length      = sheader->length + sizeof(*sheader);
        UCT_TL_EP_STAT_OP(ucs_derived_of(ep, uct_base_ep_t), AM, BCOPY,
                          packed_length);
    }

    /* Trace exactly the bytes placed in the send payload: header + payload
     * for AM short, the packed size for AM bcopy. The 'length' argument alone
     * omits the 8-byte header on the short path and says nothing about what
     * pack_cb wrote on the bcopy path. */
    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND,
                       am_id, uct_ugni_udt_get_spayload(desc, iface),
                       sheader->length,
                       is_short ? "TX: AM_SHORT" : "TX: AM_BCOPY");

    sheader->am_id = am_id;
    sheader->type  = UCT_UGNI_UDT_PAYLOAD;

    ucs_assert_always(sheader->length <= GNI_DATAGRAM_MAXSIZE);

    pthread_mutex_lock(&uct_ugni_global_lock);
    ugni_rc = GNI_EpPostDataWId(ep->super.ep,
                                sheader, msg_length,
                                rheader, (uint16_t)iface->config.udt_seg_size,
                                ep->super.hash_key);
    pthread_mutex_unlock(&uct_ugni_global_lock);

    UCT_UGNI_UDT_CHECK_RC(ugni_rc);

    /* Remember the in-flight descriptor and account for it */
    ep->posted_desc = desc;
    ++ep->super.outstanding;
    ++iface->super.outstanding;

    return is_short ? UCS_OK : packed_length;
}
static ucs_status_t uct_ugni_ep_get_composed_fma_rdma(uct_ep_h tl_ep, void *buffer, size_t length, uct_mem_h memh, uint64_t remote_addr, uct_rkey_t rkey, uct_completion_t *comp) { uct_ugni_rdma_fetch_desc_t *fma = NULL; uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); uct_ugni_rdma_fetch_desc_t *rdma = NULL; size_t fma_length, rdma_length, aligned_fma_remote_start; uint64_t fma_remote_start, rdma_remote_start; ucs_status_t post_result; rdma_length = length - iface->config.fma_seg_size; fma_length = iface->config.fma_seg_size; UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer, fma, return UCS_ERR_NO_RESOURCE); UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get, rdma, return UCS_ERR_NO_RESOURCE); rdma_remote_start = remote_addr; fma_remote_start = rdma_remote_start + rdma_length; aligned_fma_remote_start = ucs_align_up_pow2(fma_remote_start, UGNI_GET_ALIGN); uct_ugni_format_get_fma(fma, GNI_POST_FMA_GET, aligned_fma_remote_start, rkey, fma_length, ep, comp, uct_ugni_unalign_fma_composed_cb, NULL, NULL); fma->tail = aligned_fma_remote_start - fma_remote_start; uct_ugni_format_unaligned_rdma(rdma, GNI_POST_RDMA_GET, buffer, rdma_remote_start, memh, rkey, rdma_length+fma->tail, ep, iface->super.local_cq, comp, uct_ugni_unalign_rdma_composed_cb); fma->head = rdma; rdma->head = fma; fma->network_completed_bytes = rdma->network_completed_bytes = 0; fma->user_buffer = rdma->user_buffer = buffer; fma->expected_bytes = rdma->expected_bytes = fma->super.desc.length + rdma->super.desc.length; ucs_trace_data("Posting split GET ZCOPY, GNI_PostFma of size %"PRIx64" (%lu) from %p to " "%p, with [%"PRIx64" %"PRIx64"] and GNI_PostRdma of size %"PRIx64" (%lu)" " from %p to %p, with [%"PRIx64" %"PRIx64"]", fma->super.desc.length, length, (void *)fma->super.desc.local_addr, (void *)fma->super.desc.remote_addr, fma->super.desc.remote_mem_hndl.qword1, 
fma->super.desc.remote_mem_hndl.qword2, rdma->super.desc.length, length, (void *)rdma->super.desc.local_addr, (void *)rdma->super.desc.remote_addr, rdma->super.desc.remote_mem_hndl.qword1, rdma->super.desc.remote_mem_hndl.qword2); UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, length); post_result = uct_ugni_post_fma(iface, ep, &(fma->super), UCS_INPROGRESS); if(post_result != UCS_OK && post_result != UCS_INPROGRESS){ ucs_mpool_put(rdma); return post_result; } return uct_ugni_post_rdma(iface, ep, &(rdma->super)); }
/*
 * Common shared-memory (mm) active-message send routine.
 *
 * The first parameter selects the flavor:
 *   is_short = 1 - AM short: header + payload are written inline into the
 *                  remote FIFO element
 *   is_short = 0 - AM bcopy: pack_cb writes into the remote descriptor
 *                  attached to the FIFO element
 *
 * Returns UCS_OK (short) or the packed length (bcopy) on success,
 * UCS_ERR_NO_RESOURCE when the remote FIFO has no room, or the status of
 * uct_mm_ep_get_remote_elem() when no element could be obtained.
 */
static UCS_F_ALWAYS_INLINE ssize_t
uct_mm_ep_am_common_send(const unsigned is_short, uct_mm_ep_t *ep,
                         uct_mm_iface_t *iface, uint8_t am_id, size_t length,
                         uint64_t header, const void *payload,
                         uct_pack_callback_t pack_cb, void *arg)
{
    uct_mm_fifo_element_t *elem;
    ucs_status_t status;
    void *base_address;
    uint64_t head;

    UCT_CHECK_AM_ID(am_id);

    head = ep->fifo_ctl->head;

    /* Is there room in the remote process's receive FIFO? */
    if (!UCT_MM_EP_IS_ABLE_TO_SEND(head, ep->cached_tail,
                                   iface->config.fifo_size)) {
        if (!ucs_arbiter_group_is_empty(&ep->arb_group)) {
            /* Pending operations exist: sending now could reorder messages */
            UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1);
            return UCS_ERR_NO_RESOURCE;
        }

        /* Nothing pending: refresh the cached tail from the remote peer and
         * re-test */
        uct_mm_ep_update_cached_tail(ep);
        if (!UCT_MM_EP_IS_ABLE_TO_SEND(head, ep->cached_tail,
                                       iface->config.fifo_size)) {
            UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1);
            return UCS_ERR_NO_RESOURCE;
        }
    }

    status = uct_mm_ep_get_remote_elem(ep, head, &elem);
    if (status != UCS_OK) {
        ucs_trace_poll("couldn't get an available FIFO element");
        UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1);
        return status;
    }

    if (is_short) {
        /* AM_SHORT: data lives inline, right after the FIFO element */
        *(uint64_t*) (elem + 1) = header;
        memcpy((void*) (elem + 1) + sizeof(header), payload, length);

        elem->flags |= UCT_MM_FIFO_ELEM_FLAG_INLINE;
        elem->length = length + sizeof(header);

        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_SEND, am_id,
                           elem + 1, length + sizeof(header), "TX: AM_SHORT");
        UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, sizeof(header) + length);
    } else {
        /* AM_BCOPY: data goes to the remote descriptor. base_address is the
         * local pointer to the remote memory chunk after attaching to it */
        base_address = uct_mm_ep_attach_remote_seg(ep, iface, elem);
        length       = pack_cb(base_address + elem->desc_offset, arg);

        elem->flags &= ~UCT_MM_FIFO_ELEM_FLAG_INLINE;
        elem->length = length;

        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_SEND, am_id,
                           base_address + elem->desc_offset, length,
                           "TX: AM_BCOPY");
        UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, length);
    }

    elem->am_id = am_id;

    /* Store fence: the element contents must be written before the
     * 'writing is complete' flag that the reader checks */
    ucs_memory_cpu_store_fence();

    /* Flip the owner bit to publish the element; its expected value toggles
     * on every FIFO wraparound */
    if (head & iface->config.fifo_size) {
        elem->flags |= UCT_MM_FIFO_ELEM_FLAG_OWNER;
    } else {
        elem->flags &= ~UCT_MM_FIFO_ELEM_FLAG_OWNER;
    }

    if (!is_short) {
        return length;
    }
    return UCS_OK;
}