ucs_status_t uct_rc_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header, unsigned header_length, const uct_iov_t *iov, size_t iovcnt, uct_completion_t *comp) { uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t); uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t); uct_rc_iface_send_desc_t *desc = NULL; struct ibv_sge sge[UCT_IB_MAX_IOV]; /* First sge is reserved for the header */ struct ibv_send_wr wr; int send_flags; size_t sge_cnt; UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(&iface->super.super) - 1, "uct_rc_verbs_ep_am_zcopy"); UCT_RC_CHECK_AM_ZCOPY(id, header_length, uct_iov_total_length(iov, iovcnt), iface->verbs_common.config.short_desc_size, iface->super.super.config.seg_size); UCT_RC_CHECK_RES(&iface->super, &ep->super); UCT_RC_CHECK_FC_WND(&iface->super, &ep->super, id); UCT_RC_IFACE_GET_TX_AM_ZCOPY_DESC(&iface->super, &iface->verbs_common.short_desc_mp, desc, id, header, header_length, comp, &send_flags); sge[0].length = sizeof(uct_rc_hdr_t) + header_length; sge_cnt = uct_ib_verbs_sge_fill_iov(sge + 1, iov, iovcnt); UCT_RC_VERBS_FILL_AM_ZCOPY_WR_IOV(wr, sge, (sge_cnt + 1), wr.opcode); UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY, (header_length + uct_iov_total_length(iov, iovcnt))); uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, send_flags); UCT_RC_UPDATE_FC_WND(&iface->super, &ep->super, id); return UCS_INPROGRESS; }
ucs_status_t uct_dc_mlx5_ep_tag_eager_zcopy(uct_ep_h tl_ep, uct_tag_t tag, uint64_t imm, const uct_iov_t *iov, size_t iovcnt, unsigned flags, uct_completion_t *comp) { uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); uint32_t app_ctx, ib_imm; int opcode; UCT_CHECK_IOV_SIZE(iovcnt, UCT_RC_MLX5_TM_EAGER_ZCOPY_MAX_IOV(UCT_IB_MLX5_AV_FULL_SIZE), "uct_dc_mlx5_ep_tag_eager_zcopy"); UCT_RC_CHECK_ZCOPY_DATA(sizeof(struct ibv_exp_tmh), uct_iov_total_length(iov, iovcnt), iface->super.super.super.config.seg_size); UCT_DC_MLX5_CHECK_RES(iface, ep); UCT_RC_MLX5_FILL_TM_IMM(imm, app_ctx, ib_imm, opcode, MLX5_OPCODE_SEND, _IMM); uct_dc_mlx5_iface_zcopy_post(iface, ep, opcode|UCT_RC_MLX5_OPCODE_FLAG_TM, iov, iovcnt, 0, "", 0, 0, 0, tag, app_ctx, ib_imm, comp, MLX5_WQE_CTRL_SOLICITED); UCT_TL_EP_STAT_OP(&ep->super, TAG, ZCOPY, uct_iov_total_length(iov, iovcnt)); return UCS_INPROGRESS; }
ucs_status_t uct_cuda_copy_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, uint64_t remote_addr, uct_rkey_t rkey, uct_completion_t *comp) { uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t); ucs_status_t status; if (iface->stream_d2h == 0) { status = UCT_CUDA_FUNC(cudaStreamCreateWithFlags(&iface->stream_d2h, cudaStreamNonBlocking)); if (UCS_OK != status) { return UCS_ERR_IO_ERROR; } } status = uct_cuda_copy_post_cuda_async_copy(tl_ep, iov[0].buffer, (void *)remote_addr, iov[0].length, cudaMemcpyDeviceToHost, iface->stream_d2h, &iface->outstanding_d2h_cuda_event_q, comp); UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, uct_iov_total_length(iov, iovcnt)); uct_cuda_copy_trace_data(remote_addr, rkey, "GET_ZCOPY [length %zu]", uct_iov_total_length(iov, iovcnt)); return status; }
ucs_status_t uct_dc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header, unsigned header_length, const uct_iov_t *iov, size_t iovcnt, unsigned flags, uct_completion_t *comp) { uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); UCT_CHECK_IOV_SIZE(iovcnt, UCT_IB_MLX5_AM_ZCOPY_MAX_IOV, "uct_dc_mlx5_ep_am_zcopy"); UCT_RC_MLX5_CHECK_AM_ZCOPY(id, header_length, uct_iov_total_length(iov, iovcnt), iface->super.super.super.config.seg_size, UCT_IB_MLX5_AV_FULL_SIZE); UCT_DC_CHECK_RES_AND_FC(iface, ep); uct_dc_mlx5_iface_zcopy_post(iface, ep, MLX5_OPCODE_SEND, iov, iovcnt, id, header, header_length, 0, 0, 0ul, 0, 0, comp, MLX5_WQE_CTRL_SOLICITED); UCT_RC_UPDATE_FC_WND(&iface->super.super, &ep->fc); UCT_TL_EP_STAT_OP(&ep->super, AM, ZCOPY, header_length + uct_iov_total_length(iov, iovcnt)); return UCS_INPROGRESS; }
ucs_status_t uct_rocm_copy_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, uint64_t remote_addr, uct_rkey_t rkey, uct_completion_t *comp) { ucs_status_t status; status = uct_rocm_copy_ep_zcopy(tl_ep, remote_addr, iov, 0); UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, uct_iov_total_length(iov, iovcnt)); uct_rocm_copy_trace_data(remote_addr, rkey, "GET_ZCOPY [length %zu]", uct_iov_total_length(iov, iovcnt)); return status; }
ucs_status_t uct_rc_verbs_ep_tag_eager_zcopy(uct_ep_h tl_ep, uct_tag_t tag, uint64_t imm, const uct_iov_t *iov, size_t iovcnt, uct_completion_t *comp) { uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t); uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t); uct_rc_iface_send_desc_t *desc; struct ibv_sge sge[UCT_IB_MAX_IOV]; struct ibv_send_wr wr; int send_flags; size_t sge_cnt; uint32_t app_ctx; UCT_CHECK_IOV_SIZE(iovcnt, 1ul, "uct_rc_verbs_ep_tag_eager_zcopy"); UCT_RC_CHECK_ZCOPY_DATA(iface->tm.eager_hdr_size, uct_iov_total_length(iov, iovcnt), iface->super.super.config.seg_size); UCT_RC_CHECK_RES(&iface->super, &ep->super); sge_cnt = uct_ib_verbs_sge_fill_iov(sge + 1, iov, iovcnt); UCT_RC_VERBS_FILL_TM_IMM(wr, imm, app_ctx); UCT_RC_VERBS_GET_TM_ZCOPY_DESC(iface, &iface->verbs_common.short_desc_mp, desc, tag, app_ctx, comp, &send_flags, sge[0]); wr.num_sge = sge_cnt + 1; wr.sg_list = sge; uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, send_flags); return UCS_INPROGRESS; }
ucs_status_t uct_dc_mlx5_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, uint64_t remote_addr, uct_rkey_t rkey, uct_completion_t *comp) { uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(&iface->super.super.super), "uct_dc_mlx5_ep_get_zcopy"); UCT_CHECK_LENGTH(uct_iov_total_length(iov, iovcnt), UCT_IB_MAX_MESSAGE_SIZE, "get_zcopy"); UCT_DC_CHECK_RES(&iface->super, &ep->super); uct_dc_mlx5_iface_zcopy_post(iface, ep, MLX5_OPCODE_RDMA_READ, iov, iovcnt, 0, NULL, 0, remote_addr, rkey, comp); UCT_TL_EP_STAT_OP(&ep->super.super, GET, ZCOPY, uct_iov_total_length(iov, iovcnt)); return UCS_INPROGRESS; }
ucs_status_t uct_rc_mlx5_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, uint64_t remote_addr, uct_rkey_t rkey, uct_completion_t *comp) { uct_ib_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ib_iface_t); uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t); ucs_status_t status; UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(iface), "uct_rc_mlx5_ep_get_zcopy"); UCT_CHECK_LENGTH(uct_iov_total_length(iov, iovcnt), UCT_IB_MAX_MESSAGE_SIZE, "get_zcopy"); status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_RDMA_READ, iov, iovcnt, 0, NULL, 0, remote_addr, rkey, MLX5_WQE_CTRL_CQ_UPDATE, comp); UCT_TL_EP_STAT_OP_IF_SUCCESS(status, &ep->super.super, GET, ZCOPY, uct_iov_total_length(iov, iovcnt)); return status; }
ucs_status_t uct_rc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header, unsigned header_length, const uct_iov_t *iov, size_t iovcnt, uct_completion_t *comp) { uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t); uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t); ucs_status_t status; UCT_CHECK_IOV_SIZE(iovcnt, UCT_IB_MLX5_AM_ZCOPY_MAX_IOV, "uct_rc_mlx5_ep_am_zcopy"); UCT_RC_MLX5_CHECK_AM_ZCOPY(id, header_length, uct_iov_total_length(iov, iovcnt), iface->super.config.seg_size, 0); UCT_RC_CHECK_FC_WND(iface, &ep->super, id); status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_SEND, iov, iovcnt, id, header, header_length, 0, 0, 0, comp); if (ucs_likely(status >= 0)) { UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY, header_length + uct_iov_total_length(iov, iovcnt)); UCT_RC_UPDATE_FC_WND(iface, &ep->super, id); } return status; }
ucs_status_t uct_rc_verbs_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, uint64_t remote_addr, uct_rkey_t rkey, uct_completion_t *comp) { uct_ib_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ib_iface_t); uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t); ucs_status_t status; UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(iface), "uct_rc_verbs_ep_put_zcopy"); status = uct_rc_verbs_ep_rdma_zcopy(ep, iov, iovcnt, remote_addr, rkey, comp, IBV_WR_RDMA_WRITE); UCT_TL_EP_STAT_OP_IF_SUCCESS(status, &ep->super.super, PUT, ZCOPY, uct_iov_total_length(iov, iovcnt)); return status; }