/*
 * Send an active message on an RC/mlx5 endpoint, passing the payload by
 * reference (zero-copy) and the user header by value.
 *
 * Before posting, the request is validated against:
 *  - the AM id range and the flow-control window,
 *  - the maximal WQE size (control + data + inline segments, RC header and
 *    user header must fit in UCT_RC_MLX5_MAX_BB building blocks),
 *  - the interface segment size and the maximal IB message size.
 * The guard macros are defined elsewhere; presumably they return an error
 * from this function when a check fails.
 *
 * Returns the status produced by uct_rc_mlx5_ep_zcopy_post().
 */
ucs_status_t uct_rc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id,
                                     const void *header, unsigned header_length,
                                     const void *payload, size_t length,
                                     uct_mem_h memh, uct_completion_t *comp)
{
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    ucs_status_t post_status;

    UCT_CHECK_AM_ID(id);
    UCT_RC_CHECK_FC_WND(iface, &ep->super, id);

    /* Everything that is placed inline must fit into a single WQE */
    UCT_CHECK_LENGTH(sizeof(struct mlx5_wqe_ctrl_seg) +
                     sizeof(struct mlx5_wqe_data_seg) +
                     sizeof(struct mlx5_wqe_inl_data_seg) +
                     sizeof(uct_rc_hdr_t) + header_length,
                     UCT_RC_MLX5_MAX_BB * MLX5_SEND_WQE_BB, "am zcopy");

    /* Whole message (RC header + user header + payload) vs. segment size */
    UCT_CHECK_LENGTH(header_length + length + sizeof(uct_rc_hdr_t),
                     ucs_derived_of(tl_ep->iface, uct_ib_iface_t)->config.seg_size,
                     "am_zcopy");
    UCT_CHECK_LENGTH(header_length + length, UCT_IB_MAX_MESSAGE_SIZE,
                     "am_zcopy");

    post_status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_SEND, payload,
                                            length, memh, id, header,
                                            header_length, 0, 0, 0, comp);
    if (ucs_likely(post_status >= 0)) {
        /* Account for the send only when it was actually posted */
        UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY, header_length + length);
        UCT_RC_UPDATE_FC_WND(&ep->super);
    }

    return post_status;
}
/*
 * Send a short active message on a UD/mlx5 endpoint.
 *
 * The network header, the 64-bit AM header and the payload are written
 * directly into the TX work queue as inline data, so the common UD AM code
 * cannot be used. After posting, the headers are copied into the skb and the
 * send is completed through uct_ud_iface_complete_tx_inl().
 *
 * All parameter checks are performed BEFORE uct_ud_enter(): the check macros
 * are presumably able to return from the function, and doing so between
 * uct_ud_enter() and uct_ud_leave() (or after the WQE has been partially
 * written) would leave the interface in an inconsistent state.
 *
 * Returns UCS_OK on success, UCS_ERR_NO_RESOURCE when no TX skb is available.
 */
static ucs_status_t uct_ud_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id,
                                            uint64_t hdr, const void *buffer,
                                            unsigned length)
{
    uct_ud_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t);
    uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_ud_mlx5_iface_t);
    struct mlx5_wqe_ctrl_seg *ctrl;
    struct mlx5_wqe_inl_data_seg *inl;
    uct_ud_am_short_hdr_t *am;
    uct_ud_neth_t *neth;
    unsigned inl_size;
    unsigned wqe_size;
    uct_ud_send_skb_t *skb;

    /* Size of the inline data, and of the whole WQE which contains it */
    inl_size = length + sizeof(*am) + sizeof(*neth);
    wqe_size = inl_size + UCT_UD_MLX5_WQE_SIZE + sizeof(*inl);

    /* data is written directly into tx wqe, so it is impossible to use
     * common ud am code */
    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(hdr) + length,
                     iface->super.config.max_inline, "am_short");
    UCT_CHECK_LENGTH(wqe_size, UCT_IB_MLX5_MAX_BB * MLX5_SEND_WQE_BB,
                     "am_short");

    uct_ud_enter(&iface->super);
    uct_ud_iface_progress_pending_tx(&iface->super);
    skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super);
    if (!skb) {
        uct_ud_leave(&iface->super);
        return UCS_ERR_NO_RESOURCE;
    }

    ctrl = iface->tx.wq.curr;

    /* Set inline segment which has AM id, AM header, and AM payload */
    inl             = uct_ib_mlx5_get_next_seg(&iface->tx.wq, ctrl,
                                               UCT_UD_MLX5_WQE_SIZE);
    inl->byte_count = htonl(inl_size | MLX5_INLINE_SEG);

    /* assume that neth and am header fit into one bb */
    ucs_assert(sizeof(*am) + sizeof(*neth) < MLX5_SEND_WQE_BB);
    neth = (void*)(inl + 1);
    uct_ud_am_set_neth(neth, &ep->super, id);

    am      = (void*)(neth + 1);
    am->hdr = hdr;
    uct_ib_mlx5_inline_copy(am + 1, buffer, length, &iface->tx.wq);

    UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth);
    uct_ud_mlx5_post_send(iface, ep, ctrl, wqe_size);

    /* Keep a copy of the headers in the skb; the payload is handed to the
     * completion routine separately */
    skb->len = sizeof(*neth) + sizeof(*am);
    memcpy(skb->neth, neth, skb->len);
    uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb,
                                 (char *)skb->neth + skb->len, buffer, length);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length);
    uct_ud_leave(&iface->super);
    return UCS_OK;
}
/*
 * Send an active message on an RC/verbs endpoint with a zero-copy payload.
 *
 * A descriptor from the short-descriptor pool holds the RC header followed by
 * a copy of the user header; it is described by sge[0]. A non-empty payload
 * is referenced in place by sge[1], using the lkey of the supplied memory
 * handle (0 when the handle is UCT_INVALID_MEM_HANDLE).
 *
 * When a completion is supplied the send is signaled and the completion is
 * stored in the descriptor; otherwise the descriptor handler is
 * ucs_mpool_put and the send is posted with no flags.
 *
 * Returns UCS_INPROGRESS once the work request has been posted. The check
 * macros are defined elsewhere and presumably return early on failure.
 */
ucs_status_t uct_rc_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id,
                                      const void *header, unsigned header_length,
                                      const void *payload, size_t length,
                                      uct_mem_h memh, uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    struct ibv_mr *payload_mr   = memh;
    uct_rc_iface_send_desc_t *desc;
    uct_rc_hdr_t *rc_hdr;
    struct ibv_sge sge[2];
    struct ibv_send_wr wr;
    int flags;

    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(sizeof(*rc_hdr) + header_length,
                     iface->config.short_desc_size, "am_zcopy header");
    UCT_CHECK_LENGTH(header_length + length,
                     iface->super.super.config.seg_size, "am_zcopy payload");
    UCT_RC_VERBS_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->short_desc_mp, desc);

    if (comp != NULL) {
        /* Caller wants to be notified: request a signaled completion */
        desc->super.handler   = uct_rc_verbs_ep_am_zcopy_handler;
        desc->super.user_comp = comp;
        flags                 = IBV_SEND_SIGNALED;
    } else {
        desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;
        flags               = 0;
    }

    /* Header buffer: active message ID + user header */
    rc_hdr        = (void*)(desc + 1);
    rc_hdr->am_id = id;
    memcpy(rc_hdr + 1, header, header_length);

    wr.opcode     = IBV_WR_SEND;
    wr.sg_list    = sge;
    sge[0].length = sizeof(*rc_hdr) + header_length;

    if (ucs_unlikely(length == 0)) {
        /* Nothing to reference: send the header entry only */
        wr.num_sge = 1;
    } else {
        sge[1].addr   = (uintptr_t)payload;
        sge[1].length = length;
        sge[1].lkey   = (payload_mr == UCT_INVALID_MEM_HANDLE) ?
                        0 : payload_mr->lkey;
        wr.num_sge    = 2;
    }

    UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY, header_length + length);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, flags);
    return UCS_INPROGRESS;
}
/*
 * Send a buffered-copy active message on a UD/verbs endpoint.
 *
 * The payload is produced by pack_cb() directly into the skb, right after the
 * network header, and the skb is then sent as a single scatter-gather entry.
 *
 * Parameters:
 *   tl_ep   - endpoint to send on
 *   id      - active message id
 *   pack_cb - callback which writes `length` bytes of payload
 *   arg     - opaque argument passed to pack_cb
 *   length  - payload length; network header + payload are checked against a
 *             hard-coded limit of 4096 bytes (marked TODO in the code)
 *
 * Returns UCS_OK on success, or the error status returned by
 * uct_ud_verbs_am_common().
 */
static ucs_status_t uct_ud_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                             uct_pack_callback_t pack_cb,
                                             void *arg, size_t length)
{
    uct_ud_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t);
    uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_ud_verbs_iface_t);
    uct_ud_send_skb_t *skb;
    char *data;
    ucs_status_t status;

    UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + length, 4096 /* TODO */,
                     "am_bcopy");

    status = uct_ud_verbs_am_common(iface, ep, id, &skb);
    if (status != UCS_OK) {
        return status;
    }

    /* Payload is packed immediately after the network header */
    data = (char *)(skb->neth+1);
    pack_cb(data, arg, length);

    iface->tx.sge[0].lkey = skb->lkey;
    skb->len = iface->tx.sge[0].length = sizeof(uct_ud_neth_t) + length;
    uct_ud_verbs_iface_tx_data(iface, ep);

    /* "%u" requires an unsigned int argument, so cast accordingly */
    ucs_trace_data("TX(iface=%p): AM_BCOPY [%d] skb=%p buf=%p len=%u", iface,
                   id, skb, arg, (unsigned)length);
    uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb);
    return UCS_OK;
}
/*
 * Send a buffered-copy active message on an RC/verbs endpoint.
 *
 * A TX descriptor is taken from the interface pool; the RC header (carrying
 * the AM id) is written right after the descriptor and pack_cb() fills the
 * payload behind it. The descriptor handler is ucs_mpool_put, and the send is
 * posted with no send flags.
 *
 * Returns UCS_OK after the work request has been posted. The check / resource
 * macros are defined elsewhere and presumably return early on failure.
 */
ucs_status_t uct_rc_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                      uct_pack_callback_t pack_cb, void *arg,
                                      size_t length)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    uct_rc_hdr_t *rc_hdr;
    struct ibv_sge sge;
    struct ibv_send_wr wr;

    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(sizeof(*rc_hdr) + length,
                     iface->super.super.config.seg_size, "am_bcopy");
    UCT_RC_VERBS_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->super.tx.mp, desc);

    desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;

    /* Layout behind the descriptor: [RC header][packed payload] */
    rc_hdr        = (void*)(desc + 1);
    rc_hdr->am_id = id;
    pack_cb(rc_hdr + 1, arg, length);

    sge.length = sizeof(*rc_hdr) + length;
    wr.opcode  = IBV_WR_SEND;
    wr.num_sge = 1;
    wr.sg_list = &sge;

    UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, 0);
    return UCS_OK;
}
/*
 * Send a buffered-copy active message on an RC/mlx5 endpoint.
 *
 * The RC header with the AM id and the payload produced by pack_cb() are
 * placed in a TX descriptor, which is then posted as a raw SEND through
 * uct_rc_mlx5_ep_bcopy_post(). The descriptor handler is ucs_mpool_put.
 *
 * Returns the status of uct_rc_mlx5_ep_bcopy_post(); statistics are updated
 * through UCT_TL_EP_STAT_OP_IF_SUCCESS using that status.
 */
ucs_status_t uct_rc_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                     uct_pack_callback_t pack_cb, void *arg,
                                     size_t length)
{
    uct_rc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;
    uct_rc_hdr_t *rc_hdr;
    ucs_status_t post_status;

    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(sizeof(*rc_hdr) + length,
                     iface->super.super.config.seg_size, "am_bcopy");
    UCT_RC_MLX5_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->super.tx.mp, desc);

    desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;

    /* Layout behind the descriptor: [RC header][packed payload] */
    rc_hdr        = (void*)(desc + 1);
    rc_hdr->am_id = id;
    pack_cb(rc_hdr + 1, arg, length);

    post_status = uct_rc_mlx5_ep_bcopy_post(ep,
                                            MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_RAW,
                                            sizeof(*rc_hdr) + length,
                                            0, NULL, 0, 0, 0, 0, desc, UCS_OK);
    UCT_TL_EP_STAT_OP_IF_SUCCESS(post_status, &ep->super.super, AM, BCOPY,
                                 length);
    return post_status;
}
/*
 * Send a short eager tagged message on a DC/mlx5 endpoint.
 *
 * Without HAVE_IBV_EXP_DM the call is forwarded unconditionally to
 * uct_dc_mlx5_ep_tag_eager_short_inline().
 *
 * With HAVE_IBV_EXP_DM the inline path is still used when the tag header plus
 * payload fit into UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE), or
 * when iface->super.dm.dm is not set. Otherwise the message is sent through
 * uct_dc_mlx5_ep_short_dm(), bounded by iface->super.dm.seg_len.
 *
 * Returns the status of whichever send path was taken.
 */
ucs_status_t uct_dc_mlx5_ep_tag_eager_short(uct_ep_h tl_ep, uct_tag_t tag,
                                            const void *data, size_t length)
{
#if HAVE_IBV_EXP_DM
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    uct_rc_mlx5_dm_copy_data_t cache;
    ucs_status_t dm_status;

    if (ucs_likely((sizeof(struct ibv_exp_tmh) + length <=
                    UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) ||
                   !iface->super.dm.dm)) {
        /* Fits inline, or the dm object is not set */
        return uct_dc_mlx5_ep_tag_eager_short_inline(tl_ep, tag, data, length);
    }

    UCT_CHECK_LENGTH(length + sizeof(struct ibv_exp_tmh), 0,
                     iface->super.dm.seg_len, "tag_short");
    UCT_DC_MLX5_CHECK_RES(iface, ep);

    /* Tag header goes into the cache area which precedes the payload */
    uct_rc_mlx5_fill_tmh(ucs_unaligned_ptr(&cache.tm_hdr), tag, 0,
                         IBV_EXP_TMH_EAGER);

    dm_status = uct_dc_mlx5_ep_short_dm(ep, &cache, sizeof(cache.tm_hdr), data,
                                        length, MLX5_OPCODE_SEND,
                                        MLX5_WQE_CTRL_SOLICITED |
                                        MLX5_WQE_CTRL_CQ_UPDATE,
                                        0, 0);
    if (!UCS_STATUS_IS_ERR(dm_status)) {
        UCT_TL_EP_STAT_OP(&ep->super, TAG, SHORT, length);
    }

    return dm_status;
#else
    return uct_dc_mlx5_ep_tag_eager_short_inline(tl_ep, tag, data, length);
#endif
}
/*
 * Send a rendezvous request on a DC/mlx5 endpoint.
 *
 * The user header is posted inline with opcode MLX5_OPCODE_SEND_IMM and an
 * IBV_EXP_TMH_EAGER tag header, using the TX QP/WQ obtained for this
 * endpoint. The tag header plus user header must fit into
 * UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE).
 *
 * The `flags` parameter is not referenced by this function.
 *
 * Returns UCS_OK after posting. The check macros are defined elsewhere and
 * presumably return early on failure.
 */
ucs_status_t uct_dc_mlx5_ep_tag_rndv_request(uct_ep_h tl_ep, uct_tag_t tag,
                                             const void* header,
                                             unsigned header_length,
                                             unsigned flags)
{
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

    UCT_CHECK_LENGTH(header_length + sizeof(struct ibv_exp_tmh), 0,
                     UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE),
                     "tag_rndv_request");
    UCT_DC_MLX5_CHECK_RES(iface, ep);

    UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq);

    uct_rc_mlx5_txqp_tag_inline_post(&iface->super, UCT_IB_QPT_DCI,
                                     txqp, txwq,
                                     MLX5_OPCODE_SEND_IMM,
                                     header, header_length,
                                     NULL, tag, 0,
                                     IBV_EXP_TMH_EAGER, 0,
                                     &ep->av,
                                     uct_dc_mlx5_ep_get_grh(ep),
                                     uct_ib_mlx5_wqe_av_size(&ep->av),
                                     NULL, 0,
                                     MLX5_WQE_CTRL_SOLICITED);
    return UCS_OK;
}
/*
 * Zero-copy PUT on a uGNI RDMA endpoint.
 *
 * UCT_CHECK_PARAM_IOV introduces the `buffer`, `length` and `memh` names used
 * below (they are not declared in this function). Zero-length requests are
 * handled by UCT_SKIP_ZERO_LENGTH, and the length is bounded by
 * iface->config.rdma_max_size.
 *
 * Returns UCS_ERR_NO_RESOURCE when no descriptor is available, otherwise the
 * result of uct_ugni_post_rdma().
 */
ucs_status_t uct_ugni_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov,
                                   size_t iovcnt, uint64_t remote_addr,
                                   uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *desc;

    UCT_CHECK_PARAM_IOV(iov, iovcnt, buffer, length, memh);
    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(length, 0, iface->config.rdma_max_size, "put_zcopy");
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, desc,
                             return UCS_ERR_NO_RESOURCE);

    /* Fill the RDMA PUT descriptor, including the user completion */
    uct_ugni_format_rdma(desc, GNI_POST_RDMA_PUT, buffer, remote_addr, memh,
                         rkey, length, ep, iface->super.local_cq, comp);

    ucs_trace_data("Posting PUT ZCOPY, GNI_PostRdma of size %"PRIx64" from %p to %p, with [%"PRIx64" %"PRIx64"]",
                   desc->desc.length,
                   (void *)desc->desc.local_addr,
                   (void *)desc->desc.remote_addr,
                   desc->desc.remote_mem_hndl.qword1,
                   desc->desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, ZCOPY, length);
    return uct_ugni_post_rdma(iface, ep, desc);
}
/*
 * Inline variant of the short eager tagged send for DC/mlx5.
 *
 * The payload is posted inline with opcode MLX5_OPCODE_SEND and an
 * IBV_EXP_TMH_EAGER tag header. Tag header plus payload must fit into
 * UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE).
 *
 * Returns UCS_OK after posting. The check macros are defined elsewhere and
 * presumably return early on failure.
 */
static ucs_status_t UCS_F_ALWAYS_INLINE
uct_dc_mlx5_ep_tag_eager_short_inline(uct_ep_h tl_ep, uct_tag_t tag,
                                      const void *data, size_t length)
{
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

    UCT_CHECK_LENGTH(length + sizeof(struct ibv_exp_tmh), 0,
                     UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE),
                     "uct_dc_mlx5_ep_tag_short");
    UCT_DC_MLX5_CHECK_RES(iface, ep);

    UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq);

    uct_rc_mlx5_txqp_tag_inline_post(&iface->super, UCT_IB_QPT_DCI,
                                     txqp, txwq,
                                     MLX5_OPCODE_SEND,
                                     data, length,
                                     NULL, tag, 0,
                                     IBV_EXP_TMH_EAGER, 0,
                                     &ep->av,
                                     uct_dc_mlx5_ep_get_grh(ep),
                                     uct_ib_mlx5_wqe_av_size(&ep->av),
                                     NULL, 0,
                                     MLX5_WQE_CTRL_SOLICITED);

    UCT_TL_EP_STAT_OP(&ep->super, TAG, SHORT, length);
    return UCS_OK;
}
/*
 * Send a short active message on a UD/verbs endpoint.
 *
 * The 64-bit AM header is stored in the skb right after the network header
 * and described by sge[0]; the payload is passed to
 * uct_ud_verbs_iface_tx_inl() to be sent as inline data.
 *
 * The length is validated before uct_ud_verbs_am_common() is called, so an
 * invalid request is rejected before any send state is touched. This matches
 * the ordering used by uct_ud_verbs_ep_am_bcopy().
 * NOTE(review): uct_ud_verbs_am_common() is not visible here; confirm that it
 * has no side effect which callers rely on for over-sized requests.
 *
 * Returns UCS_OK on success, or the error status returned by
 * uct_ud_verbs_am_common().
 */
static ucs_status_t uct_ud_verbs_ep_am_short(uct_ep_h tl_ep, uint8_t id,
                                             uint64_t hdr, const void *buffer,
                                             unsigned length)
{
    uct_ud_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t);
    uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_ud_verbs_iface_t);
    uct_ud_send_skb_t *skb;
    uct_ud_am_short_hdr_t *am_hdr;
    ucs_status_t status;

    /* Validate parameters first, before preparing the send */
    UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(hdr) + length,
                     iface->super.config.max_inline, "am_short");

    status = uct_ud_verbs_am_common(iface, ep, id, &skb);
    if (status != UCS_OK) {
        return status;
    }

    am_hdr      = (uct_ud_am_short_hdr_t *)(skb->neth+1);
    am_hdr->hdr = hdr;

    /* sge[0] covers the network header and the AM header only */
    iface->tx.sge[0].length = sizeof(uct_ud_neth_t) + sizeof(*am_hdr);

    uct_ud_verbs_iface_tx_inl(iface, ep, buffer, length);
    ucs_trace_data("TX: AM [%d] buf=%p len=%u", id, buffer, length);

    skb->len = iface->tx.sge[0].length;
    uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb,
                                 am_hdr+1, buffer, length);
    return UCS_OK;
}
/*
 * Send a rendezvous request on an RC/verbs endpoint.
 *
 * For RNDV request send regular eager packet with IBV_SEND_WITH_IMM and
 * imm_value = 0. Receiver will handle such message as rndv request.
 *
 * The tag header (allocated on the stack with the size taken from
 * iface->tm.eager_hdr_size) and the user header are sent inline as two
 * scatter-gather entries; together they must fit into
 * iface->verbs_common.config.max_inline.
 *
 * Returns UCS_OK after posting. The check macros are defined elsewhere and
 * presumably return early on failure.
 */
ucs_status_t uct_rc_verbs_ep_tag_rndv_request(uct_ep_h tl_ep, uct_tag_t tag,
                                              const void* header,
                                              unsigned header_length)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    void *tm_hdr                = ucs_alloca(iface->tm.eager_hdr_size);
    uint32_t app_ctx;
    struct ibv_send_wr wr;

    /* Report failures under this operation's own name */
    UCT_CHECK_LENGTH(header_length + iface->tm.eager_hdr_size, 0,
                     iface->verbs_common.config.max_inline,
                     "tag_rndv_request");
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    wr.sg_list = iface->verbs_common.inl_sge;
    wr.num_sge = 2;
    wr.opcode  = IBV_WR_SEND_WITH_IMM;
    wr.next    = NULL;

    /* Immediate value 0 is what marks the packet as a rndv request */
    uct_rc_verbs_tag_imm_data_pack(&(wr.imm_data), &app_ctx, 0ul);

    uct_rc_verbs_iface_fill_inl_tag_sge(iface, tm_hdr, tag, header,
                                        header_length, app_ctx);

    uct_rc_verbs_ep_post_send(iface, ep, &wr, IBV_SEND_INLINE);
    return UCS_OK;
}
/*
 * Buffered-copy GET on an RC/mlx5 endpoint.
 *
 * An RDMA READ is posted through a TX descriptor. The unpack callback, its
 * argument, the length and the user completion are stored in the descriptor;
 * the descriptor handler is chosen according to whether a completion was
 * supplied.
 *
 * Returns the status of uct_rc_mlx5_ep_bcopy_post(), which is called with
 * UCS_INPROGRESS as its final argument.
 */
ucs_status_t uct_rc_mlx5_ep_get_bcopy(uct_ep_h tl_ep,
                                      uct_unpack_callback_t unpack_cb,
                                      void *arg, size_t length,
                                      uint64_t remote_addr, uct_rkey_t rkey,
                                      uct_completion_t *comp)
{
    uct_rc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;
    ucs_status_t post_status;

    UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size, "get_bcopy");
    UCT_RC_MLX5_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->super.tx.mp, desc);

    if (comp == NULL) {
        desc->super.handler = uct_rc_ep_get_bcopy_handler_no_completion;
    } else {
        desc->super.handler = uct_rc_ep_get_bcopy_handler;
    }

    /* State for the handler which runs when the read completes */
    desc->unpack_cb        = unpack_cb;
    desc->super.unpack_arg = arg;
    desc->super.length     = length;
    desc->super.user_comp  = comp;

    post_status = uct_rc_mlx5_ep_bcopy_post(ep, MLX5_OPCODE_RDMA_READ, length,
                                            0, NULL, 0, remote_addr, rkey,
                                            MLX5_WQE_CTRL_CQ_UPDATE, desc,
                                            UCS_INPROGRESS);
    UCT_TL_EP_STAT_OP_IF_SUCCESS(post_status, &ep->super.super, GET, BCOPY,
                                 length);
    return post_status;
}
/*
 * Buffered-copy GET on a uGNI RDMA endpoint, performed as an FMA GET.
 *
 * Zero-length requests are handled by UCT_SKIP_ZERO_LENGTH. The length,
 * rounded up to UGNI_GET_ALIGN, must not exceed iface->config.fma_seg_size.
 * The fetch descriptor is formatted with uct_ugni_unalign_fma_get_cb together
 * with the user's unpack callback and argument.
 *
 * Returns UCS_ERR_NO_RESOURCE when no descriptor is available, otherwise the
 * result of uct_ugni_post_fma(), which is given UCS_INPROGRESS as its final
 * argument.
 */
ucs_status_t uct_ugni_ep_get_bcopy(uct_ep_h tl_ep,
                                   uct_unpack_callback_t unpack_cb,
                                   void *arg, size_t length,
                                   uint64_t remote_addr, uct_rkey_t rkey,
                                   uct_completion_t *comp)
{
    uct_ugni_ep_t *ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_rdma_iface_t);
    uct_ugni_rdma_fetch_desc_t *fma;

    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(ucs_align_up_pow2(length, UGNI_GET_ALIGN), 0,
                     iface->config.fma_seg_size, "get_bcopy");
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer,
                             fma, return UCS_ERR_NO_RESOURCE);

    uct_ugni_format_get_fma(fma, GNI_POST_FMA_GET, remote_addr, rkey, length,
                            ep, comp, uct_ugni_unalign_fma_get_cb, unpack_cb,
                            arg);

    /* `length` is a size_t, hence the %zu conversion */
    ucs_trace_data("Posting GET BCOPY, GNI_PostFma of size %"PRIx64" (%zu) from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   fma->super.desc.length, length,
                   (void *)fma->super.desc.local_addr,
                   (void *)fma->super.desc.remote_addr,
                   fma->super.desc.remote_mem_hndl.qword1,
                   fma->super.desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, BCOPY, length);
    return uct_ugni_post_fma(iface, ep, &fma->super, UCS_INPROGRESS);
}
/*
 * Buffered-copy GET on an RC/verbs endpoint.
 *
 * A signaled RDMA READ is posted through a TX descriptor. The unpack
 * callback, its argument, the length and the user completion are stored in
 * the descriptor; the descriptor handler is chosen according to whether a
 * completion was supplied.
 *
 * Returns UCS_INPROGRESS after the work request has been posted. The check
 * macros are defined elsewhere and presumably return early on failure.
 */
ucs_status_t uct_rc_verbs_ep_get_bcopy(uct_ep_h tl_ep,
                                       uct_unpack_callback_t unpack_cb,
                                       void *arg, size_t length,
                                       uint64_t remote_addr, uct_rkey_t rkey,
                                       uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;

    UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size, "get_bcopy");
    UCT_RC_VERBS_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->super.tx.mp, desc);

    ucs_assert(length <= iface->super.super.config.seg_size);

    if (comp == NULL) {
        desc->super.handler = uct_rc_ep_get_bcopy_handler_no_completion;
    } else {
        desc->super.handler = uct_rc_ep_get_bcopy_handler;
    }

    /* State for the handler which runs when the read completes */
    desc->unpack_cb        = unpack_cb;
    desc->super.unpack_arg = arg;
    desc->super.length     = length;
    desc->super.user_comp  = comp;

    uct_rc_verbs_fill_rdma_wr(&wr, IBV_WR_RDMA_READ, &sge, length, remote_addr,
                              rkey);

    UCT_TL_EP_STAT_OP(&ep->super.super, GET, BCOPY, length);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED);
    return UCS_INPROGRESS;
}
/*
 * Send a short active message on a uGNI SMSG endpoint.
 *
 * The message is assembled in a TX descriptor as
 *   [uct_ugni_smsg_header_t][64-bit header][payload]
 * so the payload may occupy at most
 *   smsg_seg_size - sizeof(uct_ugni_smsg_header_t) - sizeof(header)
 * bytes. The bound uses sizeof(*smsg_header) (the header structure), not
 * sizeof(smsg_header), which would be the size of a pointer.
 *
 * Returns UCS_ERR_NO_RESOURCE when no descriptor is available, otherwise the
 * result of uct_ugni_smsg_ep_am_common_send().
 */
ucs_status_t uct_ugni_smsg_ep_am_short(uct_ep_h tl_ep, uint8_t id,
                                       uint64_t header, const void *payload,
                                       unsigned length)
{
    uct_ugni_smsg_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_smsg_iface_t);
    uct_ugni_smsg_ep_t *ep       = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t);
    uct_ugni_smsg_header_t *smsg_header;
    uint64_t *header_data;
    uct_ugni_smsg_desc_t *desc;

    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(length,
                     iface->config.smsg_seg_size -
                     (sizeof(*smsg_header) + sizeof(header)),
                     "am_short");
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, desc,
                             return UCS_ERR_NO_RESOURCE);

    ucs_trace_data("AM_SHORT [%p] am_id: %d buf=%p length=%u",
                   iface, id, payload, length);

    /* SMSG header lives right behind the descriptor; its length field counts
     * the 64-bit header plus the payload */
    smsg_header         = (uct_ugni_smsg_header_t *)(desc+1);
    smsg_header->length = length + sizeof(header);

    header_data  = (uint64_t*)(smsg_header+1);
    *header_data = header;
    memcpy((void*)(header_data+1), payload, length);

    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND,
                       id, header_data, length, "TX: AM_SHORT");

    return uct_ugni_smsg_ep_am_common_send(ep, iface, id,
                                           sizeof(uct_ugni_smsg_header_t),
                                           smsg_header, smsg_header->length,
                                           (void*)header_data, desc);
}
/*
 * Buffered-copy PUT on a uGNI RDMA endpoint, performed as an FMA PUT.
 *
 * Because the data comes from a user pack callback, it is first packed into
 * the buffer which follows a TX descriptor, and that buffer is then handed
 * to FMA. The length is whatever pack_cb() reports.
 *
 * NOTE(review): the length is checked against fma_seg_size only after
 * pack_cb() has written the data and after the descriptor was taken from
 * the pool. If UCT_CHECK_LENGTH returns on failure, the descriptor does not
 * appear to be released on that path - confirm against the macro definition.
 *
 * Returns UCS_ERR_NO_RESOURCE when no descriptor is available, otherwise the
 * result of uct_ugni_post_fma(), which is given the packed length as its
 * final argument.
 */
ssize_t uct_ugni_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                              void *arg, uint64_t remote_addr, uct_rkey_t rkey)
{
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *desc;
    size_t length;

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_buffer,
                             desc, return UCS_ERR_NO_RESOURCE);

    /* Pack into the buffer located right behind the descriptor */
    length = pack_cb(desc + 1, arg);
    UCT_SKIP_ZERO_LENGTH(length, desc);
    UCT_CHECK_LENGTH(length, 0, iface->config.fma_seg_size, "put_bcopy");

    uct_ugni_format_fma(desc, GNI_POST_FMA_PUT, desc + 1, remote_addr, rkey,
                        length, ep, NULL, NULL);

    ucs_trace_data("Posting PUT BCOPY, GNI_PostFma of size %"PRIx64" from %p to "
                   "%p, with [%"PRIx64" %"PRIx64"]",
                   desc->desc.length,
                   (void *)desc->desc.local_addr,
                   (void *)desc->desc.remote_addr,
                   desc->desc.remote_mem_hndl.qword1,
                   desc->desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, BCOPY, length);
    return uct_ugni_post_fma(iface, ep, desc, length);
}
/*
 * Short PUT on a DC/mlx5 endpoint.
 *
 * Without HAVE_IBV_EXP_DM the call is forwarded unconditionally to
 * uct_dc_mlx5_ep_put_short_inline().
 *
 * With HAVE_IBV_EXP_DM the inline path is still used when the payload fits
 * into UCT_IB_MLX5_PUT_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE), or when
 * iface->super.dm.dm is not set. Otherwise the data is sent through
 * uct_dc_mlx5_ep_short_dm() as an RDMA WRITE, bounded by
 * iface->super.dm.seg_len.
 *
 * Returns the inline path's status, the error from uct_dc_mlx5_ep_short_dm(),
 * or UCS_OK when the dm send did not fail.
 */
ucs_status_t uct_dc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *payload,
                                      unsigned length, uint64_t remote_addr,
                                      uct_rkey_t rkey)
{
#if HAVE_IBV_EXP_DM
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    ucs_status_t dm_status;

    if (ucs_likely((length <=
                    UCT_IB_MLX5_PUT_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) ||
                   !iface->super.dm.dm)) {
        /* Fits inline, or the dm object is not set */
        return uct_dc_mlx5_ep_put_short_inline(tl_ep, payload, length,
                                               remote_addr, rkey);
    }

    UCT_CHECK_LENGTH(length, 0, iface->super.dm.seg_len, "put_short");
    UCT_DC_MLX5_CHECK_RES(iface, ep);

    dm_status = uct_dc_mlx5_ep_short_dm(ep, NULL, 0, payload, length,
                                        MLX5_OPCODE_RDMA_WRITE,
                                        MLX5_WQE_CTRL_CQ_UPDATE,
                                        remote_addr, rkey);
    if (UCS_STATUS_IS_ERR(dm_status)) {
        return dm_status;
    }

    UCT_TL_EP_STAT_OP(&ep->super, PUT, SHORT, length);
    return UCS_OK;
#else
    return uct_dc_mlx5_ep_put_short_inline(tl_ep, payload, length, remote_addr,
                                           rkey);
#endif
}
/*
 * Short PUT on a UD/mlx5 endpoint.
 *
 * The network header, the put header (remote virtual address) and the
 * payload are written directly into the TX work queue as inline data. After
 * posting, the headers are copied into the skb and the send is completed
 * through uct_ud_iface_complete_tx_inl().
 *
 * The WQE size check is performed BEFORE uct_ud_enter(): the check macro is
 * presumably able to return from the function, and doing so between
 * uct_ud_enter() and uct_ud_leave() (or after the WQE has been partially
 * written) would leave the interface in an inconsistent state.
 *
 * The `rkey` parameter is not referenced by this function.
 *
 * Returns UCS_OK on success, UCS_ERR_NO_RESOURCE when no TX skb is available.
 */
static ucs_status_t uct_ud_mlx5_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                             unsigned length,
                                             uint64_t remote_addr,
                                             uct_rkey_t rkey)
{
    uct_ud_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t);
    uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_ud_mlx5_iface_t);
    struct mlx5_wqe_ctrl_seg *ctrl;
    struct mlx5_wqe_inl_data_seg *inl;
    unsigned inl_size;
    unsigned wqe_size;
    uct_ud_put_hdr_t *put_hdr;
    uct_ud_neth_t *neth;
    uct_ud_send_skb_t *skb;

    /* Size of the inline data, and of the whole WQE which contains it */
    inl_size = length + sizeof(*put_hdr) + sizeof(*neth);
    wqe_size = inl_size + UCT_UD_MLX5_WQE_SIZE + sizeof(*inl);

    UCT_CHECK_LENGTH(wqe_size, UCT_IB_MLX5_MAX_BB * MLX5_SEND_WQE_BB,
                     "put_short");

    uct_ud_enter(&iface->super);
    uct_ud_iface_progress_pending_tx(&iface->super);
    skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super);
    if (!skb) {
        uct_ud_leave(&iface->super);
        return UCS_ERR_NO_RESOURCE;
    }

    ctrl = iface->tx.wq.curr;

    /* Set inline segment which has the network header, the put header and
     * the payload */
    inl             = uct_ib_mlx5_get_next_seg(&iface->tx.wq, ctrl,
                                               UCT_UD_MLX5_WQE_SIZE);
    inl->byte_count = htonl(inl_size | MLX5_INLINE_SEG);

    /* assume that neth and put header fit into one bb */
    ucs_assert(sizeof(*put_hdr) + sizeof(*neth) < MLX5_SEND_WQE_BB);
    neth = (void*)(inl + 1);
    uct_ud_neth_init_data(&ep->super, neth);
    uct_ud_neth_set_type_put(&ep->super, neth);
    uct_ud_neth_ack_req(&ep->super, neth);

    put_hdr      = (uct_ud_put_hdr_t *)(neth+1);
    put_hdr->rva = remote_addr;
    uct_ib_mlx5_inline_copy(put_hdr + 1, buffer, length, &iface->tx.wq);

    UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth);
    uct_ud_mlx5_post_send(iface, ep, ctrl, wqe_size);

    /* Keep a copy of the headers in the skb; the payload is handed to the
     * completion routine separately */
    skb->len = sizeof(*neth) + sizeof(*put_hdr);
    memcpy(skb->neth, neth, skb->len);
    uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb,
                                 (char *)skb->neth + skb->len, buffer, length);
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length);
    uct_ud_leave(&iface->super);
    return UCS_OK;
}
/*
 * Send a short active message on a shared-memory (MM) endpoint.
 *
 * The 64-bit header plus payload must fit into what remains of a FIFO
 * element after its uct_mm_fifo_element_t part. The send itself is delegated
 * to uct_mm_ep_am_common_send() with the UCT_MM_AM_SHORT type and no pack
 * callback.
 *
 * Returns the result of uct_mm_ep_am_common_send().
 */
ucs_status_t uct_mm_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t header,
                                const void *payload, unsigned length)
{
    uct_mm_ep_t *ep       = ucs_derived_of(tl_ep, uct_mm_ep_t);
    uct_mm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_mm_iface_t);

    UCT_CHECK_LENGTH(length + sizeof(header),
                     iface->config.fifo_elem_size -
                     sizeof(uct_mm_fifo_element_t),
                     "am_short");

    return uct_mm_ep_am_common_send(UCT_MM_AM_SHORT, ep, iface, id, length,
                                    header, payload, NULL, NULL);
}
/*
 * Send a short active message on a uGNI UDT endpoint.
 *
 * The payload must fit into the UDT segment after subtracting the 64-bit
 * header and the uct_ugni_udt_header_t. The send itself is delegated to
 * uct_ugni_udt_ep_am_common_send() with the UCT_UGNI_UDT_AM_SHORT type and
 * no pack callback.
 *
 * Returns the result of uct_ugni_udt_ep_am_common_send().
 */
ucs_status_t uct_ugni_udt_ep_am_short(uct_ep_h tl_ep, uint8_t id,
                                      uint64_t header, const void *payload,
                                      unsigned length)
{
    uct_ugni_udt_ep_t *ep       = ucs_derived_of(tl_ep, uct_ugni_udt_ep_t);
    uct_ugni_udt_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_ugni_udt_iface_t);

    UCT_CHECK_LENGTH(length,
                     iface->config.udt_seg_size - sizeof(header) -
                     sizeof(uct_ugni_udt_header_t),
                     "am_short");

    ucs_trace_data("AM_SHORT [%p] am_id: %d buf=%p length=%u",
                   iface, id, payload, length);

    return uct_ugni_udt_ep_am_common_send(UCT_UGNI_UDT_AM_SHORT, ep, iface, id,
                                          length, header, payload, NULL, NULL);
}
/*
 * Zero-copy GET on an RC/mlx5 endpoint (single contiguous buffer variant).
 *
 * Posts an RDMA READ of `length` bytes from remote_addr/rkey into `buffer`
 * (registered as `memh`), requesting a CQ update. The length is bounded by
 * UCT_IB_MAX_MESSAGE_SIZE.
 *
 * Returns the status of uct_rc_mlx5_ep_zcopy_post().
 */
ucs_status_t uct_rc_mlx5_ep_get_zcopy(uct_ep_h tl_ep, void *buffer,
                                      size_t length, uct_mem_h memh,
                                      uint64_t remote_addr, uct_rkey_t rkey,
                                      uct_completion_t *comp)
{
    uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    ucs_status_t post_status;

    UCT_CHECK_LENGTH(length, UCT_IB_MAX_MESSAGE_SIZE, "get_zcopy");

    /* No AM id and no inline header for an RDMA READ */
    post_status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_RDMA_READ, buffer,
                                            length, memh, 0, NULL, 0,
                                            remote_addr, rkey,
                                            MLX5_WQE_CTRL_CQ_UPDATE, comp);
    UCT_TL_EP_STAT_OP_IF_SUCCESS(post_status, &ep->super.super, GET, ZCOPY,
                                 length);
    return post_status;
}
/*
 * Short PUT on an RC/verbs endpoint.
 *
 * The payload must fit into iface->verbs_common.config.max_inline. The
 * interface's pre-allocated inline RDMA-write work request is filled by
 * UCT_RC_VERBS_FILL_INL_PUT_WR and posted inline and signaled.
 *
 * Returns UCS_OK after posting. The check macros are defined elsewhere and
 * presumably return early on failure.
 */
ucs_status_t uct_rc_verbs_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                       unsigned length, uint64_t remote_addr,
                                       uct_rkey_t rkey)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);

    UCT_CHECK_LENGTH(length, iface->verbs_common.config.max_inline,
                     "put_short");
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    UCT_RC_VERBS_FILL_INL_PUT_WR(iface, remote_addr, rkey, buffer, length);

    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length);
    uct_rc_verbs_ep_post_send(iface, ep, &iface->inl_rwrite_wr,
                              IBV_SEND_INLINE | IBV_SEND_SIGNALED);
    return UCS_OK;
}
/*
 * Send a short eager tagged message on an RC/verbs endpoint.
 *
 * The tag header (allocated on the stack with the size taken from
 * iface->tm.eager_hdr_size) and the payload are placed into the inline
 * scatter-gather entries by uct_rc_verbs_iface_fill_inl_tag_sge() and sent
 * with the interface's inline AM work request. Tag header plus payload must
 * fit into iface->verbs_common.config.max_inline.
 *
 * Returns UCS_OK after posting. The check macros are defined elsewhere and
 * presumably return early on failure.
 */
ucs_status_t uct_rc_verbs_ep_tag_eager_short(uct_ep_h tl_ep, uct_tag_t tag,
                                             const void *data, size_t length)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    void *tag_hdr               = ucs_alloca(iface->tm.eager_hdr_size);

    UCT_CHECK_LENGTH(length + iface->tm.eager_hdr_size, 0,
                     iface->verbs_common.config.max_inline, "tag_short");
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    /* Last argument is the application context value: 0 here */
    uct_rc_verbs_iface_fill_inl_tag_sge(iface, tag_hdr, tag, data, length, 0);

    uct_rc_verbs_ep_post_send(iface, ep, &iface->inl_am_wr, IBV_SEND_INLINE);
    return UCS_OK;
}
/*
 * Short PUT on an RC/verbs endpoint.
 *
 * The payload must fit into iface->config.max_inline. The interface's
 * pre-allocated inline RDMA-write work request is pointed at the remote
 * address/rkey, its first inline scatter-gather entry is pointed at the user
 * buffer, and the request is posted inline and signaled.
 *
 * Returns UCS_OK after posting. The check macros are defined elsewhere and
 * presumably return early on failure.
 */
ucs_status_t uct_rc_verbs_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                       unsigned length, uint64_t remote_addr,
                                       uct_rkey_t rkey)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);

    UCT_CHECK_LENGTH(length, iface->config.max_inline, "put_short");
    UCT_RC_VERBS_CHECK_RES(iface, ep);

    /* Local source of the write */
    iface->inl_sge[0].addr   = (uintptr_t)buffer;
    iface->inl_sge[0].length = length;

    /* Remote destination of the write */
    iface->inl_rwrite_wr.wr.rdma.remote_addr = remote_addr;
    iface->inl_rwrite_wr.wr.rdma.rkey        = rkey;

    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length);
    uct_rc_verbs_ep_post_send(iface, ep, &iface->inl_rwrite_wr,
                              IBV_SEND_INLINE | IBV_SEND_SIGNALED);
    return UCS_OK;
}
/*
 * Buffered-copy GET on a DC/mlx5 endpoint.
 *
 * A get-bcopy descriptor is obtained through
 * UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC, which receives the unpack callback, the
 * completion, the callback argument and the length. An RDMA READ into the
 * buffer following the descriptor is then posted. The length is bounded by
 * the interface segment size.
 *
 * Returns UCS_INPROGRESS after posting. The check macros are defined
 * elsewhere and presumably return early on failure.
 */
ucs_status_t uct_dc_mlx5_ep_get_bcopy(uct_ep_h tl_ep,
                                      uct_unpack_callback_t unpack_cb,
                                      void *arg, size_t length,
                                      uint64_t remote_addr, uct_rkey_t rkey,
                                      uct_completion_t *comp)
{
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;

    UCT_CHECK_LENGTH(length, 0, iface->super.super.super.config.seg_size,
                     "get_bcopy");
    UCT_DC_MLX5_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC(&iface->super.super,
                                       &iface->super.super.tx.mp,
                                       desc, unpack_cb, comp, arg, length);

    /* Data is read into the buffer which follows the descriptor */
    uct_dc_mlx5_iface_bcopy_post(iface, ep, MLX5_OPCODE_RDMA_READ, length,
                                 remote_addr, rkey, desc, 0, 0, desc + 1,
                                 NULL);

    UCT_TL_EP_STAT_OP(&ep->super, GET, BCOPY, length);
    return UCS_INPROGRESS;
}
/*
 * Zero-copy GET on a DC/mlx5 endpoint (IOV variant).
 *
 * The IOV count is bounded by uct_ib_iface_get_max_iov() and the total IOV
 * length by UCT_IB_MAX_MESSAGE_SIZE. An RDMA READ covering the IOV list is
 * then posted.
 *
 * Returns UCS_INPROGRESS after posting. The check macros are defined
 * elsewhere and presumably return early on failure.
 */
ucs_status_t uct_dc_mlx5_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov,
                                      size_t iovcnt, uint64_t remote_addr,
                                      uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_dc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_dc_mlx5_iface_t);

    UCT_CHECK_IOV_SIZE(iovcnt,
                       uct_ib_iface_get_max_iov(&iface->super.super.super),
                       "uct_dc_mlx5_ep_get_zcopy");
    UCT_CHECK_LENGTH(uct_iov_total_length(iov, iovcnt),
                     UCT_IB_MAX_MESSAGE_SIZE, "get_zcopy");
    UCT_DC_CHECK_RES(&iface->super, &ep->super);

    /* No AM id and no inline header for an RDMA READ */
    uct_dc_mlx5_iface_zcopy_post(iface, ep, MLX5_OPCODE_RDMA_READ, iov, iovcnt,
                                 0, NULL, 0, remote_addr, rkey, comp);

    UCT_TL_EP_STAT_OP(&ep->super.super, GET, ZCOPY,
                      uct_iov_total_length(iov, iovcnt));
    return UCS_INPROGRESS;
}
/*
 * Zero-copy GET on an RC/mlx5 endpoint (IOV variant).
 *
 * The IOV count is bounded by uct_ib_iface_get_max_iov() and the total IOV
 * length by UCT_IB_MAX_MESSAGE_SIZE. An RDMA READ covering the IOV list is
 * then posted, requesting a CQ update.
 *
 * Returns the status of uct_rc_mlx5_ep_zcopy_post().
 */
ucs_status_t uct_rc_mlx5_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov,
                                      size_t iovcnt, uint64_t remote_addr,
                                      uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_ib_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ib_iface_t);
    ucs_status_t post_status;

    UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(iface),
                       "uct_rc_mlx5_ep_get_zcopy");
    UCT_CHECK_LENGTH(uct_iov_total_length(iov, iovcnt),
                     UCT_IB_MAX_MESSAGE_SIZE, "get_zcopy");

    /* No AM id and no inline header for an RDMA READ */
    post_status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_RDMA_READ, iov,
                                            iovcnt, 0, NULL, 0, remote_addr,
                                            rkey, MLX5_WQE_CTRL_CQ_UPDATE,
                                            comp);
    UCT_TL_EP_STAT_OP_IF_SUCCESS(post_status, &ep->super.super, GET, ZCOPY,
                                 uct_iov_total_length(iov, iovcnt));
    return post_status;
}
/*
 * Buffered-copy PUT on an RC/mlx5 endpoint.
 *
 * pack_cb() writes `length` bytes into the buffer following a TX descriptor,
 * which is then posted as an RDMA WRITE with a CQ update requested. The
 * descriptor handler is ucs_mpool_put. The length is bounded by the interface
 * segment size.
 *
 * Returns the status of uct_rc_mlx5_ep_bcopy_post(), which is called with
 * UCS_OK as its final argument.
 */
ucs_status_t uct_rc_mlx5_ep_put_bcopy(uct_ep_h tl_ep,
                                      uct_pack_callback_t pack_cb,
                                      void *arg, size_t length,
                                      uint64_t remote_addr, uct_rkey_t rkey)
{
    uct_rc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;
    ucs_status_t post_status;

    UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size, "put_bcopy");
    UCT_RC_MLX5_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->super.tx.mp, desc);

    desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;

    /* Payload is packed right behind the descriptor */
    pack_cb(desc + 1, arg, length);

    post_status = uct_rc_mlx5_ep_bcopy_post(ep, MLX5_OPCODE_RDMA_WRITE, length,
                                            0, NULL, 0, remote_addr, rkey,
                                            MLX5_WQE_CTRL_CQ_UPDATE, desc,
                                            UCS_OK);
    UCT_TL_EP_STAT_OP_IF_SUCCESS(post_status, &ep->super.super, PUT, BCOPY,
                                 length);
    return post_status;
}
/*
 * Zero-copy GET on a uGNI RDMA endpoint.
 *
 * UCT_CHECK_PARAM_IOV introduces the `buffer`, `length` and `memh` names used
 * below (they are not declared in this function). Zero-length requests are
 * handled by UCT_SKIP_ZERO_LENGTH, and the length rounded up to
 * UGNI_GET_ALIGN is bounded by iface->config.rdma_max_size.
 *
 * When any of the ucs_check_if_align_pow2() tests below is non-zero (the
 * local buffer test applies to GEMINI devices only) the request is delegated
 * to uct_ugni_ep_get_composed(); presumably a non-zero result means "not
 * aligned". Otherwise a regular RDMA GET of the rounded-up length is posted.
 *
 * NOTE(review): `length` is printed with %lu in the trace message; its type
 * comes from UCT_CHECK_PARAM_IOV - confirm that the specifier matches.
 *
 * Returns UCS_ERR_NO_RESOURCE when no descriptor is available, otherwise the
 * result of uct_ugni_ep_get_composed() or uct_ugni_post_rdma().
 */
ucs_status_t uct_ugni_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov,
                                   size_t iovcnt, uint64_t remote_addr,
                                   uct_rkey_t rkey, uct_completion_t *comp)
{
    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                  uct_ugni_rdma_iface_t);
    uct_ugni_ep_t *ep            = ucs_derived_of(tl_ep, uct_ugni_ep_t);
    uct_ugni_base_desc_t *desc;

    UCT_CHECK_PARAM_IOV(iov, iovcnt, buffer, length, memh);
    UCT_SKIP_ZERO_LENGTH(length);
    UCT_CHECK_LENGTH(ucs_align_up_pow2(length, UGNI_GET_ALIGN), 0,
                     iface->config.rdma_max_size, "get_zcopy");

    /* Special flow for unaligned data */
    if (ucs_unlikely((GNI_DEVICE_GEMINI == iface->super.dev->type &&
                      ucs_check_if_align_pow2((uintptr_t)buffer,
                                              UGNI_GET_ALIGN)) ||
                     ucs_check_if_align_pow2(remote_addr, UGNI_GET_ALIGN) ||
                     ucs_check_if_align_pow2(length, UGNI_GET_ALIGN))) {
        return uct_ugni_ep_get_composed(tl_ep, buffer, length, memh,
                                        remote_addr, rkey, comp);
    }

    /* Everything is aligned: take the regular RDMA path */
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, desc,
                             return UCS_ERR_NO_RESOURCE);

    /* Fill the RDMA GET descriptor, including the user completion */
    uct_ugni_format_rdma(desc, GNI_POST_RDMA_GET, buffer, remote_addr, memh,
                         rkey, ucs_align_up_pow2(length, UGNI_GET_ALIGN), ep,
                         iface->super.local_cq, comp);

    ucs_trace_data("Posting GET ZCOPY, GNI_PostRdma of size %"PRIx64" (%lu) "
                   "from %p to %p, with [%"PRIx64" %"PRIx64"]",
                   desc->desc.length, length,
                   (void *)desc->desc.local_addr,
                   (void *)desc->desc.remote_addr,
                   desc->desc.remote_mem_hndl.qword1,
                   desc->desc.remote_mem_hndl.qword2);
    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, length);
    return uct_ugni_post_rdma(iface, ep, desc);
}