/*
 * Buffered-copy active message send (variant taking a 'flags' argument).
 *
 * A TX descriptor is requested through UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC, which
 * is handed 'pack_cb'/'arg' and the address of 'length'. The work request is
 * sized as 'length' plus sizeof(uct_rc_hdr_t) and posted with
 * IBV_SEND_SOLICITED and INT_MAX as the trailing argument.
 *
 * Returns 'length'.
 *
 * NOTE(review): the UCT_*CHECK* macros are defined elsewhere; presumably they
 * return early on failure - confirm at their definition.
 * NOTE(review): 'flags' is not referenced directly in this body.
 */
ssize_t uct_rc_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                 uct_pack_callback_t pack_cb, void *arg,
                                 unsigned flags)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;
    size_t length;

    /* Validation / resource / flow-control macros, in their original order */
    UCT_CHECK_AM_ID(id);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_CHECK_FC(&iface->super, &ep->super, id);

    /* Descriptor comes from the interface TX pool (iface->super.tx.mp) */
    UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC(&iface->super, &iface->super.tx.mp, desc,
                                      id, uct_rc_am_hdr_fill, uct_rc_hdr_t,
                                      pack_cb, arg, &length);

    /* The wire size accounts for the RC header in front of the packed data */
    UCT_RC_VERBS_FILL_AM_BCOPY_WR(wr, sge, length + sizeof(uct_rc_hdr_t),
                                  wr.opcode);

    /* Statistics are recorded with the packed length only */
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length);

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SOLICITED, INT_MAX);
    UCT_RC_UPDATE_FC(&iface->super, &ep->super, id);

    return length;
}
/*
 * Zero-copy active message send taking the payload as an IOV list.
 *
 * sge[0] is kept for the header: its length is set to sizeof(uct_rc_hdr_t)
 * plus 'header_length'. The user IOV entries are converted into sge[1..] by
 * uct_ib_verbs_sge_fill_iov(), so the work request carries (sge_cnt + 1)
 * entries.
 *
 * Returns UCS_INPROGRESS after posting.
 *
 * NOTE(review): the UCT_*CHECK* macros are defined elsewhere; presumably they
 * return early on failure - confirm at their definition. 'send_flags' is
 * passed by address to UCT_RC_IFACE_GET_TX_AM_ZCOPY_DESC and is not assigned
 * anywhere else in this body.
 */
ucs_status_t uct_rc_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id,
                                      const void *header,
                                      unsigned header_length,
                                      const uct_iov_t *iov, size_t iovcnt,
                                      uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep          = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface    = ucs_derived_of(tl_ep->iface,
                                                    uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc = NULL;
    struct ibv_sge sge[UCT_IB_MAX_IOV]; /* slot 0 holds the header entry */
    struct ibv_send_wr wr;
    size_t sge_cnt;
    int send_flags;

    /* One IOV slot fewer than the interface maximum is accepted here */
    UCT_CHECK_IOV_SIZE(iovcnt,
                       uct_ib_iface_get_max_iov(&iface->super.super) - 1,
                       "uct_rc_verbs_ep_am_zcopy");
    UCT_RC_CHECK_AM_ZCOPY(id, header_length, uct_iov_total_length(iov, iovcnt),
                          iface->verbs_common.config.short_desc_size,
                          iface->super.super.config.seg_size);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_CHECK_FC_WND(&iface->super, &ep->super, id);

    /* Header descriptor is requested from the short-descriptor pool */
    UCT_RC_IFACE_GET_TX_AM_ZCOPY_DESC(&iface->super,
                                      &iface->verbs_common.short_desc_mp, desc,
                                      id, header, header_length, comp,
                                      &send_flags);

    sge[0].length = sizeof(uct_rc_hdr_t) + header_length;
    sge_cnt       = uct_ib_verbs_sge_fill_iov(sge + 1, iov, iovcnt);

    UCT_RC_VERBS_FILL_AM_ZCOPY_WR_IOV(wr, sge, (sge_cnt + 1), wr.opcode);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY,
                      (header_length + uct_iov_total_length(iov, iovcnt)));

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, send_flags);
    UCT_RC_UPDATE_FC_WND(&iface->super, &ep->super, id);

    return UCS_INPROGRESS;
}
/*
 * Buffered-copy active message send (variant taking an explicit 'length').
 *
 * The region right after the descriptor ('desc + 1') is used as the RC header;
 * the active message id is written there and 'pack_cb' is invoked on the bytes
 * that follow the header. A single-SGE IBV_WR_SEND of
 * sizeof(uct_rc_hdr_t) + length bytes is then posted with send flags 0.
 *
 * Returns UCS_OK after posting.
 *
 * NOTE(review): the UCT_*CHECK* macros and UCT_RC_IFACE_GET_TX_DESC are defined
 * elsewhere; presumably they return early on failure - confirm there.
 * sge.addr / sge.lkey are not assigned in this function.
 */
ucs_status_t uct_rc_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                      uct_pack_callback_t pack_cb, void *arg,
                                      size_t length)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    uct_rc_hdr_t *rch;
    struct ibv_sge sge;
    struct ibv_send_wr wr;

    UCT_CHECK_AM_ID(id);
    /* Header plus payload is checked against the segment size */
    UCT_CHECK_LENGTH(sizeof(*rch) + length, iface->super.super.config.seg_size,
                     "am_bcopy");
    UCT_RC_VERBS_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->super.tx.mp, desc);

    /* ucs_mpool_put is installed as the descriptor's handler */
    desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;

    /* Header first, then let the caller pack its data right behind it */
    rch        = (void*)(desc + 1);
    rch->am_id = id;
    pack_cb(rch + 1, arg, length);

    sge.length = sizeof(*rch) + length;
    wr.opcode  = IBV_WR_SEND;
    wr.num_sge = 1;
    wr.sg_list = &sge;

    UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, 0);

    return UCS_OK;
}
/*
 * Zero-copy eager tagged send.
 *
 * The user IOV is converted into sge[1..] by uct_ib_verbs_sge_fill_iov();
 * sge[0] is handed to UCT_RC_VERBS_GET_TM_ZCOPY_DESC together with the tag and
 * 'app_ctx'. The work request is posted with (sge_cnt + 1) entries.
 *
 * Returns UCS_INPROGRESS after posting.
 *
 * NOTE(review): the UCT_* macros are defined elsewhere; presumably the check
 * macros return early on failure and 'app_ctx' / 'send_flags' are filled by
 * UCT_RC_VERBS_FILL_TM_IMM / UCT_RC_VERBS_GET_TM_ZCOPY_DESC - confirm there.
 */
ucs_status_t uct_rc_verbs_ep_tag_eager_zcopy(uct_ep_h tl_ep, uct_tag_t tag,
                                             uint64_t imm,
                                             const uct_iov_t *iov,
                                             size_t iovcnt,
                                             uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge[UCT_IB_MAX_IOV];
    struct ibv_send_wr wr;
    uint32_t app_ctx;
    size_t sge_cnt;
    int send_flags;

    /* The IOV count is checked against a limit of 1 */
    UCT_CHECK_IOV_SIZE(iovcnt, 1ul, "uct_rc_verbs_ep_tag_eager_zcopy");
    UCT_RC_CHECK_ZCOPY_DATA(iface->tm.eager_hdr_size,
                            uct_iov_total_length(iov, iovcnt),
                            iface->super.super.config.seg_size);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    /* User data goes after the first scatter/gather entry */
    sge_cnt = uct_ib_verbs_sge_fill_iov(sge + 1, iov, iovcnt);

    UCT_RC_VERBS_FILL_TM_IMM(wr, imm, app_ctx);
    UCT_RC_VERBS_GET_TM_ZCOPY_DESC(iface, &iface->verbs_common.short_desc_mp,
                                   desc, tag, app_ctx, comp, &send_flags,
                                   sge[0]);

    wr.sg_list = sge;
    wr.num_sge = sge_cnt + 1;

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, send_flags);

    return UCS_INPROGRESS;
}
/*
 * Buffered-copy GET (variant that fills the descriptor fields by hand).
 *
 * The descriptor records the unpack callback, its argument, the user
 * completion and the length. The completion handler is chosen by whether
 * 'comp' is NULL. An IBV_WR_RDMA_READ of 'length' bytes from
 * 'remote_addr'/'rkey' is then posted with IBV_SEND_SIGNALED.
 *
 * Returns UCS_INPROGRESS after posting.
 *
 * NOTE(review): the UCT_*CHECK* macros and UCT_RC_IFACE_GET_TX_DESC are defined
 * elsewhere; presumably they return early on failure - confirm there.
 */
ucs_status_t uct_rc_verbs_ep_get_bcopy(uct_ep_h tl_ep,
                                       uct_unpack_callback_t unpack_cb,
                                       void *arg, size_t length,
                                       uint64_t remote_addr, uct_rkey_t rkey,
                                       uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;

    UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size, "get_bcopy");
    UCT_RC_VERBS_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->super.tx.mp, desc);

    /* Same bound as the UCT_CHECK_LENGTH above, expressed as an assertion */
    ucs_assert(length <= iface->super.super.config.seg_size);

    /* Pick the handler variant depending on whether a completion was given */
    if (comp != NULL) {
        desc->super.handler = uct_rc_ep_get_bcopy_handler;
    } else {
        desc->super.handler = uct_rc_ep_get_bcopy_handler_no_completion;
    }

    /* Unpack state stored on the descriptor */
    desc->super.unpack_arg = arg;
    desc->super.user_comp  = comp;
    desc->super.length     = length;
    desc->unpack_cb        = unpack_cb;

    uct_rc_verbs_fill_rdma_wr(&wr, IBV_WR_RDMA_READ, &sge, length, remote_addr,
                              rkey);
    UCT_TL_EP_STAT_OP(&ep->super.super, GET, BCOPY, length);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED);

    return UCS_INPROGRESS;
}
/*
 * Zero-copy active message send (variant taking one payload buffer and 'memh').
 *
 * The RC header and the user header are copied into the region right after
 * the descriptor and described by sge[0]. A non-empty payload is described by
 * sge[1], pointing directly at 'payload'. Its lkey comes from 'memh', or is 0
 * when 'memh' is UCT_INVALID_MEM_HANDLE.
 *
 * With a NULL 'comp' the descriptor handler is ucs_mpool_put and send flags
 * are 0. Otherwise uct_rc_verbs_ep_am_zcopy_handler is installed, 'comp' is
 * stored on the descriptor, and IBV_SEND_SIGNALED is used.
 *
 * Returns UCS_INPROGRESS after posting.
 *
 * NOTE(review): the UCT_*CHECK* macros and UCT_RC_IFACE_GET_TX_DESC are defined
 * elsewhere; presumably they return early on failure - confirm there.
 * sge[0].addr / sge[0].lkey are not assigned in this function.
 */
ucs_status_t uct_rc_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id,
                                      const void *header,
                                      unsigned header_length,
                                      const void *payload, size_t length,
                                      uct_mem_h memh, uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    struct ibv_mr *mr           = memh;
    uct_rc_iface_send_desc_t *desc;
    uct_rc_hdr_t *rch;
    struct ibv_sge sge[2];
    struct ibv_send_wr wr;
    int send_flags;

    UCT_CHECK_AM_ID(id);
    /* RC header + user header is checked against the short descriptor size */
    UCT_CHECK_LENGTH(sizeof(*rch) + header_length,
                     iface->config.short_desc_size, "am_zcopy header");
    /* User header + payload is checked against the segment size */
    UCT_CHECK_LENGTH(header_length + length,
                     iface->super.super.config.seg_size, "am_zcopy payload");
    UCT_RC_VERBS_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->short_desc_mp, desc);

    if (comp != NULL) {
        desc->super.handler   = uct_rc_verbs_ep_am_zcopy_handler;
        desc->super.user_comp = comp;
        send_flags            = IBV_SEND_SIGNALED;
    } else {
        desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;
        send_flags          = 0;
    }

    /* Build the header in place: AM id followed by the user header bytes */
    rch        = (void*)(desc + 1);
    rch->am_id = id;
    memcpy(rch + 1, header, header_length);

    wr.opcode     = IBV_WR_SEND;
    wr.sg_list    = sge;
    sge[0].length = sizeof(*rch) + header_length;

    if (ucs_unlikely(length == 0)) {
        /* Header-only message */
        wr.num_sge = 1;
    } else {
        sge[1].addr   = (uintptr_t)payload;
        sge[1].length = length;
        if (mr == UCT_INVALID_MEM_HANDLE) {
            sge[1].lkey = 0;
        } else {
            sge[1].lkey = mr->lkey;
        }
        wr.num_sge = 2;
    }

    UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY, header_length + length);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, send_flags);

    return UCS_INPROGRESS;
}
/*
 * Shared posting helper for atomic operations (macro-filled variant).
 *
 * UCT_RC_VERBS_FILL_ATOMIC_WR is handed the work request, its SGE, the opcode,
 * both operands, the remote address/key and ep->super.umr_offset. The atomic
 * statistic is then bumped and the request is posted with 'force_sig' as the
 * send flags argument.
 *
 * NOTE(review): how the macro uses 'umr_offset' is not visible here.
 */
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_atomic_post(uct_rc_verbs_ep_t *ep, int opcode,
                            uint64_t compare_add, uint64_t swap,
                            uint64_t remote_addr, uct_rkey_t rkey,
                            uct_rc_iface_send_desc_t *desc, int force_sig)
{
    struct ibv_sge sge;
    struct ibv_send_wr wr;

    UCT_RC_VERBS_FILL_ATOMIC_WR(wr, wr.opcode, sge, opcode, compare_add, swap,
                                remote_addr, rkey, ep->super.umr_offset);
    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, force_sig);
}
/*
 * Buffered-copy PUT (variant returning the length as ssize_t).
 *
 * UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC is handed 'pack_cb'/'arg' and 'length'.
 * An IBV_WR_RDMA_WRITE of 'length' bytes to 'remote_addr'/'rkey' is then
 * posted with IBV_SEND_SIGNALED.
 *
 * Returns 'length'.
 *
 * NOTE(review): the UCT_* macros are defined elsewhere; presumably
 * UCT_RC_CHECK_RES returns early on failure and the descriptor macro assigns
 * 'length' - confirm there.
 */
ssize_t uct_rc_verbs_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                                  void *arg, uint64_t remote_addr,
                                  uct_rkey_t rkey)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;
    size_t length;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    /* Descriptor comes from the interface TX pool (iface->super.tx.mp) */
    UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC(&iface->super, &iface->super.tx.mp, desc,
                                       pack_cb, arg, length);
    UCT_RC_VERBS_FILL_RDMA_WR(wr, wr.opcode, IBV_WR_RDMA_WRITE, sge, length,
                              remote_addr, rkey);
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, BCOPY, length);

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED);

    return length;
}
/*
 * Shared posting helper for atomic operations (hand-filled variant).
 *
 * Builds a single-SGE work request with the given opcode and fills the atomic
 * fields (operands, remote address, rkey). The SGE length is set to
 * sizeof(uint64_t). The atomic statistic is then bumped and the request is
 * posted with 'force_sig' as the send flags argument.
 *
 * sge.addr / sge.lkey are not assigned in this function.
 */
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_atomic_post(uct_rc_verbs_ep_t *ep, int opcode,
                            uint64_t compare_add, uint64_t swap,
                            uint64_t remote_addr, uct_rkey_t rkey,
                            uct_rc_iface_send_desc_t *desc, int force_sig)
{
    struct ibv_sge sge;
    struct ibv_send_wr wr;

    /* Scatter/gather: a single 64-bit entry */
    sge.length = sizeof(uint64_t);
    wr.sg_list = &sge;
    wr.num_sge = 1;

    /* Operation and its remote target */
    wr.opcode                = opcode;
    wr.wr.atomic.remote_addr = remote_addr;
    wr.wr.atomic.rkey        = rkey;

    /* Operands */
    wr.wr.atomic.compare_add = compare_add;
    wr.wr.atomic.swap        = swap;

    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, force_sig);
}
/*
 * Buffered-copy eager tagged send.
 *
 * UCT_RC_VERBS_GET_TM_BCOPY_DESC is handed the tag, 'app_ctx', 'pack_cb'/'arg'
 * and 'length'. The SGE is sized as 'length' plus iface->tm.eager_hdr_size and
 * the request is posted with send flags 0.
 *
 * Returns 'length'.
 *
 * NOTE(review): the UCT_* macros are defined elsewhere; presumably
 * UCT_RC_CHECK_RES returns early on failure, UCT_RC_VERBS_FILL_TM_IMM sets
 * 'app_ctx', and the descriptor macro assigns 'length' - confirm there.
 */
ssize_t uct_rc_verbs_ep_tag_eager_bcopy(uct_ep_h tl_ep, uct_tag_t tag,
                                        uint64_t imm,
                                        uct_pack_callback_t pack_cb, void *arg)
{
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;
    uint32_t app_ctx;
    size_t length;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    UCT_RC_VERBS_FILL_TM_IMM(wr, imm, app_ctx);
    UCT_RC_VERBS_GET_TM_BCOPY_DESC(iface, &iface->super.tx.mp, desc, tag,
                                   app_ctx, pack_cb, arg, length);

    /* Wire size = packed data + eager tag-matching header */
    UCT_RC_VERBS_FILL_SGE(wr, sge, length + iface->tm.eager_hdr_size);

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, 0);

    return length;
}
/*
 * Buffered-copy GET (variant using the GET_BCOPY descriptor macro).
 *
 * UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC is handed the unpack callback, the
 * completion, the callback argument and the length. An IBV_WR_RDMA_READ of
 * 'length' bytes from 'remote_addr'/'rkey' is then posted with
 * IBV_SEND_SIGNALED.
 *
 * Returns UCS_INPROGRESS after posting.
 *
 * NOTE(review): the UCT_* macros are defined elsewhere; presumably the check
 * macros return early on failure - confirm there.
 */
ucs_status_t uct_rc_verbs_ep_get_bcopy(uct_ep_h tl_ep,
                                       uct_unpack_callback_t unpack_cb,
                                       void *arg, size_t length,
                                       uint64_t remote_addr, uct_rkey_t rkey,
                                       uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;

    /* Requested length is checked against the segment size */
    UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size, "get_bcopy");
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    /* Descriptor comes from the interface TX pool (iface->super.tx.mp) */
    UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC(&iface->super, &iface->super.tx.mp, desc,
                                       unpack_cb, comp, arg, length);
    UCT_RC_VERBS_FILL_RDMA_WR(wr, wr.opcode, IBV_WR_RDMA_READ, sge, length,
                              remote_addr, rkey);
    UCT_TL_EP_STAT_OP(&ep->super.super, GET, BCOPY, length);

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED);

    return UCS_INPROGRESS;
}
/*
 * Buffered-copy PUT (variant taking an explicit 'length').
 *
 * 'pack_cb' is invoked on the region right after the descriptor ('desc + 1').
 * An IBV_WR_RDMA_WRITE of 'length' bytes to 'remote_addr'/'rkey' is then
 * posted with IBV_SEND_SIGNALED. ucs_mpool_put is installed as the
 * descriptor's handler.
 *
 * Returns UCS_OK after posting.
 *
 * NOTE(review): the UCT_* macros are defined elsewhere; presumably the check
 * macros return early on failure and UCT_RC_VERBS_ZERO_LENGTH_POST
 * short-circuits zero-length requests - confirm there.
 */
ucs_status_t uct_rc_verbs_ep_put_bcopy(uct_ep_h tl_ep,
                                       uct_pack_callback_t pack_cb, void *arg,
                                       size_t length, uint64_t remote_addr,
                                       uct_rkey_t rkey)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;

    /* Requested length is checked against the segment size */
    UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size, "put_bcopy");
    UCT_RC_VERBS_ZERO_LENGTH_POST(length);
    UCT_RC_VERBS_CHECK_RES(iface, ep);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, iface->super.tx.mp, desc);

    desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;

    /* Let the caller pack its data into the buffer following the descriptor */
    pack_cb(desc + 1, arg, length);

    uct_rc_verbs_fill_rdma_wr(&wr, IBV_WR_RDMA_WRITE, &sge, length, remote_addr,
                              rkey);
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, BCOPY, length);
    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED);

    return UCS_OK;
}
/*
 * Buffered-copy active message send (variant tracking two lengths).
 *
 * UCT_RC_VERBS_GET_TX_AM_BCOPY_DESC is handed 'pack_cb'/'arg' together with
 * both 'length' and 'data_length'. The work request is sized with 'length'.
 * Statistics and the return value use 'data_length'. The request is posted
 * with send flags 0.
 *
 * Returns 'data_length'.
 *
 * NOTE(review): the UCT_* macros are defined elsewhere; presumably the check
 * macros return early on failure and the descriptor macro assigns both length
 * variables - confirm there.
 */
ssize_t uct_rc_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                 uct_pack_callback_t pack_cb, void *arg)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge;
    struct ibv_send_wr wr;
    size_t data_length;
    size_t length;

    /* Validation / resource / flow-control macros, in their original order */
    UCT_CHECK_AM_ID(id);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_CHECK_FC(&iface->super, &ep->super, id);

    /* Descriptor comes from the interface TX pool (iface->super.tx.mp) */
    UCT_RC_VERBS_GET_TX_AM_BCOPY_DESC(iface, &iface->super.tx.mp, desc, id,
                                      pack_cb, arg, length, data_length);
    UCT_RC_VERBS_FILL_AM_BCOPY_WR(wr, sge, length, wr.opcode);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, data_length);

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, 0);
    UCT_RC_UPDATE_FC(&iface->super, &ep->super, id);

    return data_length;
}