/*
 * Post a flow-control PURE_GRANT message on a verbs RC endpoint.
 *
 * The grant header is written into the interface's shared inline header
 * buffer, at offset 'notag_hdr_size' from its start, and the buffer is sent
 * from its beginning as a single inline SGE. 'req' is not used here.
 *
 * Returns UCS_OK after the work request was passed to
 * uct_rc_verbs_ep_post_send(). UCT_RC_CHECK_RES is defined elsewhere and
 * presumably returns from this function early when resources are missing.
 */
ucs_status_t uct_rc_verbs_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op,
                                     uct_rc_fc_request_t *req)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    size_t notag_hdr_size       = iface->verbs_common.config.notag_hdr_size;
    uct_rc_hdr_t *hdr           = iface->verbs_common.am_inl_hdr +
                                  notag_hdr_size;
    struct ibv_send_wr fc_wr;

    /* Only a PURE grant travels as a standalone message in RC; every other
     * FC message is piggybacked on an AM. */
    ucs_assert(op == UCT_RC_EP_FC_PURE_GRANT);

    /* The FC window is deliberately not checked: a credit grant has to go
     * out whatever the window state is, otherwise both sides could end up
     * waiting for each other (head-to-head deadlock). */
    ucs_assert(sizeof(*hdr) + notag_hdr_size <=
               iface->verbs_common.config.max_inline);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    hdr->am_id = UCT_RC_EP_FC_PURE_GRANT;

    /* The single SGE covers the buffer from its start up to the end of hdr */
    iface->verbs_common.inl_sge[0].addr   =
            (uintptr_t)iface->verbs_common.am_inl_hdr;
    iface->verbs_common.inl_sge[0].length = sizeof(*hdr) + notag_hdr_size;

    fc_wr.next    = NULL;
    fc_wr.opcode  = IBV_WR_SEND;
    fc_wr.sg_list = iface->verbs_common.inl_sge;
    fc_wr.num_sge = 1;

    uct_rc_verbs_ep_post_send(iface, ep, &fc_wr, IBV_SEND_INLINE);
    return UCS_OK;
}
/*
 * Send an active message through a bounce buffer on an mlx5 RC endpoint.
 *
 * A TX descriptor is taken from the interface pool, an RC header carrying
 * 'id' is written right after the descriptor, and 'pack_cb' fills the payload
 * right after that header. The descriptor is posted as a raw SEND and is
 * released with ucs_mpool_put() by its completion handler.
 *
 * Returns the number of bytes reported by 'pack_cb'. The UCT_*CHECK* and
 * GET_TX_DESC macros are defined elsewhere and presumably return early from
 * this function on failure.
 */
ssize_t uct_rc_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                uct_pack_callback_t pack_cb, void *arg)
{
    uct_rc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;
    uct_rc_hdr_t *rch;
    size_t length;

    UCT_CHECK_AM_ID(id);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_CHECK_FC_WND(&iface->super, &ep->super, id);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, &iface->super.tx.mp, desc);

    desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;

    /* Wire layout inside the descriptor: [desc][rc header][packed payload] */
    rch        = (void*)(desc + 1);
    rch->am_id = id;
    length     = pack_cb(rch + 1, arg);

    uct_rc_mlx5_ep_bcopy_post(ep,
                              MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_RAW,
                              sizeof(*rch) + length,
                              0, NULL, 0,
                              0, 0,
                              0, desc);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length);
    UCT_RC_UPDATE_FC_WND(&ep->super);

    return length;
}
/*
 * For RNDV request send regular eager packet with IBV_SEND_WITH_IMM and
 * imm_value = 0. Receiver will handle such message as rndv request.
 *
 * The tag header is built in a stack buffer of iface->tm.eager_hdr_size bytes
 * and sent, together with the user 'header', as two inline SGEs.
 *
 * tl_ep          Endpoint to send on.
 * tag            Tag placed into the tag-matching header.
 * header         User header sent as the payload.
 * header_length  Length of 'header'; together with the eager header it must
 *                not exceed the interface's max_inline.
 *
 * Returns UCS_OK after the work request was posted. UCT_CHECK_LENGTH and
 * UCT_RC_CHECK_RES are defined elsewhere and presumably return an error from
 * this function when the check fails.
 */
ucs_status_t uct_rc_verbs_ep_tag_rndv_request(uct_ep_h tl_ep, uct_tag_t tag,
                                              const void* header,
                                              unsigned header_length)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    void *tm_hdr                = ucs_alloca(iface->tm.eager_hdr_size);
    uint32_t app_ctx;
    struct ibv_send_wr wr;

    /* Report this operation under its own name, so a failed length check is
     * not mistaken for one coming from the eager-short path. */
    UCT_CHECK_LENGTH(header_length + iface->tm.eager_hdr_size, 0,
                     iface->verbs_common.config.max_inline,
                     "tag_rndv_request");
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    wr.sg_list = iface->verbs_common.inl_sge;
    wr.num_sge = 2;
    wr.opcode  = IBV_WR_SEND_WITH_IMM;
    wr.next    = NULL;

    /* Immediate value 0 is what marks the packet as a rndv request */
    uct_rc_verbs_tag_imm_data_pack(&(wr.imm_data), &app_ctx, 0ul);
    uct_rc_verbs_iface_fill_inl_tag_sge(iface, tm_hdr, tag, header,
                                        header_length, app_ctx);
    uct_rc_verbs_ep_post_send(iface, ep, &wr, IBV_SEND_INLINE);

    return UCS_OK;
}
/*
 * Common zero-copy post routine for the mlx5 RC endpoint.
 *
 * Posts 'iov' as data pointers with the given opcode and then queues the
 * user completion 'comp' on the endpoint's TX QP, keyed by the sw_pi value
 * sampled before the post. The am_* arguments are used by SEND opcodes and
 * the rdma_* arguments by RDMA opcodes.
 *
 * When 'comp' is NULL the signaling flag is 'force_sig'; otherwise the WQE
 * is posted with MLX5_WQE_CTRL_CQ_UPDATE.
 *
 * Returns UCS_INPROGRESS after posting. UCT_RC_CHECK_RES is defined elsewhere
 * and presumably returns early when resources are missing.
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_mlx5_ep_zcopy_post(uct_rc_mlx5_ep_t *ep, unsigned opcode,
                          const uct_iov_t *iov, size_t iovcnt,
                          /* SEND */
                          uint8_t am_id, const void *am_hdr,
                          unsigned am_hdr_len,
                          /* RDMA */
                          uint64_t rdma_raddr, uct_rkey_t rdma_rkey,
                          int force_sig, uct_completion_t *comp)
{
    uct_rc_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
                                           uct_rc_iface_t);
    uint16_t sn;

    UCT_RC_CHECK_RES(iface, &ep->super);

    /* Remember the producer index before the post advances it */
    sn = ep->tx.wq.sw_pi;

    uct_rc_mlx5_txqp_dptr_post_iov(iface, IBV_QPT_RC,
                                   &ep->super.txqp, &ep->tx.wq,
                                   opcode, iov, iovcnt,
                                   am_id, am_hdr, am_hdr_len,
                                   rdma_raddr,
                                   uct_ib_md_direct_rkey(rdma_rkey),
                                   NULL, 0,
                                   (comp != NULL) ? MLX5_WQE_CTRL_CQ_UPDATE :
                                                    force_sig);

    uct_rc_txqp_add_send_comp(iface, &ep->super.txqp, comp, sn);
    return UCS_INPROGRESS;
}
/*
 * Send an active message through a bounce buffer on a verbs RC endpoint.
 *
 * The descriptor is obtained and packed by
 * UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC (which receives 'pack_cb', 'arg' and
 * stores the packed size in 'length'), then posted with IBV_SEND_SOLICITED.
 * The 'flags' argument is not used here.
 *
 * Returns the packed payload length. The UCT_*CHECK* macros are defined
 * elsewhere and presumably return early from this function on failure.
 */
ssize_t uct_rc_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                 uct_pack_callback_t pack_cb, void *arg,
                                 unsigned flags)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_send_wr wr;
    struct ibv_sge sge;
    size_t length;

    UCT_CHECK_AM_ID(id);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_CHECK_FC(&iface->super, &ep->super, id);

    UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC(&iface->super, &iface->super.tx.mp,
                                      desc, id, uct_rc_am_hdr_fill,
                                      uct_rc_hdr_t, pack_cb, arg, &length);

    /* The posted length includes the RC header in front of the payload */
    UCT_RC_VERBS_FILL_AM_BCOPY_WR(wr, sge, length + sizeof(uct_rc_hdr_t),
                                  wr.opcode);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length);

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SOLICITED,
                                   INT_MAX);
    UCT_RC_UPDATE_FC(&iface->super, &ep->super, id);

    return length;
}
/*
 * Flush an mlx5 RC endpoint.
 *
 * Outcomes, in the order they are tested:
 *  - UCS_ERR_NO_RESOURCE when the interface has no TX resources;
 *  - UCS_OK when the available TX QP count equals the work queue's bb_max;
 *  - otherwise 'comp' is queued on the TX QP and UCS_INPROGRESS is returned.
 *    If there are unsignaled sends, a NOP is posted first and the completion
 *    is keyed by the sw_pi value sampled before that post. If there are none,
 *    the completion is keyed by sig_pi, or UCS_ERR_NO_RESOURCE is returned
 *    when the endpoint has no TX resources.
 *
 * 'flags' is not used here. UCT_RC_CHECK_RES is defined elsewhere and
 * presumably returns early when resources are missing.
 */
ucs_status_t uct_rc_mlx5_ep_flush(uct_ep_h tl_ep, unsigned flags,
                                  uct_completion_t *comp)
{
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uint16_t sn;

    if (!uct_rc_iface_has_tx_resources(&iface->super)) {
        return UCS_ERR_NO_RESOURCE;
    }

    if (uct_rc_txqp_available(&ep->super.txqp) == ep->tx.wq.bb_max) {
        UCT_TL_EP_STAT_FLUSH(&ep->super.super);
        return UCS_OK;
    }

    if (uct_rc_txqp_unsignaled(&ep->super.txqp) == 0) {
        /* Everything posted so far is already signaled */
        if (!uct_rc_ep_has_tx_resources(&ep->super)) {
            return UCS_ERR_NO_RESOURCE;
        }

        sn = ep->tx.wq.sig_pi;
    } else {
        /* Sample the index first: the NOP post below advances it */
        sn = ep->tx.wq.sw_pi;
        UCT_RC_CHECK_RES(&iface->super, &ep->super);
        uct_rc_mlx5_txqp_inline_post(&iface->super, IBV_QPT_RC,
                                     &ep->super.txqp, &ep->tx.wq,
                                     MLX5_OPCODE_NOP, NULL, 0,
                                     0, 0,
                                     0, 0,
                                     NULL, 0);
    }

    uct_rc_txqp_add_send_comp(&iface->super, &ep->super.txqp, comp, sn);
    UCT_TL_EP_STAT_FLUSH_WAIT(&ep->super.super);
    return UCS_INPROGRESS;
}
/*
 * Send an active message with zero-copy payload on a verbs RC endpoint.
 *
 * sge[0] is reserved for the RC header plus the user 'header'; the user iov
 * entries are filled in starting at sge[1]. That is why the iov count is
 * checked against the interface maximum minus one.
 *
 * Returns UCS_INPROGRESS after posting. The UCT_*CHECK* and GET_*_DESC macros
 * are defined elsewhere and presumably return early from this function on
 * failure.
 */
ucs_status_t uct_rc_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id,
                                      const void *header,
                                      unsigned header_length,
                                      const uct_iov_t *iov, size_t iovcnt,
                                      uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep          = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface    = ucs_derived_of(tl_ep->iface,
                                                    uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc = NULL;
    struct ibv_sge sge[UCT_IB_MAX_IOV]; /* First sge is reserved for the header */
    struct ibv_send_wr wr;
    int send_flags;
    size_t sge_cnt;

    UCT_CHECK_IOV_SIZE(iovcnt,
                       uct_ib_iface_get_max_iov(&iface->super.super) - 1,
                       "uct_rc_verbs_ep_am_zcopy");
    UCT_RC_CHECK_AM_ZCOPY(id, header_length,
                          uct_iov_total_length(iov, iovcnt),
                          iface->verbs_common.config.short_desc_size,
                          iface->super.super.config.seg_size);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_CHECK_FC_WND(&iface->super, &ep->super, id);

    UCT_RC_IFACE_GET_TX_AM_ZCOPY_DESC(&iface->super,
                                      &iface->verbs_common.short_desc_mp,
                                      desc, id, header, header_length, comp,
                                      &send_flags);

    sge[0].length = sizeof(uct_rc_hdr_t) + header_length;
    sge_cnt       = uct_ib_verbs_sge_fill_iov(sge + 1, iov, iovcnt);

    /* +1 accounts for the header entry in sge[0] */
    UCT_RC_VERBS_FILL_AM_ZCOPY_WR_IOV(wr, sge, (sge_cnt + 1), wr.opcode);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY,
                      (header_length + uct_iov_total_length(iov, iovcnt)));

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, send_flags);
    UCT_RC_UPDATE_FC_WND(&iface->super, &ep->super, id);

    return UCS_INPROGRESS;
}
/*
 * Post a flow-control PURE_GRANT message on a verbs RC endpoint.
 *
 * The RC header lives on the stack and is sent as one inline SGE using the
 * interface's shared inl_sge array.
 *
 * Returns UCS_OK after the work request was passed to
 * uct_rc_verbs_ep_post_send(). UCT_RC_CHECK_RES is defined elsewhere and
 * presumably returns early when resources are missing.
 */
ucs_status_t uct_rc_verbs_ep_fc_ctrl(uct_rc_ep_t *rc_ep)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(rc_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(rc_ep->super.super.iface,
                                                 uct_rc_verbs_iface_t);
    struct ibv_send_wr fc_wr;
    uct_rc_hdr_t hdr;

    /* The FC window is deliberately not checked: a credit grant has to go
     * out whatever the window state is, otherwise both sides could end up
     * waiting for each other (head-to-head deadlock). */
    ucs_assert(sizeof(hdr) <= iface->verbs_common.config.max_inline);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    hdr.am_id = UCT_RC_EP_FC_PURE_GRANT;

    iface->verbs_common.inl_sge[0].addr   = (uintptr_t)&hdr;
    iface->verbs_common.inl_sge[0].length = sizeof(hdr);

    fc_wr.next    = NULL;
    fc_wr.opcode  = IBV_WR_SEND;
    fc_wr.sg_list = iface->verbs_common.inl_sge;
    fc_wr.num_sge = 1;

    uct_rc_verbs_ep_post_send(iface, ep, &fc_wr, IBV_SEND_INLINE);
    return UCS_OK;
}
/*
 * Send a tagged eager message with zero-copy payload on a verbs RC endpoint.
 *
 * At most one iov entry is accepted (checked against 1ul). The user iov is
 * filled in starting at sge[1]; sge[0] is handed to
 * UCT_RC_VERBS_GET_TM_ZCOPY_DESC together with the tag and app context.
 *
 * Returns UCS_INPROGRESS after posting. The UCT_*CHECK*, FILL and GET macros
 * are defined elsewhere and presumably return early from this function on
 * failure.
 */
ucs_status_t uct_rc_verbs_ep_tag_eager_zcopy(uct_ep_h tl_ep, uct_tag_t tag,
                                             uint64_t imm,
                                             const uct_iov_t *iov,
                                             size_t iovcnt,
                                             uct_completion_t *comp)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_sge sge[UCT_IB_MAX_IOV];
    struct ibv_send_wr wr;
    uint32_t app_ctx;
    size_t sge_cnt;
    int send_flags;

    UCT_CHECK_IOV_SIZE(iovcnt, 1ul, "uct_rc_verbs_ep_tag_eager_zcopy");
    UCT_RC_CHECK_ZCOPY_DATA(iface->tm.eager_hdr_size,
                            uct_iov_total_length(iov, iovcnt),
                            iface->super.super.config.seg_size);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    sge_cnt = uct_ib_verbs_sge_fill_iov(sge + 1, iov, iovcnt);

    UCT_RC_VERBS_FILL_TM_IMM(wr, imm, app_ctx);
    UCT_RC_VERBS_GET_TM_ZCOPY_DESC(iface, &iface->verbs_common.short_desc_mp,
                                   desc, tag, app_ctx, comp, &send_flags,
                                   sge[0]);

    /* One extra entry for sge[0] in front of the user data */
    wr.sg_list = sge;
    wr.num_sge = sge_cnt + 1;

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, send_flags);
    return UCS_INPROGRESS;
}
/*
 * Issue an RDMA READ into a bounce buffer on an mlx5 RC endpoint.
 *
 * A TX descriptor is taken from the interface pool and records the unpack
 * callback, its argument, the length and the user completion. The completion
 * handler is the "no_completion" variant when 'comp' is NULL. The read is
 * posted with MLX5_WQE_CTRL_CQ_UPDATE.
 *
 * Returns UCS_INPROGRESS after posting. UCT_CHECK_LENGTH, UCT_RC_CHECK_RES
 * and UCT_RC_IFACE_GET_TX_DESC are defined elsewhere and presumably return
 * early from this function on failure.
 */
ucs_status_t uct_rc_mlx5_ep_get_bcopy(uct_ep_h tl_ep,
                                      uct_unpack_callback_t unpack_cb,
                                      void *arg, size_t length,
                                      uint64_t remote_addr, uct_rkey_t rkey,
                                      uct_completion_t *comp)
{
    uct_rc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;

    UCT_CHECK_LENGTH(length, iface->super.super.config.seg_size, "get_bcopy");
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, &iface->super.tx.mp, desc);

    if (comp == NULL) {
        desc->super.handler = uct_rc_ep_get_bcopy_handler_no_completion;
    } else {
        desc->super.handler = uct_rc_ep_get_bcopy_handler;
    }

    desc->unpack_cb        = unpack_cb;
    desc->super.unpack_arg = arg;
    desc->super.length     = length;
    desc->super.user_comp  = comp;

    uct_rc_mlx5_ep_bcopy_post(ep, MLX5_OPCODE_RDMA_READ, length,
                              0, NULL, 0,
                              remote_addr, rkey,
                              MLX5_WQE_CTRL_CQ_UPDATE, desc);
    UCT_TL_EP_STAT_OP(&ep->super.super, GET, BCOPY, length);

    return UCS_INPROGRESS;
}
/*
 * Post an atomic add with no result returned to the caller, on an mlx5 RC
 * endpoint.
 *
 * The descriptor comes from the atomic descriptor pool via
 * UCT_RC_IFACE_GET_TX_ATOMIC_ADD_DESC. 'add' is passed to
 * uct_rc_mlx5_ep_atomic_post() in the swap/add position, with compare mask,
 * compare value and signal flag all zero.
 *
 * Returns UCS_OK after posting. The UCT_RC_* macros are defined elsewhere and
 * presumably return early from this function on failure.
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_mlx5_ep_atomic_add(uct_ep_h tl_ep, int opcode, unsigned length,
                          uint64_t add, uint64_t remote_addr,
                          uct_rkey_t rkey)
{
    uct_rc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_ATOMIC_ADD_DESC(&iface->super,
                                        &iface->mlx5_common.tx.atomic_desc_mp,
                                        desc);

    uct_rc_mlx5_ep_atomic_post(ep, opcode, desc, length,
                               remote_addr, rkey,
                               0, 0, add,
                               0);
    return UCS_OK;
}
/*
 * Write a short buffer to remote memory on a verbs RC endpoint.
 *
 * 'length' is checked against the interface's max_inline, the interface's
 * preallocated work request (inl_rwrite_wr) is filled by
 * UCT_RC_VERBS_FILL_INL_PUT_WR, and it is posted with
 * IBV_SEND_INLINE | IBV_SEND_SIGNALED.
 *
 * Returns UCS_OK after posting. The UCT_*CHECK* macros are defined elsewhere
 * and presumably return early from this function on failure.
 */
ucs_status_t uct_rc_verbs_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                       unsigned length, uint64_t remote_addr,
                                       uct_rkey_t rkey)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);

    UCT_CHECK_LENGTH(length, iface->verbs_common.config.max_inline,
                     "put_short");
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    UCT_RC_VERBS_FILL_INL_PUT_WR(iface, remote_addr, rkey, buffer, length);
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length);

    uct_rc_verbs_ep_post_send(iface, ep, &iface->inl_rwrite_wr,
                              IBV_SEND_INLINE | IBV_SEND_SIGNALED);
    return UCS_OK;
}
/*
 * Post a 64-bit atomic operation that returns a result, on a verbs RC
 * endpoint.
 *
 * The descriptor is taken from the short descriptor pool and set up by
 * UCT_RC_IFACE_GET_TX_ATOMIC_DESC with the interface's atomic64 handler,
 * the 'result' buffer and the user completion. The request is posted
 * signaled.
 *
 * Returns UCS_INPROGRESS after posting. The UCT_RC_* macros are defined
 * elsewhere and presumably return early from this function on failure.
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_verbs_ep_atomic(uct_rc_verbs_ep_t *ep, int opcode, void *result,
                       uint64_t compare_add, uint64_t swap,
                       uint64_t remote_addr, uct_rkey_t rkey,
                       uct_completion_t *comp)
{
    uct_rc_verbs_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_ATOMIC_DESC(&iface->super,
                                    &iface->verbs_common.short_desc_mp, desc,
                                    iface->super.config.atomic64_handler,
                                    result, comp);

    uct_rc_verbs_ep_atomic_post(ep, opcode, compare_add, swap,
                                remote_addr, rkey, desc,
                                IBV_SEND_SIGNALED);
    return UCS_INPROGRESS;
}
/*
 * Post a 64-bit atomic add with no result returned to the caller, on a verbs
 * RC endpoint.
 *
 * Implemented as IBV_WR_ATOMIC_FETCH_AND_ADD with 'add' as the operand and a
 * descriptor from the short descriptor pool; the request is posted signaled.
 *
 * Returns UCS_OK after posting. The UCT_RC_* macros are defined elsewhere and
 * presumably return early from this function on failure.
 */
ucs_status_t uct_rc_verbs_ep_atomic_add64(uct_ep_h tl_ep, uint64_t add,
                                          uint64_t remote_addr,
                                          uct_rkey_t rkey)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;

    /* TODO don't allocate descriptor - have dummy buffer */
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_ATOMIC_ADD_DESC(&iface->super,
                                        &iface->verbs_common.short_desc_mp,
                                        desc);

    uct_rc_verbs_ep_atomic_post(ep, IBV_WR_ATOMIC_FETCH_AND_ADD,
                                add, 0,
                                remote_addr, rkey, desc,
                                IBV_SEND_SIGNALED);
    return UCS_OK;
}
/*
 * Post a flow-control PURE_GRANT message on an mlx5 RC endpoint.
 *
 * Sent as a raw inline SEND with no payload; UCT_RC_EP_FC_PURE_GRANT is
 * passed in the AM id position of uct_rc_mlx5_txqp_inline_post().
 *
 * Returns UCS_OK after posting. UCT_RC_CHECK_RES is defined elsewhere and
 * presumably returns early when resources are missing.
 */
ucs_status_t uct_rc_mlx5_ep_fc_ctrl(uct_rc_ep_t *rc_ep)
{
    uct_rc_iface_t *iface = ucs_derived_of(rc_ep->super.super.iface,
                                           uct_rc_iface_t);
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(rc_ep, uct_rc_mlx5_ep_t);

    UCT_RC_CHECK_RES(iface, &ep->super);

    uct_rc_mlx5_txqp_inline_post(iface, IBV_QPT_RC,
                                 &ep->super.txqp, &ep->tx.wq,
                                 MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_RAW,
                                 NULL, 0,
                                 UCT_RC_EP_FC_PURE_GRANT, 0,
                                 0, 0,
                                 NULL, 0);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, 0);

    return UCS_OK;
}
/*
 * Send a short active message inline on a verbs RC endpoint.
 *
 * The interface's preallocated AM work request (inl_am_wr) is filled by
 * uct_rc_verbs_iface_fill_inl_am_sge() with the id, the 64-bit 'hdr' and the
 * payload, then posted with IBV_SEND_INLINE | IBV_SEND_SOLICITED. The
 * statistics count sizeof(hdr) + length bytes.
 *
 * Returns UCS_OK after posting. The UCT_*CHECK* macros are defined elsewhere
 * and presumably return early from this function on failure.
 */
ucs_status_t uct_rc_verbs_ep_am_short(uct_ep_h tl_ep, uint8_t id,
                                      uint64_t hdr, const void *buffer,
                                      unsigned length)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);

    UCT_RC_CHECK_AM_SHORT(id, length, iface->config.max_inline);
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_CHECK_FC(&iface->super, &ep->super, id);

    uct_rc_verbs_iface_fill_inl_am_sge(iface, id, hdr, buffer, length);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length);

    uct_rc_verbs_ep_post_send(iface, ep, &iface->inl_am_wr,
                              IBV_SEND_INLINE | IBV_SEND_SOLICITED,
                              INT_MAX);
    UCT_RC_UPDATE_FC(&iface->super, &ep->super, id);

    return UCS_OK;
}
/*
 * Send a short tagged eager message inline on a verbs RC endpoint.
 *
 * The tag header is built in a stack buffer of iface->tm.eager_hdr_size
 * bytes. Header plus payload must fit within the interface's max_inline. The
 * interface's preallocated work request (inl_am_wr) is posted inline.
 *
 * Returns UCS_OK after posting. UCT_CHECK_LENGTH and UCT_RC_CHECK_RES are
 * defined elsewhere and presumably return early from this function on
 * failure.
 */
ucs_status_t uct_rc_verbs_ep_tag_eager_short(uct_ep_h tl_ep, uct_tag_t tag,
                                             const void *data, size_t length)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    void *tm_hdr                = ucs_alloca(iface->tm.eager_hdr_size);

    UCT_CHECK_LENGTH(length + iface->tm.eager_hdr_size, 0,
                     iface->verbs_common.config.max_inline,
                     "tag_short");
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    /* The last argument is the app context; 0 for a plain eager send */
    uct_rc_verbs_iface_fill_inl_tag_sge(iface, tm_hdr, tag, data, length, 0);
    uct_rc_verbs_ep_post_send(iface, ep, &iface->inl_am_wr, IBV_SEND_INLINE);

    return UCS_OK;
}
/*
 * Write a short buffer to remote memory on an mlx5 RC endpoint.
 *
 * The buffer is posted inline as MLX5_OPCODE_RDMA_WRITE; the remote key is
 * converted with uct_ib_md_direct_rkey().
 *
 * Returns UCS_OK after posting. UCT_RC_MLX5_CHECK_PUT_SHORT and
 * UCT_RC_CHECK_RES are defined elsewhere and presumably return early from
 * this function on failure.
 */
ucs_status_t uct_rc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *buffer,
                                      unsigned length, uint64_t remote_addr,
                                      uct_rkey_t rkey)
{
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);

    UCT_RC_MLX5_CHECK_PUT_SHORT(length, 0);
    UCT_RC_CHECK_RES(iface, &ep->super);

    uct_rc_mlx5_txqp_inline_post(iface, IBV_QPT_RC,
                                 &ep->super.txqp, &ep->tx.wq,
                                 MLX5_OPCODE_RDMA_WRITE,
                                 buffer, length,
                                 0, 0,
                                 remote_addr, uct_ib_md_direct_rkey(rkey),
                                 NULL, 0);
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length);

    return UCS_OK;
}
/*
 * Write packed data to remote memory through a bounce buffer on an mlx5 RC
 * endpoint.
 *
 * UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC obtains the descriptor and receives
 * 'pack_cb', 'arg' and 'length'. The descriptor is then posted as
 * MLX5_OPCODE_RDMA_WRITE with MLX5_WQE_CTRL_CQ_UPDATE.
 *
 * Returns 'length' as set by the descriptor macro. The UCT_RC_* macros are
 * defined elsewhere and presumably return early from this function on
 * failure.
 */
ssize_t uct_rc_mlx5_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                                 void *arg, uint64_t remote_addr,
                                 uct_rkey_t rkey)
{
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
    uct_rc_iface_send_desc_t *desc;
    size_t length;

    UCT_RC_CHECK_RES(iface, &ep->super);
    UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC(iface, &iface->tx.mp, desc,
                                       pack_cb, arg, length);

    uct_rc_mlx5_txqp_bcopy_post(iface, &ep->super.txqp, &ep->tx.wq,
                                MLX5_OPCODE_RDMA_WRITE, length,
                                0, NULL, 0,
                                remote_addr, rkey,
                                MLX5_WQE_CTRL_CQ_UPDATE, desc);
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, BCOPY, length);

    return length;
}
/*
 * Write packed data to remote memory through a bounce buffer on a verbs RC
 * endpoint.
 *
 * UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC obtains the descriptor and receives
 * 'pack_cb', 'arg' and 'length'. The work request is filled as
 * IBV_WR_RDMA_WRITE and posted signaled.
 *
 * Returns 'length' as set by the descriptor macro. The UCT_RC_* macros are
 * defined elsewhere and presumably return early from this function on
 * failure.
 */
ssize_t uct_rc_verbs_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                                  void *arg, uint64_t remote_addr,
                                  uct_rkey_t rkey)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;
    struct ibv_send_wr wr;
    struct ibv_sge sge;
    size_t length;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC(&iface->super, &iface->super.tx.mp,
                                       desc, pack_cb, arg, length);

    UCT_RC_VERBS_FILL_RDMA_WR(wr, wr.opcode, IBV_WR_RDMA_WRITE, sge, length,
                              remote_addr, rkey);
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, BCOPY, length);

    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED);
    return length;
}
/*
 * Post a fenced, signaled NOP work request on a verbs RC endpoint.
 *
 * Only available when the build has HAVE_DECL_IBV_EXP_WR_NOP; otherwise the
 * function returns UCS_ERR_UNSUPPORTED without touching the endpoint.
 *
 * Returns UCS_OK after posting. UCT_RC_CHECK_RES is defined elsewhere and
 * presumably returns early when resources are missing.
 */
static ucs_status_t uct_rc_verbs_ep_nop(uct_rc_verbs_ep_t *ep)
{
#if HAVE_DECL_IBV_EXP_WR_NOP
    uct_rc_verbs_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
                                                 uct_rc_verbs_iface_t);
    struct ibv_exp_send_wr wr;

    /* A NOP carries no data: no SGEs, only the fence flag */
    wr.next           = NULL;
    wr.num_sge        = 0;
    wr.exp_opcode     = IBV_EXP_WR_NOP;
    wr.exp_send_flags = IBV_EXP_SEND_FENCE;
    wr.comp_mask      = 0;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    uct_rc_verbs_exp_post_send(ep, &wr, IBV_EXP_SEND_SIGNALED);

    return UCS_OK;
#else
    return UCS_ERR_UNSUPPORTED;
#endif
}
/*
 * Post an atomic operation that returns a result, on an mlx5 RC endpoint.
 *
 * The completion handler is selected by uct_rc_iface_atomic_handler() from
 * 'ext' and 'length'. The descriptor comes from the atomic descriptor pool
 * and is bound to the 'result' buffer and the user completion. The request
 * is posted with MLX5_WQE_CTRL_CQ_UPDATE.
 *
 * Returns UCS_INPROGRESS after posting. The UCT_RC_* macros are defined
 * elsewhere and presumably return early from this function on failure.
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_mlx5_ep_atomic(uct_rc_mlx5_ep_t *ep, int opcode, void *result,
                      int ext, unsigned length,
                      uint64_t remote_addr, uct_rkey_t rkey,
                      uint64_t compare_mask, uint64_t compare,
                      uint64_t swap_add, uct_completion_t *comp)
{
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_ATOMIC_DESC(&iface->super,
                                    &iface->mlx5_common.tx.atomic_desc_mp,
                                    desc,
                                    uct_rc_iface_atomic_handler(&iface->super,
                                                                ext, length),
                                    result, comp);

    uct_rc_mlx5_ep_atomic_post(ep, opcode, desc, length,
                               remote_addr, rkey,
                               compare_mask, compare, swap_add,
                               MLX5_WQE_CTRL_CQ_UPDATE);
    return UCS_INPROGRESS;
}
/*
 * Write packed data to remote memory through a bounce buffer on an mlx5 RC
 * endpoint.
 *
 * A TX descriptor is taken from the interface pool, 'pack_cb' writes the
 * payload right after the descriptor, and the result is posted as
 * MLX5_OPCODE_RDMA_WRITE with MLX5_WQE_CTRL_CQ_UPDATE. The descriptor is
 * released with ucs_mpool_put() by its completion handler.
 *
 * Returns the number of bytes reported by 'pack_cb'. UCT_RC_CHECK_RES and
 * UCT_RC_IFACE_GET_TX_DESC are defined elsewhere and presumably return early
 * from this function on failure.
 */
ssize_t uct_rc_mlx5_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                                 void *arg, uint64_t remote_addr,
                                 uct_rkey_t rkey)
{
    uct_rc_mlx5_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;
    size_t length;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, &iface->super.tx.mp, desc);

    desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put;

    /* The payload area starts immediately after the descriptor itself */
    length = pack_cb(desc + 1, arg);

    uct_rc_mlx5_ep_bcopy_post(ep, MLX5_OPCODE_RDMA_WRITE, length,
                              0, NULL, 0,
                              remote_addr, rkey,
                              MLX5_WQE_CTRL_CQ_UPDATE, desc);
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, BCOPY, length);

    return length;
}
/*
 * Post a flow-control PURE_GRANT message on a verbs RC endpoint.
 *
 * Two ways of sending the RC header are supported:
 *  - iface->fc_desc is set: the header located right after that descriptor
 *    is sent through a local SGE using the descriptor's lkey, with no send
 *    flags. This branch does not write the header.
 *  - iface->fc_desc is not set: the header embedded in the interface
 *    (am_inl_hdr.rc_hdr) is stamped with UCT_RC_EP_FC_PURE_GRANT and sent
 *    inline through the interface's inl_sge.
 *
 * 'req' is not used here. Returns UCS_OK after posting. UCT_RC_CHECK_RES is
 * defined elsewhere and presumably returns early when resources are missing.
 */
ucs_status_t uct_rc_verbs_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op,
                                     uct_rc_fc_request_t *req)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    struct ibv_send_wr fc_wr;
    struct ibv_sge sge;
    uct_rc_hdr_t *hdr;
    int flags;

    if (iface->fc_desc) {
        /* Registered-memory path */
        hdr           = (uct_rc_hdr_t*)(iface->fc_desc + 1);
        sge.addr      = (uintptr_t)hdr;
        sge.length    = sizeof(*hdr);
        sge.lkey      = iface->fc_desc->lkey;
        fc_wr.sg_list = &sge;
        flags         = 0;
    } else {
        /* Inline path */
        hdr                    = &iface->am_inl_hdr.rc_hdr;
        hdr->am_id             = UCT_RC_EP_FC_PURE_GRANT;
        fc_wr.sg_list          = iface->inl_sge;
        iface->inl_sge[0].addr   = (uintptr_t)hdr;
        iface->inl_sge[0].length = sizeof(*hdr);
        flags                  = IBV_SEND_INLINE;
    }

    /* Only a PURE grant travels as a standalone message in RC; every other
     * FC message is piggybacked on an AM. */
    ucs_assert(op == UCT_RC_EP_FC_PURE_GRANT);

    /* The FC window is deliberately not checked: a credit grant has to go
     * out whatever the window state is, otherwise both sides could end up
     * waiting for each other (head-to-head deadlock). */
    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    fc_wr.next    = NULL;
    fc_wr.opcode  = IBV_WR_SEND;
    fc_wr.num_sge = 1;

    uct_rc_verbs_ep_post_send(iface, ep, &fc_wr, flags, INT_MAX);
    return UCS_OK;
}
/*
 * Post a 32-bit atomic add with no result returned to the caller, on a verbs
 * RC endpoint.
 *
 * Only available when the build has HAVE_IB_EXT_ATOMICS; otherwise the
 * function returns UCS_ERR_UNSUPPORTED. Implemented as
 * IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD of sizeof(uint32_t) bytes,
 * posted signaled.
 *
 * Returns UCS_OK after posting. The UCT_RC_* macros are defined elsewhere and
 * presumably return early from this function on failure.
 */
ucs_status_t uct_rc_verbs_ep_atomic_add32(uct_ep_h tl_ep, uint32_t add,
                                          uint64_t remote_addr,
                                          uct_rkey_t rkey)
{
#if HAVE_IB_EXT_ATOMICS
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_ATOMIC_ADD_DESC(&iface->super,
                                        &iface->verbs_common.short_desc_mp,
                                        desc);

    /* TODO don't allocate descriptor - have dummy buffer */
    uct_rc_verbs_ep_ext_atomic_post(ep,
                                    IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD,
                                    sizeof(uint32_t),
                                    0, add, 0,
                                    remote_addr, rkey, desc,
                                    IBV_EXP_SEND_SIGNALED);
    return UCS_OK;
#else
    return UCS_ERR_UNSUPPORTED;
#endif
}
/*
 * Post an extended atomic operation that returns a result, on a verbs RC
 * endpoint.
 *
 * The completion handler is selected by uct_rc_iface_atomic_handler() with
 * its second argument set to 1 and the operand 'length'. The descriptor is
 * taken from the short descriptor pool and bound to the 'result' buffer and
 * the user completion. The request is posted signaled.
 *
 * Returns UCS_INPROGRESS after posting. The UCT_RC_* macros are defined
 * elsewhere and presumably return early from this function on failure.
 */
static inline ucs_status_t
uct_rc_verbs_ep_ext_atomic(uct_rc_verbs_ep_t *ep, int opcode, void *result,
                           uint32_t length, uint64_t compare_mask,
                           uint64_t compare_add, uint64_t swap,
                           uint64_t remote_addr, uct_rkey_t rkey,
                           uct_completion_t *comp)
{
    uct_rc_verbs_iface_t *iface   = ucs_derived_of(ep->super.super.super.iface,
                                                   uct_rc_verbs_iface_t);
    uct_rc_send_handler_t handler = uct_rc_iface_atomic_handler(&iface->super,
                                                                1, length);
    uct_rc_iface_send_desc_t *desc;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_ATOMIC_DESC(&iface->super,
                                    &iface->verbs_common.short_desc_mp, desc,
                                    handler, result, comp);

    uct_rc_verbs_ep_ext_atomic_post(ep, opcode, length,
                                    compare_mask, compare_add, swap,
                                    remote_addr, rkey, desc,
                                    IBV_EXP_SEND_SIGNALED);
    return UCS_INPROGRESS;
}
/*
 * Common zero-copy RDMA post routine for the verbs RC endpoint.
 *
 * The user iov is converted into an SGE list, the work request is filled
 * with the given 'opcode', remote address and rkey, and it is posted
 * signaled. The user completion is then queued on the TX QP, keyed by
 * ep->txcnt.pi as read after the post.
 *
 * Returns UCS_INPROGRESS after posting. UCT_RC_CHECK_RES and
 * UCT_SKIP_ZERO_LENGTH are defined elsewhere and presumably return early
 * from this function (the latter when no SGE was produced).
 */
static inline ucs_status_t
uct_rc_verbs_ep_rdma_zcopy(uct_rc_verbs_ep_t *ep, const uct_iov_t *iov,
                           size_t iovcnt, uint64_t remote_addr,
                           uct_rkey_t rkey, uct_completion_t *comp,
                           int opcode)
{
    uct_rc_verbs_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
                                                 uct_rc_verbs_iface_t);
    struct ibv_sge sge[UCT_IB_MAX_IOV];
    struct ibv_send_wr wr;
    size_t sge_cnt;

    UCT_RC_CHECK_RES(&iface->super, &ep->super);

    sge_cnt = uct_ib_verbs_sge_fill_iov(sge, iov, iovcnt);
    UCT_SKIP_ZERO_LENGTH(sge_cnt);

    UCT_RC_VERBS_FILL_RDMA_WR_IOV(wr, wr.opcode, opcode, sge, sge_cnt,
                                  remote_addr, rkey);
    wr.next = NULL;

    uct_rc_verbs_ep_post_send(iface, ep, &wr, IBV_SEND_SIGNALED);
    uct_rc_txqp_add_send_comp(&iface->super, &ep->super.txqp, comp,
                              ep->txcnt.pi);

    return UCS_INPROGRESS;
}
/*
 * Post a 64-bit atomic operation with no result returned to the caller, on a
 * verbs RC endpoint.
 *
 * Only UCT_ATOMIC_OP_ADD is accepted; any other opcode yields
 * UCS_ERR_UNSUPPORTED before any resource is touched. The add is implemented
 * as IBV_WR_ATOMIC_FETCH_AND_ADD with 'value' as the operand, posted
 * signaled.
 *
 * Returns UCS_OK after posting. The UCT_RC_* macros are defined elsewhere and
 * presumably return early from this function on failure.
 */
ucs_status_t uct_rc_verbs_ep_atomic64_post(uct_ep_h tl_ep, unsigned opcode,
                                           uint64_t value,
                                           uint64_t remote_addr,
                                           uct_rkey_t rkey)
{
    uct_rc_verbs_ep_t *ep       = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                 uct_rc_verbs_iface_t);
    uct_rc_iface_send_desc_t *desc;

    if (opcode != UCT_ATOMIC_OP_ADD) {
        return UCS_ERR_UNSUPPORTED;
    }

    /* TODO don't allocate descriptor - have dummy buffer */
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_ATOMIC_DESC(&iface->super, &iface->short_desc_mp,
                                    desc);

    uct_rc_verbs_ep_atomic_post(ep, IBV_WR_ATOMIC_FETCH_AND_ADD,
                                value, 0,
                                remote_addr, rkey, desc,
                                IBV_SEND_SIGNALED);
    return UCS_OK;
}
/*
 * Issue an RDMA READ into a bounce buffer on an mlx5 RC endpoint.
 *
 * UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC obtains the descriptor and receives the
 * unpack callback, the user completion, the callback argument and the
 * length. The read is posted as MLX5_OPCODE_RDMA_READ with
 * MLX5_WQE_CTRL_CQ_UPDATE.
 *
 * Returns UCS_INPROGRESS after posting. UCT_CHECK_LENGTH and the UCT_RC_*
 * macros are defined elsewhere and presumably return early from this
 * function on failure.
 */
ucs_status_t uct_rc_mlx5_ep_get_bcopy(uct_ep_h tl_ep,
                                      uct_unpack_callback_t unpack_cb,
                                      void *arg, size_t length,
                                      uint64_t remote_addr, uct_rkey_t rkey,
                                      uct_completion_t *comp)
{
    uct_rc_mlx5_ep_t *ep  = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t);
    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
    uct_rc_iface_send_desc_t *desc;

    UCT_CHECK_LENGTH(length, iface->super.config.seg_size, "get_bcopy");
    UCT_RC_CHECK_RES(iface, &ep->super);
    UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC(iface, &iface->tx.mp, desc,
                                       unpack_cb, comp, arg, length);

    uct_rc_mlx5_txqp_bcopy_post(iface, &ep->super.txqp, &ep->tx.wq,
                                MLX5_OPCODE_RDMA_READ, length,
                                0, NULL, 0,
                                remote_addr, rkey,
                                MLX5_WQE_CTRL_CQ_UPDATE, desc);
    UCT_TL_EP_STAT_OP(&ep->super.super, GET, BCOPY, length);

    return UCS_INPROGRESS;
}
/*
 * Post an atomic operation that returns a result, on an mlx5 RC endpoint.
 *
 * 'comp' is validated with UCT_CHECK_PARAM (must be non-NULL). A descriptor
 * from the atomic descriptor pool records the caller-supplied completion
 * 'handler', the 'result' buffer and the user completion, and the request is
 * posted with MLX5_WQE_CTRL_CQ_UPDATE.
 *
 * Returns UCS_INPROGRESS after posting. UCT_CHECK_PARAM, UCT_RC_CHECK_RES and
 * UCT_RC_IFACE_GET_TX_DESC are defined elsewhere and presumably return early
 * from this function on failure.
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_mlx5_ep_atomic(uct_rc_mlx5_ep_t *ep, int opcode, void *result,
                      unsigned length, uct_rc_send_handler_t handler,
                      uint64_t remote_addr, uct_rkey_t rkey,
                      uint64_t compare_mask, uint64_t compare,
                      uint64_t swap_add, uct_completion_t *comp)
{
    uct_rc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
                                                uct_rc_mlx5_iface_t);
    uct_rc_iface_send_desc_t *desc;

    UCT_CHECK_PARAM(comp != NULL, "completion must be non-NULL");
    UCT_RC_CHECK_RES(&iface->super, &ep->super);
    UCT_RC_IFACE_GET_TX_DESC(&iface->super, &iface->tx.atomic_desc_mp, desc);

    desc->super.user_comp = comp;
    desc->super.buffer    = result;
    desc->super.handler   = handler;

    uct_rc_mlx5_ep_atomic_post(ep, opcode, desc, length,
                               remote_addr, rkey,
                               compare_mask, compare, swap_add,
                               MLX5_WQE_CTRL_CQ_UPDATE);
    return UCS_INPROGRESS;
}