/*
 * Build and queue a connection-reply (CREP) control packet on @ep.
 *
 * Grabs a tx skb from the interface, fills the UD network header with the
 * destination ep id plus CTL|ACK_REQ flags, appends the CREP control header
 * carrying our local ep id, and completes the skb for transmission.
 *
 * Returns the skb that was queued, or NULL when no tx skb is available.
 */
uct_ud_send_skb_t *uct_ud_ep_prepare_crep(uct_ud_ep_t *ep)
{
    uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                           uct_ud_iface_t);
    uct_ud_ctl_hdr_t *ctl_hdr;
    uct_ud_send_skb_t *skb;
    uct_ud_neth_t *hdr;

    /* a CREP is only meaningful once both endpoint ids are established */
    ucs_assert_always(ep->dest_ep_id != UCT_UD_EP_NULL_ID);
    ucs_assert_always(ep->ep_id != UCT_UD_EP_NULL_ID);

    skb = uct_ud_iface_get_tx_skb(iface, ep);
    if (skb == NULL) {
        return NULL;
    }

    /* network header: destination ep id, marked as control + ack-request */
    hdr = skb->neth;
    uct_ud_neth_init_data(ep, hdr);
    hdr->packet_type  = ep->dest_ep_id;
    hdr->packet_type |= (UCT_UD_PACKET_FLAG_ACK_REQ|UCT_UD_PACKET_FLAG_CTL);

    /* control payload immediately follows the network header */
    ctl_hdr                     = (uct_ud_ctl_hdr_t *)(hdr + 1);
    ctl_hdr->type               = UCT_UD_PACKET_CREP;
    ctl_hdr->conn_rep.src_ep_id = ep->ep_id;

    skb->len = sizeof(*hdr) + sizeof(*ctl_hdr);
    UCT_UD_EP_HOOK_CALL_TX(ep, skb->neth);
    uct_ud_iface_complete_tx_skb_nolog(iface, ep, skb);
    /* uct_ud_ep_notify(ep); TODO: allow to send data on CREQ RX */
    return skb;
}
/*
 * Drain the interface's pending-ops queue, posting one control send per
 * entry until the queue is empty or tx resources run out.
 *
 * uct_ud_iface_get_next_pending() returns either a preformatted skb
 * (skb != NULL) or fills the stack-allocated network header 'neth'
 * (skb == NULL).  UCS_ERR_NO_RESOURCE stops progress entirely;
 * UCS_INPROGRESS means this entry produced nothing to send now, so we
 * move on to the next one.
 */
static void uct_ud_verbs_iface_progress_pending(uct_ud_verbs_iface_t *iface)
{
    uct_ud_ep_t *ep;
    ucs_status_t status;
    uct_ud_neth_t neth;
    uct_ud_send_skb_t *skb;

    while (!ucs_queue_is_empty(&iface->super.tx.pending_ops)) {
        status = uct_ud_iface_get_next_pending(&iface->super, &ep, &neth, &skb);
        if (status == UCS_ERR_NO_RESOURCE) {
            /* out of tx resources - retry on a later progress call */
            return;
        }
        if (status == UCS_INPROGRESS) {
            continue;
        }
        if (ucs_unlikely(skb != NULL)) {
            /* TODO: not every skb is inline */
            /* NOTE(review): sge[0].lkey is not refreshed here - presumably
             * the ctl work request is posted inline so lkey is ignored;
             * verify against uct_ud_verbs_iface_tx_ctl() */
            iface->tx.sge[0].addr   = (uintptr_t) (skb->neth);
            iface->tx.sge[0].length = skb->len;
            /* NOTE(review): unlike the neth branch below, this path does not
             * invoke UCT_UD_EP_HOOK_CALL_TX - confirm whether the tx hook is
             * intentionally skipped for skb-based pending sends */
            uct_ud_verbs_iface_tx_ctl(iface, ucs_derived_of(ep, uct_ud_verbs_ep_t));
            uct_ud_ep_log_tx_tag("PENDING_TX: (skb)", ep, skb->neth, skb->len);
        } else {
            /* header-only send built on the stack (no payload) */
            iface->tx.sge[0].addr   = (uintptr_t)&neth;
            iface->tx.sge[0].length = sizeof(neth);
            UCT_UD_EP_HOOK_CALL_TX(ep, &neth);
            uct_ud_verbs_iface_tx_ctl(iface, ucs_derived_of(ep, uct_ud_verbs_ep_t));
            uct_ud_ep_log_tx_tag("PENDING_TX: (neth)", ep, &neth, sizeof(neth));
        }
    }
}
/*
 * Active-message "short" send on a UD mlx5 endpoint.
 *
 * The AM id, the 64-bit immediate header and the payload are written inline
 * directly into the tx WQE; afterwards the packet is mirrored into an skb
 * (header via memcpy, payload via uct_ud_iface_complete_tx_inl) so the UD
 * reliability layer can retransmit it.
 *
 * Returns UCS_ERR_NO_RESOURCE when no tx skb is available, UCS_OK otherwise.
 */
static ucs_status_t
uct_ud_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
                        const void *buffer, unsigned length)
{
    uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t);
    uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_ud_mlx5_iface_t);
    struct mlx5_wqe_ctrl_seg *ctrl;
    struct mlx5_wqe_inl_data_seg *inl;
    uct_ud_am_short_hdr_t *am;
    uct_ud_neth_t *neth;
    unsigned wqe_size;
    uct_ud_send_skb_t *skb;

    /* data is written directly into tx wqe, so it is impossible to use
     * common ud am code */
    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(hdr) + length,
                     iface->super.config.max_inline, "am_short");

    uct_ud_enter(&iface->super);
    uct_ud_iface_progress_pending_tx(&iface->super);
    /* skb reserved up-front: it mirrors the wqe contents for retransmission */
    skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super);
    if (!skb) {
        uct_ud_leave(&iface->super);
        return UCS_ERR_NO_RESOURCE;
    }

    ctrl = iface->tx.wq.curr;
    /* Set inline segment which has AM id, AM header, and AM payload */
    inl = uct_ib_mlx5_get_next_seg(&iface->tx.wq, ctrl, UCT_UD_MLX5_WQE_SIZE);
    wqe_size = length + sizeof(*am) + sizeof(*neth);
    inl->byte_count = htonl(wqe_size | MLX5_INLINE_SEG);

    /* assume that neth and am header fit into one bb */
    ucs_assert(sizeof(*am) + sizeof(*neth) < MLX5_SEND_WQE_BB);
    neth = (void*)(inl + 1);
    uct_ud_am_set_neth(neth, &ep->super, id);

    am      = (void*)(neth + 1);
    am->hdr = hdr;

    /* payload copy may wrap around the wq, hence the helper */
    uct_ib_mlx5_inline_copy(am + 1, buffer, length, &iface->tx.wq);

    wqe_size += UCT_UD_MLX5_WQE_SIZE + sizeof(*inl);
    UCT_CHECK_LENGTH(wqe_size, UCT_IB_MLX5_MAX_BB * MLX5_SEND_WQE_BB,
                     "am_short");
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth);
    uct_ud_mlx5_post_send(iface, ep, ctrl, wqe_size);

    /* mirror header into the skb; complete_tx_inl appends the payload copy */
    skb->len = sizeof(*neth) + sizeof(*am);
    memcpy(skb->neth, neth, skb->len);
    uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb,
                                 (char *)skb->neth + skb->len, buffer, length);
    UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length);
    uct_ud_leave(&iface->super);
    return UCS_OK;
}
/*
 * PUT "short" send on a UD mlx5 endpoint.
 *
 * The UD network header, the PUT header (remote virtual address) and the
 * payload are written inline into the tx WQE, then mirrored into an skb so
 * the UD reliability layer can retransmit.  rkey is unused by the UD
 * software PUT protocol (the remote side resolves the address itself).
 *
 * Returns UCS_ERR_NO_RESOURCE when no tx skb is available, UCS_OK otherwise.
 */
static ucs_status_t
uct_ud_mlx5_ep_put_short(uct_ep_h tl_ep,
                         const void *buffer, unsigned length,
                         uint64_t remote_addr, uct_rkey_t rkey)
{
    uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t);
    uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
                                                uct_ud_mlx5_iface_t);
    struct mlx5_wqe_ctrl_seg *ctrl;
    struct mlx5_wqe_inl_data_seg *inl;
    unsigned wqe_size;
    uct_ud_put_hdr_t *put_hdr;
    uct_ud_neth_t *neth;
    uct_ud_send_skb_t *skb;

    uct_ud_enter(&iface->super);
    uct_ud_iface_progress_pending_tx(&iface->super);
    skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super);
    if (!skb) {
        uct_ud_leave(&iface->super);
        return UCS_ERR_NO_RESOURCE;
    }

    ctrl = iface->tx.wq.curr;
    /* Set inline segment which has the network header, PUT header and
     * PUT payload */
    inl = uct_ib_mlx5_get_next_seg(&iface->tx.wq, ctrl, UCT_UD_MLX5_WQE_SIZE);
    wqe_size = length + sizeof(*put_hdr) + sizeof(*neth);
    inl->byte_count = htonl(wqe_size | MLX5_INLINE_SEG);

    /* assume that neth and put header fit into one bb */
    ucs_assert(sizeof(*put_hdr) + sizeof(*neth) < MLX5_SEND_WQE_BB);
    neth = (void*)(inl + 1);
    uct_ud_neth_init_data(&ep->super, neth);
    uct_ud_neth_set_type_put(&ep->super, neth);
    uct_ud_neth_ack_req(&ep->super, neth);

    /* PUT header carries the remote virtual address to write to */
    put_hdr = (uct_ud_put_hdr_t *)(neth+1);
    put_hdr->rva = remote_addr;

    /* payload copy may wrap around the wq, hence the helper */
    uct_ib_mlx5_inline_copy(put_hdr + 1, buffer, length, &iface->tx.wq);

    wqe_size += UCT_UD_MLX5_WQE_SIZE + sizeof(*inl);
    UCT_CHECK_LENGTH(wqe_size, UCT_IB_MLX5_MAX_BB * MLX5_SEND_WQE_BB,
                     "put_short");
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth);
    uct_ud_mlx5_post_send(iface, ep, ctrl, wqe_size);

    /* mirror header into the skb; complete_tx_inl appends the payload copy */
    skb->len = sizeof(*neth) + sizeof(*put_hdr);
    memcpy(skb->neth, neth, skb->len);
    uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb,
                                 (char *)skb->neth + skb->len, buffer, length);
    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length);
    uct_ud_leave(&iface->super);
    return UCS_OK;
}
/*
 * Post the interface's "buffer-copy" work request (tx.wr_bcp) for @ep.
 * The sge list is expected to have been set up by the caller; sge[0]
 * points at the UD network header passed to the tx hook.
 */
static inline void
uct_ud_verbs_iface_tx_data(uct_ud_verbs_iface_t *iface, uct_ud_verbs_ep_t *ep)
{
    struct ibv_send_wr *wr = &iface->tx.wr_bcp;
    struct ibv_send_wr *bad_wr;
    int UCS_V_UNUSED err;

    uct_ud_verbs_iface_fill_tx_wr(iface, ep, wr, 0);
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr);

    err = ibv_post_send(iface->super.qp, wr, &bad_wr);
    ucs_assertv(err == 0, "ibv_post_send() returned %d (%m)", err);

    uct_ib_log_post_send(iface->super.qp, wr, NULL);
}
/*
 * Post an inline send (tx.wr_inl) for @ep with @buffer/@length as the
 * second scatter entry.  sge[0] (the UD network header) must already be
 * set by the caller; sge[1] is filled here with the payload.
 */
static inline void
uct_ud_verbs_iface_tx_inl(uct_ud_verbs_iface_t *iface, uct_ud_verbs_ep_t *ep,
                          const void *buffer, unsigned length)
{
    struct ibv_send_wr *wr = &iface->tx.wr_inl;
    struct ibv_send_wr *bad_wr;
    int UCS_V_UNUSED err;

    /* payload goes into the second sge; header is already in sge[0] */
    iface->tx.sge[1].addr   = (uintptr_t)buffer;
    iface->tx.sge[1].length = length;

    uct_ud_verbs_iface_fill_tx_wr(iface, ep, wr, IBV_SEND_INLINE);
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr);

    err = ibv_post_send(iface->super.qp, wr, &bad_wr);
    ucs_assertv(err == 0, "ibv_post_send() returned %d (%m)", err);

    uct_ib_log_post_send(iface->super.qp, wr, NULL);
}
/*
 * Post an skb-backed (non-inline) send on a UD mlx5 endpoint: the WQE
 * references the skb buffer via a single data segment, so the skb memory
 * must stay valid until completion.
 */
static UCS_F_ALWAYS_INLINE void
uct_ud_mlx5_ep_tx_skb(uct_ud_mlx5_iface_t *iface, uct_ud_mlx5_ep_t *ep,
                      uct_ud_send_skb_t *skb)
{
    struct mlx5_wqe_ctrl_seg *wqe_ctrl = iface->tx.wq.curr;
    struct mlx5_wqe_data_seg *dseg;

    /* data segment follows the control/UD address segments */
    dseg = uct_ib_mlx5_get_next_seg(&iface->tx.wq, wqe_ctrl,
                                    UCT_UD_MLX5_WQE_SIZE);
    uct_ib_mlx5_set_data_seg(dseg, skb->neth, skb->len, skb->lkey);

    UCT_UD_EP_HOOK_CALL_TX(&ep->super, skb->neth);
    uct_ud_mlx5_post_send(iface, ucs_derived_of(ep, uct_ud_mlx5_ep_t),
                          wqe_ctrl, UCT_UD_MLX5_WQE_SIZE + sizeof(*dseg));
}
/*
 * Post an inline send on a UD mlx5 endpoint: @buf (which starts with the
 * UD network header) is copied into the WQE's inline data segment, so no
 * buffer ownership outlives this call.
 */
static inline void
uct_ud_mlx5_ep_tx_inl(uct_ud_mlx5_iface_t *iface, uct_ud_mlx5_ep_t *ep,
                      const void *buf, unsigned length)
{
    struct mlx5_wqe_ctrl_seg *wqe_ctrl = iface->tx.wq.curr;
    struct mlx5_wqe_inl_data_seg *inl_seg;

    inl_seg = uct_ib_mlx5_get_next_seg(&iface->tx.wq, wqe_ctrl,
                                       UCT_UD_MLX5_WQE_SIZE);
    inl_seg->byte_count = htonl(length | MLX5_INLINE_SEG);

    /* copy may wrap around the end of the wq, hence the helper */
    uct_ib_mlx5_inline_copy(inl_seg + 1, buf, length, &iface->tx.wq);

    UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)buf);
    uct_ud_mlx5_post_send(iface, ep, wqe_ctrl,
                          UCT_UD_MLX5_WQE_SIZE + sizeof(*inl_seg) + length);
}
/*
 * Post an skb-backed send (tx.wr_skb) for @ep with the given send flags,
 * and consume one tx credit.  The WQE references the skb buffer, so the
 * skb must stay valid until completion.
 */
static inline void
uct_ud_verbs_ep_tx_skb(uct_ud_verbs_iface_t *iface, uct_ud_verbs_ep_t *ep,
                       uct_ud_send_skb_t *skb, unsigned flags)
{
    struct ibv_send_wr *wr = &iface->tx.wr_skb;
    struct ibv_send_wr *bad_wr;
    int UCS_V_UNUSED err;

    /* single sge describing the whole skb (header + payload) */
    iface->tx.sge[0].lkey   = skb->lkey;
    iface->tx.sge[0].length = skb->len;
    iface->tx.sge[0].addr   = (uintptr_t)skb->neth;

    uct_ud_verbs_iface_fill_tx_wr(iface, ep, wr, flags);
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr);

    err = ibv_post_send(iface->super.qp, wr, &bad_wr);
    ucs_assertv(err == 0, "ibv_post_send() returned %d (%m)", err);

    uct_ib_log_post_send(&iface->super.super, iface->super.qp, wr, NULL);
    --iface->super.tx.available;
}