static ucs_status_t uct_ud_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr, const void *buffer, unsigned length) { uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t); uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ud_mlx5_iface_t); struct mlx5_wqe_ctrl_seg *ctrl; struct mlx5_wqe_inl_data_seg *inl; uct_ud_am_short_hdr_t *am; uct_ud_neth_t *neth; unsigned wqe_size; uct_ud_send_skb_t *skb; /* data a written directly into tx wqe, so it is impossible to use * common ud am code */ UCT_CHECK_AM_ID(id); UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(hdr) + length, iface->super.config.max_inline, "am_short"); uct_ud_enter(&iface->super); uct_ud_iface_progress_pending_tx(&iface->super); skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super); if (!skb) { uct_ud_leave(&iface->super); return UCS_ERR_NO_RESOURCE; } ctrl = iface->tx.wq.curr; /* Set inline segment which has AM id, AM header, and AM payload */ inl = uct_ib_mlx5_get_next_seg(&iface->tx.wq, ctrl, UCT_UD_MLX5_WQE_SIZE); wqe_size = length + sizeof(*am) + sizeof(*neth); inl->byte_count = htonl(wqe_size | MLX5_INLINE_SEG); /* assume that neth and am header fit into one bb */ ucs_assert(sizeof(*am) + sizeof(*neth) < MLX5_SEND_WQE_BB); neth = (void*)(inl + 1); uct_ud_am_set_neth(neth, &ep->super, id); am = (void*)(neth + 1); am->hdr = hdr; uct_ib_mlx5_inline_copy(am + 1, buffer, length, &iface->tx.wq); wqe_size += UCT_UD_MLX5_WQE_SIZE + sizeof(*inl); UCT_CHECK_LENGTH(wqe_size, UCT_IB_MLX5_MAX_BB * MLX5_SEND_WQE_BB, "am_short"); UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth); uct_ud_mlx5_post_send(iface, ep, ctrl, wqe_size); skb->len = sizeof(*neth) + sizeof(*am); memcpy(skb->neth, neth, skb->len); uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb, (char *)skb->neth + skb->len, buffer, length); UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length); uct_ud_leave(&iface->super); return UCS_OK; }
static ucs_status_t uct_ud_mlx5_ep_put_short(uct_ep_h tl_ep, const void *buffer, unsigned length, uint64_t remote_addr, uct_rkey_t rkey) { uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t); uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ud_mlx5_iface_t); struct mlx5_wqe_ctrl_seg *ctrl; struct mlx5_wqe_inl_data_seg *inl; unsigned wqe_size; uct_ud_put_hdr_t *put_hdr; uct_ud_neth_t *neth; uct_ud_send_skb_t *skb; uct_ud_enter(&iface->super); uct_ud_iface_progress_pending_tx(&iface->super); skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super); if (!skb) { uct_ud_leave(&iface->super); return UCS_ERR_NO_RESOURCE; } ctrl = iface->tx.wq.curr; /* Set inline segment which has AM id, AM header, and AM payload */ inl = uct_ib_mlx5_get_next_seg(&iface->tx.wq, ctrl, UCT_UD_MLX5_WQE_SIZE); wqe_size = length + sizeof(*put_hdr) + sizeof(*neth); inl->byte_count = htonl(wqe_size | MLX5_INLINE_SEG); /* assume that neth and am header fit into one bb */ ucs_assert(sizeof(*put_hdr) + sizeof(*neth) < MLX5_SEND_WQE_BB); neth = (void*)(inl + 1); uct_ud_neth_init_data(&ep->super, neth); uct_ud_neth_set_type_put(&ep->super, neth); uct_ud_neth_ack_req(&ep->super, neth); put_hdr = (uct_ud_put_hdr_t *)(neth+1); put_hdr->rva = remote_addr; uct_ib_mlx5_inline_copy(put_hdr + 1, buffer, length, &iface->tx.wq); wqe_size += UCT_UD_MLX5_WQE_SIZE + sizeof(*inl); UCT_CHECK_LENGTH(wqe_size, UCT_IB_MLX5_MAX_BB * MLX5_SEND_WQE_BB, "put_short"); UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth); uct_ud_mlx5_post_send(iface, ep, ctrl, wqe_size); skb->len = sizeof(*neth) + sizeof(*put_hdr); memcpy(skb->neth, neth, skb->len); uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb, (char *)skb->neth + skb->len, buffer, length); UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length); uct_ud_leave(&iface->super); return UCS_OK; }
static ucs_status_t uct_ud_verbs_ep_put_short(uct_ep_h tl_ep, const void *buffer, unsigned length, uint64_t remote_addr, uct_rkey_t rkey) { uct_ud_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t); uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ud_verbs_iface_t); uct_ud_send_skb_t *skb; uct_ud_put_hdr_t *put_hdr; uct_ud_neth_t *neth; /* TODO: UCT_CHECK_LENGTH(length <= iface->config.max_inline, "put_short"); */ uct_ud_enter(&iface->super); uct_ud_iface_progress_pending_tx(&iface->super); skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super); if (!skb) { uct_ud_leave(&iface->super); return UCS_ERR_NO_RESOURCE; } neth = skb->neth; uct_ud_neth_init_data(&ep->super, neth); uct_ud_neth_set_type_put(&ep->super, neth); uct_ud_neth_ack_req(&ep->super, neth); put_hdr = (uct_ud_put_hdr_t *)(neth+1); put_hdr->rva = remote_addr; iface->tx.sge[0].addr = (uintptr_t)neth; iface->tx.sge[0].length = sizeof(*neth) + sizeof(*put_hdr); uct_ud_verbs_ep_tx_inlv(iface, ep, buffer, length); ucs_trace_data("TX: PUT [%0llx] buf=%p len=%u", (unsigned long long)remote_addr, buffer, length); skb->len = iface->tx.sge[0].length; uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb, put_hdr+1, buffer, length); uct_ud_leave(&iface->super); return UCS_OK; }