/**
 * Post a single UD send WQE for a pre-built skb: the fixed-size WQE prefix
 * (UCT_UD_MLX5_WQE_SIZE bytes) followed by one data-pointer segment that
 * references the skb's network header and payload.
 *
 * @param iface  UD mlx5 interface owning the TX work queue.
 * @param ep     Endpoint to send on (used for the TX hook).
 * @param skb    Send skb with neth/len/lkey already initialized by the caller.
 */
static UCS_F_ALWAYS_INLINE void
uct_ud_mlx5_ep_tx_skb(uct_ud_mlx5_iface_t *iface, uct_ud_mlx5_ep_t *ep,
                      uct_ud_send_skb_t *skb)
{
    struct mlx5_wqe_ctrl_seg *ctrl;
    struct mlx5_wqe_data_seg *dptr;

    ctrl = iface->tx.wq.curr;
    /* Skip the fixed WQE prefix and locate the data segment, handling queue
     * wrap-around inside the helper */
    dptr = uct_ib_mlx5_get_next_seg(&iface->tx.wq, ctrl, UCT_UD_MLX5_WQE_SIZE);
    uct_ib_mlx5_set_data_seg(dptr, skb->neth, skb->len, skb->lkey);
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, skb->neth);
    /* NOTE: 'ep' already has type uct_ud_mlx5_ep_t*, so the former
     * ucs_derived_of(ep, uct_ud_mlx5_ep_t) cast was a redundant no-op and
     * has been removed. */
    uct_ud_mlx5_post_send(iface, ep, ctrl,
                          UCT_UD_MLX5_WQE_SIZE + sizeof(*dptr));
}
/**
 * Replenish the RC mlx5 shared receive queue with up to 'max' descriptors.
 *
 * For each posted WQE: take a descriptor from the memory pool, point the SRQ
 * WQE's data segment at the descriptor's header buffer, and queue the
 * descriptor so completions can be matched back to it. Stops early if the
 * memory pool is exhausted (the GET_RX_DESC macro breaks out of the loop).
 *
 * After posting, publishes the new producer index to the hardware doorbell.
 *
 * @param iface  RC mlx5 interface whose SRQ is refilled.
 * @param max    Maximum number of receive WQEs to post.
 * @return       Number of WQEs actually posted (may be 0).
 */
static unsigned uct_rc_mlx5_iface_post_recv(uct_rc_mlx5_iface_t *iface, unsigned max)
{
    struct mlx5_wqe_srq_next_seg *seg;
    uct_rc_mlx5_recv_desc_t *desc;
    unsigned count, head;
    uct_rc_hdr_t *hdr;
    unsigned length;

    head   = iface->rx.head;
    length = iface->super.super.config.seg_size; /* receive buffer size per WQE */
    count  = 0;
    while (count < max) {
        /* Ring must not be full when the caller asks us to post */
        ucs_assert(head != iface->rx.tail);
        /* Breaks the loop (partial refill) if the mpool has no free descriptor */
        UCT_TL_IFACE_GET_RX_DESC(&iface->super.super.super, iface->super.rx.mp,
                                 desc, break);

        seg = uct_rc_mlx5_iface_get_srq_wqe(iface, head);
        hdr = uct_ib_iface_recv_desc_hdr(&iface->super.super, &desc->super);
        /* Data segment immediately follows the SRQ next-segment header */
        uct_ib_mlx5_set_data_seg((void*)(seg + 1), hdr, length,
                                 /* TODO pre-init length */ desc->super.lkey);
        /* Buffer belongs to HW until completion; catch premature SW reads */
        VALGRIND_MAKE_MEM_NOACCESS(hdr, length);

        /* Remember the descriptor so the completion path can find it */
        ucs_queue_push(&iface->rx.desc_q, &desc->queue);
        /* Follow the SRQ's linked free list to the next WQE index */
        head = uct_rc_mlx5_srq_next_wqe_ind(seg);
        ++count;
    }

    if (count > 0) {
        iface->rx.head              = head;
        iface->rx.sw_pi            += count;
        iface->super.rx.available  -= count;
        /* Ensure all WQE stores are visible before ringing the doorbell */
        ucs_memory_cpu_store_fence();
        *iface->rx.db = htonl(iface->rx.sw_pi);
    }
    return count;
}
/*
 * Generic data-pointer posting function.
 * Parameters which are not relevant to the opcode are ignored.
 *
 * WQE layouts built here (segments in queue order):
 *
 *              +------+-----+-------+--------+-------+
 *   SEND       | CTRL | INL | am_id | am_hdr | DPSEG |
 *              +------+-----+-------+--------+-------+
 *   RDMA_WRITE | CTRL | RADDR | DPSEG |
 *              +------+-------+-------+-------+
 *   ATOMIC     | CTRL | RADDR | ATOMIC | DPSEG |
 *              +------+-------+--------+-------+
 *
 * @param ep            Endpoint to post on; its TX work queue is used.
 * @param opcode_flags  mlx5 opcode, optionally OR'ed with UCT_RC_MLX5_OPCODE_FLAG_RAW.
 * @param buffer        Local payload buffer (scatter/gather source or atomic result).
 * @param length        Payload length; per-opcode constraints asserted below.
 * @param lkey_p        Pointer to the local memory key for 'buffer'.
 * @param am_id         (SEND) Active-message ID placed in the inline RC header.
 * @param am_hdr        (SEND) Active-message header copied inline.
 * @param am_hdr_len    (SEND) Length of the inline AM header.
 * @param remote_addr   (RDMA/ATOMIC) Remote virtual address.
 * @param rkey          (RDMA/ATOMIC) Remote memory key.
 * @param compare_mask  (masked ATOMIC) Bits of 'compare' that participate.
 * @param compare       (ATOMIC) Compare value for compare-and-swap.
 * @param swap_add      (ATOMIC) Swap value or add operand.
 * @param signal        0 to let TX moderation decide on CQ update, otherwise
 *                      must be MLX5_WQE_CTRL_CQ_UPDATE.
 */
static UCS_F_ALWAYS_INLINE void
uct_rc_mlx5_ep_dptr_post(uct_rc_mlx5_ep_t *ep, unsigned opcode_flags,
                         const void *buffer, unsigned length, uint32_t *lkey_p,
                         /* SEND */ uint8_t am_id, const void *am_hdr,
                         unsigned am_hdr_len,
                         /* RDMA/ATOMIC */ uint64_t remote_addr, uct_rkey_t rkey,
                         /* ATOMIC */ uint64_t compare_mask, uint64_t compare,
                         uint64_t swap_add, int signal)
{
    struct mlx5_wqe_ctrl_seg                     *ctrl;
    struct mlx5_wqe_raddr_seg                    *raddr;
    struct mlx5_wqe_atomic_seg                   *atomic;
    struct mlx5_wqe_data_seg                     *dptr;
    struct mlx5_wqe_inl_data_seg                 *inl;
    struct uct_ib_mlx5_atomic_masked_cswap32_seg *masked_cswap32;
    struct uct_ib_mlx5_atomic_masked_fadd32_seg  *masked_fadd32;
    struct uct_ib_mlx5_atomic_masked_cswap64_seg *masked_cswap64;
    uct_rc_mlx5_iface_t *iface;
    uct_rc_hdr_t        *rch;
    unsigned             wqe_size, inl_seg_size;
    uint8_t              opmod;

    iface = ucs_derived_of(ep->super.super.super.iface, uct_rc_mlx5_iface_t);

    /* Caller either forces a CQ update or lets TX moderation decide */
    if (!signal) {
        signal = uct_rc_iface_tx_moderation(&iface->super, &ep->super,
                                            MLX5_WQE_CTRL_CQ_UPDATE);
    } else {
        ucs_assert(signal == MLX5_WQE_CTRL_CQ_UPDATE);
    }

    opmod = 0;
    ctrl  = ep->tx.wq.curr;
    switch (opcode_flags) {
    case MLX5_OPCODE_SEND:
        /* Inline segment is padded to a whole number of WQE segments */
        inl_seg_size = ucs_align_up_pow2(sizeof(*inl) + sizeof(*rch) + am_hdr_len,
                                         UCT_IB_MLX5_WQE_SEG_SIZE);
        ucs_assert(sizeof(*ctrl) + inl_seg_size + sizeof(*dptr) <=
                   UCT_RC_MLX5_MAX_BB * MLX5_SEND_WQE_BB);
        ucs_assert(length + sizeof(*rch) + am_hdr_len <=
                   iface->super.super.config.seg_size);

        /* Inline segment with AM ID and header */
        inl             = (void*)(ctrl + 1);
        inl->byte_count = htonl((sizeof(*rch) + am_hdr_len) | MLX5_INLINE_SEG);
        rch             = (void*)(inl + 1);
        rch->am_id      = am_id;
        /* inline_copy handles queue wrap-around while copying the AM header */
        uct_ib_mlx5_inline_copy(rch + 1, am_hdr, am_hdr_len, &ep->tx.wq);

        /* Data segment with payload */
        if (length == 0) {
            wqe_size = sizeof(*ctrl) + inl_seg_size;
        } else {
            wqe_size = sizeof(*ctrl) + inl_seg_size + sizeof(*dptr);
            dptr     = (void*)(ctrl + 1) + inl_seg_size;
            /* The padded inline segment may push dptr past the queue end;
             * wrap it back to the start of the ring */
            if (ucs_unlikely((void*)dptr >= ep->tx.wq.qend)) {
                dptr = (void*)dptr - (ep->tx.wq.qend - ep->tx.wq.qstart);
            }
            ucs_assert((void*)dptr >= ep->tx.wq.qstart);
            ucs_assert((void*)(dptr + 1) <= ep->tx.wq.qend);
            uct_ib_mlx5_set_data_seg(dptr, buffer, length, *lkey_p);
        }
        break;

    case MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_RAW:
        /* Data segment only - no inline RC header */
        ucs_assert(length < (2ul << 30)); /* data seg byte_count is 31 bits */
        ucs_assert(length <= iface->super.super.config.seg_size);
        wqe_size = sizeof(*ctrl) + sizeof(*dptr);
        uct_ib_mlx5_set_data_seg((void*)(ctrl + 1), buffer, length, *lkey_p);
        break;

    case MLX5_OPCODE_RDMA_READ:
    case MLX5_OPCODE_RDMA_WRITE:
        /* Set RDMA segment */
        ucs_assert(length <= UCT_IB_MAX_MESSAGE_SIZE);
        raddr = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        /* Data segment (omitted for zero-length operations) */
        if (length == 0) {
            wqe_size = sizeof(*ctrl) + sizeof(*raddr);
        } else {
            wqe_size = sizeof(*ctrl) + sizeof(*raddr) + sizeof(*dptr);
            uct_ib_mlx5_set_data_seg((void*)(raddr + 1), buffer, length,
                                     *lkey_p);
        }
        break;

    case MLX5_OPCODE_ATOMIC_FA:
    case MLX5_OPCODE_ATOMIC_CS:
        /* Plain IB atomics operate on 64-bit values only */
        ucs_assert(length == sizeof(uint64_t));
        raddr = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        atomic = (void*)(raddr + 1);
        /* 'compare' is meaningful only for compare-and-swap */
        if (opcode_flags == MLX5_OPCODE_ATOMIC_CS) {
            atomic->compare = compare;
        }
        atomic->swap_add = swap_add;

        /* Data segment receives the fetched (old) remote value */
        uct_ib_mlx5_set_data_seg((void*)(atomic + 1), buffer, length, *lkey_p);
        wqe_size = sizeof(*ctrl) + sizeof(*raddr) + sizeof(*atomic) +
                   sizeof(*dptr);
        break;

    case MLX5_OPCODE_ATOMIC_MASKED_CS:
        raddr = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        /* opmod encodes the extended-atomic operand size as log2 of bytes */
        switch (length) {
        case sizeof(uint32_t):
            opmod                        = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(2);
            masked_cswap32               = (void*)(raddr + 1);
            masked_cswap32->swap         = swap_add;
            masked_cswap32->compare      = compare;
            masked_cswap32->swap_mask    = (uint32_t)-1; /* swap all bits */
            masked_cswap32->compare_mask = compare_mask;
            dptr                         = (void*)(masked_cswap32 + 1);
            wqe_size                     = sizeof(*ctrl) + sizeof(*raddr) +
                                           sizeof(*masked_cswap32) + sizeof(*dptr);
            break;
        case sizeof(uint64_t):
            opmod                        = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(3); /* Ext. atomic, size 2**3 */
            masked_cswap64               = (void*)(raddr + 1);
            masked_cswap64->swap         = swap_add;
            masked_cswap64->compare      = compare;
            masked_cswap64->swap_mask    = (uint64_t)-1; /* swap all bits */
            masked_cswap64->compare_mask = compare_mask;
            dptr                         = (void*)(masked_cswap64 + 1);
            wqe_size                     = sizeof(*ctrl) + sizeof(*raddr) +
                                           sizeof(*masked_cswap64) + sizeof(*dptr);

            /* Handle QP wrap-around. It cannot happen in the middle of
             * masked-cswap segment, because it's still in the first BB. */
            ucs_assert((void*)dptr <= ep->tx.wq.qend);
            if (dptr == ep->tx.wq.qend) {
                dptr = ep->tx.wq.qstart;
            } else {
                ucs_assert((void*)masked_cswap64 < ep->tx.wq.qend);
            }
            break;
        default:
            ucs_assert(0); /* unsupported masked-cswap operand size */
        }

        /* Data segment receives the fetched (old) remote value */
        uct_ib_mlx5_set_data_seg(dptr, buffer, length, *lkey_p);
        break;

    case MLX5_OPCODE_ATOMIC_MASKED_FA:
        /* Masked fetch-add is built here only for 32-bit operands */
        ucs_assert(length == sizeof(uint32_t));
        raddr = (void*)(ctrl + 1);
        uct_rc_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey);

        opmod                         = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(2);
        masked_fadd32                 = (void*)(raddr + 1);
        masked_fadd32->add            = swap_add;
        masked_fadd32->filed_boundary = 0; /* no field boundaries - plain add */

        uct_ib_mlx5_set_data_seg((void*)(masked_fadd32 + 1), buffer, length,
                                 *lkey_p);
        wqe_size = sizeof(*ctrl) + sizeof(*raddr) + sizeof(*masked_fadd32) +
                   sizeof(*dptr);
        break;

    default:
        ucs_fatal("invalid send opcode");
    }

    uct_rc_mlx5_post_send(ep, ctrl, (opcode_flags & UCT_RC_MLX5_OPCODE_MASK),
                          opmod, signal, wqe_size);
}