static UCS_F_ALWAYS_INLINE void
uct_rc_mlx5_iface_poll_tx(uct_rc_mlx5_iface_t *iface)
{
    uct_rc_iface_send_op_t *op;
    struct mlx5_cqe64 *cqe;
    uct_rc_mlx5_ep_t *ep;
    unsigned qp_num;
    uint16_t hw_ci;

    cqe = uct_ib_mlx5_get_cqe(&iface->tx.cq, UCT_IB_MLX5_CQE64_SIZE_LOG);
    if (cqe == NULL) {
        return;
    }

    UCS_STATS_UPDATE_COUNTER(iface->super.stats,
                             UCT_RC_IFACE_STAT_TX_COMPLETION, 1);

    /* Make sure CQE fields are read only after the ownership check in
     * uct_ib_mlx5_get_cqe() */
    ucs_memory_cpu_load_fence();

    /* Find the endpoint which owns the completed QP */
    qp_num = ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER);
    ep     = ucs_derived_of(uct_rc_iface_lookup_ep(&iface->super, qp_num),
                            uct_rc_mlx5_ep_t);
    ucs_assert(ep != NULL);

    /* Credit back send resources up to the hardware completion index */
    hw_ci               = ntohs(cqe->wqe_counter);
    ep->super.available = uct_ib_mlx5_txwq_update_bb(&ep->tx.wq, hw_ci);
    ++iface->super.tx.cq_available;

    /* Process completions of all sends up to hw_ci, with 16-bit wraparound */
    ucs_queue_for_each_extract(op, &ep->super.outstanding, queue,
                               UCS_CIRCULAR_COMPARE16(op->sn, <=, hw_ci)) {
        op->handler(op);
    }
}
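/*
 * A minimal sketch of the wraparound comparison used above when extracting
 * completed sends. UCS_CIRCULAR_COMPARE16 compares 16-bit sequence numbers
 * modulo 2^16, so retirement stays correct even after cqe->wqe_counter wraps.
 * The helper below is illustrative (not part of UCX) and assumes the two
 * counters are never more than 2^15 apart, which a bounded send queue depth
 * guarantees.
 */
static inline int seqno16_before_or_equal(uint16_t sn, uint16_t hw_ci)
{
    /* Interpreting the 16-bit difference as signed implements the circular
     * "<=": zero or negative means 'sn' is not ahead of 'hw_ci'. */
    return (int16_t)(sn - hw_ci) <= 0;
}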
static UCS_F_ALWAYS_INLINE void
uct_ud_mlx5_iface_poll_tx(uct_ud_mlx5_iface_t *iface)
{
    struct mlx5_cqe64 *cqe;

    cqe = uct_ib_mlx5_get_cqe(&iface->tx.cq, UCT_IB_MLX5_CQE64_SIZE_LOG);
    if (cqe == NULL) {
        return;
    }

    uct_ib_mlx5_log_cqe(cqe);
    iface->super.tx.available = uct_ib_mlx5_txwq_update_bb(&iface->tx.wq,
                                                           ntohs(cqe->wqe_counter));
}
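/*
 * For clarity, a sketch of the credit arithmetic behind
 * uct_ib_mlx5_txwq_update_bb (an illustrative reconstruction, assuming the
 * work queue tracks a 16-bit software producer index and a fixed capacity of
 * 'bb_max' 64-byte building blocks): the free space is the capacity minus
 * the wraparound-safe distance between the producer index and the hardware
 * completion index reported in the CQE.
 */
static inline uint16_t txwq_available_bb_sketch(uint16_t sw_pi, uint16_t hw_ci,
                                                uint16_t bb_max)
{
    /* (uint16_t)(sw_pi - hw_ci) is the number of BBs still in flight */
    return bb_max - (uint16_t)(sw_pi - hw_ci);
}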
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_ud_mlx5_iface_poll_rx(uct_ud_mlx5_iface_t *iface)
{
    struct mlx5_cqe64 *cqe;
    uint16_t ci;
    uct_ib_iface_recv_desc_t *desc;
    uint32_t len;
    void *packet;
    ucs_status_t status;

    /* Prefetch the packet of the next expected completion */
    ci     = iface->rx.wq.cq_wqe_counter & iface->rx.wq.mask;
    packet = (void *)ntohll(iface->rx.wq.wqes[ci].addr);
    ucs_prefetch(packet + UCT_IB_GRH_LEN);
    desc   = (uct_ib_iface_recv_desc_t *)(packet -
                                          iface->super.super.config.rx_hdr_offset);

    cqe = uct_ib_mlx5_get_cqe(&iface->rx.cq, UCT_IB_MLX5_CQE64_SIZE_LOG);
    if (cqe == NULL) {
        status = UCS_ERR_NO_PROGRESS;
        goto out;
    }

    uct_ib_mlx5_log_cqe(cqe);
    ucs_assert(0 == (cqe->op_own & (MLX5_INLINE_SCATTER_32 |
                                    MLX5_INLINE_SCATTER_64)));
    ucs_assert(ntohs(cqe->wqe_counter) == iface->rx.wq.cq_wqe_counter);

    iface->super.rx.available++;
    iface->rx.wq.cq_wqe_counter++;

    len = ntohl(cqe->byte_cnt);
    VALGRIND_MAKE_MEM_DEFINED(packet, len);

    /* Skip the GRH and pass the UD header and payload up to the protocol */
    uct_ud_ep_process_rx(&iface->super,
                         (uct_ud_neth_t *)(packet + UCT_IB_GRH_LEN),
                         len - UCT_IB_GRH_LEN,
                         (uct_ud_recv_skb_t *)desc);
    status = UCS_OK;

out:
    if (iface->super.rx.available >= iface->super.config.rx_max_batch) {
        /* We must always try to post buffers; otherwise, if the receiver is
         * slow and there is always a CQE to process, we could run out of
         * RX WQEs. */
        uct_ud_mlx5_iface_post_recv(iface);
    }
    return status;
}
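/*
 * For context, a sketch of how the two UD pollers above are typically
 * composed by the interface progress callback: poll RX first, and reap TX
 * credits only when no receive completion was found. The function name is
 * illustrative (an assumption); the actual progress routine lives elsewhere
 * in the transport and may also dispatch pending operations.
 */
static void uct_ud_mlx5_iface_progress_sketch(uct_ud_mlx5_iface_t *iface)
{
    if (uct_ud_mlx5_iface_poll_rx(iface) == UCS_ERR_NO_PROGRESS) {
        uct_ud_mlx5_iface_poll_tx(iface);
    }
}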
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_mlx5_iface_poll_rx(uct_rc_mlx5_iface_t *iface)
{
    struct mlx5_wqe_srq_next_seg *seg;
    uct_rc_mlx5_recv_desc_t *desc;
    uct_rc_hdr_t *hdr;
    struct mlx5_cqe64 *cqe;
    unsigned byte_len;
    uint16_t wqe_ctr_be;
    uint16_t max_batch;
    ucs_status_t status;

    cqe = uct_ib_mlx5_get_cqe(&iface->rx.cq, iface->rx.cq.cqe_size_log);
    if (cqe == NULL) {
        /* If there is no CQE - just post receives */
        status = UCS_ERR_NO_PROGRESS;
        goto done;
    }

    UCS_STATS_UPDATE_COUNTER(iface->super.stats,
                             UCT_RC_IFACE_STAT_RX_COMPLETION, 1);
    ucs_assert(!ucs_queue_is_empty(&iface->rx.desc_q));

    ucs_memory_cpu_load_fence();

    desc     = ucs_queue_pull_elem_non_empty(&iface->rx.desc_q,
                                             uct_rc_mlx5_recv_desc_t, queue);
    byte_len = ntohl(cqe->byte_cnt);
    uct_ib_iface_desc_received(&iface->super.super, &desc->super, byte_len,
                               !(cqe->op_own & (MLX5_INLINE_SCATTER_32 |
                                                MLX5_INLINE_SCATTER_64)));

    /* Get a pointer to the AM header (after which comes the payload).
     * Support inline scatter by pointing directly into the CQE. */
    if (cqe->op_own & MLX5_INLINE_SCATTER_32) {
        hdr = (uct_rc_hdr_t*)cqe;
        UCS_STATS_UPDATE_COUNTER(iface->stats,
                                 UCT_RC_MLX5_IFACE_STAT_RX_INL_32, 1);
    } else if (cqe->op_own & MLX5_INLINE_SCATTER_64) {
        /* With 64-byte inline scatter the data starts in the previous CQE */
        hdr = (uct_rc_hdr_t*)(cqe - 1);
        UCS_STATS_UPDATE_COUNTER(iface->stats,
                                 UCT_RC_MLX5_IFACE_STAT_RX_INL_64, 1);
    } else {
        /* Non-inline completion: the header is in the posted receive buffer
         * (assumed standard path) */
        hdr = uct_ib_iface_recv_desc_hdr(&iface->super.super, &desc->super);
        VALGRIND_MAKE_MEM_DEFINED(hdr, byte_len);
    }
static UCS_F_ALWAYS_INLINE void
uct_dc_mlx5_poll_tx(uct_dc_mlx5_iface_t *iface)
{
    uint8_t dci;
    struct mlx5_cqe64 *cqe;
    uint32_t qp_num;
    uint16_t hw_ci;
    UCT_DC_MLX5_TXQP_DECL(txqp, txwq);

    cqe = uct_ib_mlx5_get_cqe(&iface->mlx5_common.tx.cq,
                              iface->mlx5_common.tx.cq.cqe_size_log);
    if (cqe == NULL) {
        return;
    }

    UCS_STATS_UPDATE_COUNTER(iface->super.super.stats,
                             UCT_RC_IFACE_STAT_TX_COMPLETION, 1);

    ucs_memory_cpu_load_fence();
    ucs_assertv(!(cqe->op_own & (MLX5_INLINE_SCATTER_32 |
                                 MLX5_INLINE_SCATTER_64)),
                "tx inline scatter not supported");

    /* Map the completed QP number back to its DCI */
    qp_num = ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER);
    dci    = uct_dc_iface_dci_find(&iface->super, qp_num);
    txqp   = &iface->super.tx.dcis[dci].txqp;
    txwq   = &iface->dci_wqs[dci];

    /* Credit back send resources and process completed operations */
    hw_ci = ntohs(cqe->wqe_counter);
    uct_rc_txqp_available_set(txqp, uct_ib_mlx5_txwq_update_bb(txwq, hw_ci));
    uct_rc_txqp_completion(txqp, hw_ci);
    iface->super.super.tx.cq_available++;

    /* Release the DCI and dispatch endpoints waiting for resources */
    uct_dc_iface_dci_put(&iface->super, dci);
    if (uct_dc_iface_dci_can_alloc(&iface->super)) {
        ucs_arbiter_dispatch(&iface->super.super.tx.arbiter, 1,
                             uct_dc_iface_dci_do_pending_wait, NULL);
    }
    ucs_arbiter_dispatch(&iface->super.tx.dci_arbiter, 1,
                         uct_dc_iface_dci_do_pending_tx, NULL);
}
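/*
 * A small note on the QP number extraction used above (and in the RC TX
 * poller): the sop_drop_qpn CQE field carries the QPN in its low 24 bits
 * (UCT_IB_QPN_ORDER is 24), so masking after the byte swap recovers it.
 * The helper name below is illustrative, not part of UCX.
 */
static inline uint32_t cqe_qp_num_sketch(const struct mlx5_cqe64 *cqe)
{
    /* Keep only the 24-bit QP number from the host-order field */
    return ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER);
}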