/**
 * Refill the receive work queue of a UD/mlx5 interface.
 *
 * Takes up to iface->super.config.rx_max_batch receive descriptors from the
 * interface RX memory pool (iface->super.rx.mp) and writes each one's lkey
 * and header address, in network byte order, into consecutive slots of the
 * receive WQE ring. The slot index wraps using iface->rx.wq.mask.
 *
 * If at least one descriptor was placed, the receive WQE counter is advanced
 * by the number placed, iface->super.rx.available is reduced by the same
 * amount, and the new counter is written to the doorbell record after a CPU
 * memory fence. If none was placed, an error is logged and nothing else is
 * modified.
 *
 * @param iface  UD/mlx5 interface whose receive queue is refilled.
 */
static UCS_F_NOINLINE void
uct_ud_mlx5_iface_post_recv(uct_ud_mlx5_iface_t *iface)
{
    struct mlx5_wqe_data_seg *ring = iface->rx.wq.wqes;
    unsigned max_posts             = iface->super.config.rx_max_batch;
    uct_ib_iface_recv_desc_t *rx_desc;
    uint16_t slot, following, posted, new_counter;

    /* Start at the ring slot matching the current WQE counter */
    slot   = iface->rx.wq.rq_wqe_counter & iface->rx.wq.mask;
    posted = 0;

    while (posted < max_posts) {
        /* Compute and prefetch the next slot before filling this one */
        following = (slot + 1) & iface->rx.wq.mask;
        ucs_prefetch(ring + following);

        /* 'break' is the macro's failure action: presumably taken when no
         * descriptor could be obtained from the pool, ending the batch early */
        UCT_TL_IFACE_GET_RX_DESC(&iface->super.super.super,
                                 &iface->super.rx.mp, rx_desc, break);

        ring[slot].lkey = htonl(rx_desc->lkey);
        ring[slot].addr = htonll((uintptr_t)uct_ib_iface_recv_desc_hdr(
                                         &iface->super.super, rx_desc));

        slot = following;
        ++posted;
    }

    if (ucs_unlikely(posted == 0)) {
        ucs_error("iface(%p) failed to post receive wqes", iface);
        return;
    }

    /* The counter is kept as a 16-bit value, so the addition wraps there */
    new_counter                 = iface->rx.wq.rq_wqe_counter + posted;
    iface->rx.wq.rq_wqe_counter = new_counter;
    iface->super.rx.available  -= posted;

    /* The WQE contents must be written before the doorbell record update */
    ucs_memory_cpu_fence();
    *iface->rx.wq.dbrec = htonl(new_counter);
}
/**
 * Write the "arm" doorbell for an mlx5 completion queue.
 *
 * Builds a 32-bit word from the CQ sequence number (low 2 bits of cq->cq_sn,
 * placed at bit 28), the request command and the consumer index (low 24 bits
 * of cq->cq_ci). That word is stored big-endian into the arm slot of the
 * doorbell record. After a CPU memory fence, a 64-bit big-endian value with
 * the same word in the upper half and cq->cq_num in the lower half is written
 * at offset MLX5_CQ_DOORBELL from cq->uar, followed by a bus store fence.
 *
 * @param cq         Completion queue to arm.
 * @param solicited  Non-zero selects MLX5_CQ_DB_REQ_NOT_SOL, zero selects
 *                   MLX5_CQ_DB_REQ_NOT.
 *
 * @return Always 0.
 */
int uct_ib_mlx5dv_arm_cq(uct_ib_mlx5_cq_t *cq, int solicited)
{
    uint32_t seq_num  = cq->cq_sn & 3;
    uint32_t cons_idx = cq->cq_ci & 0xffffff;
    uint32_t request;
    uint64_t arm_word, db_value;
    uint64_t *uar_reg;

    if (solicited) {
        request = MLX5_CQ_DB_REQ_NOT_SOL;
    } else {
        request = MLX5_CQ_DB_REQ_NOT;
    }

    arm_word = (seq_num << 28) | request | cons_idx;

    /* Update the doorbell record first */
    cq->dbrec[UCT_IB_MLX5_CQ_ARM_DB] = htobe32(arm_word);

    /* Doorbell record store must precede the UAR doorbell write */
    ucs_memory_cpu_fence();

    db_value = (arm_word << 32) | cq->cq_num;
    uar_reg  = (uint64_t *)((uint8_t *)cq->uar + MLX5_CQ_DOORBELL);
    *uar_reg = htobe64(db_value);

    ucs_memory_bus_store_fence();

    return 0;
}