static UCS_F_NOINLINE unsigned uct_rc_mlx5_iface_post_recv(uct_rc_mlx5_iface_t *iface) { uct_rc_mlx5_srq_seg_t *seg; uct_ib_iface_recv_desc_t *desc; uint16_t count, index, next_index; uct_rc_hdr_t *hdr; /* Make sure the union is right */ UCS_STATIC_ASSERT(ucs_offsetof(uct_rc_mlx5_srq_seg_t, mlx5_srq.next_wqe_index) == ucs_offsetof(uct_rc_mlx5_srq_seg_t, srq.next_wqe_index)); UCS_STATIC_ASSERT(ucs_offsetof(uct_rc_mlx5_srq_seg_t, dptr) == sizeof(struct mlx5_wqe_srq_next_seg)); ucs_assert(UCS_CIRCULAR_COMPARE16(iface->rx.ready_idx, <=, iface->rx.free_idx)); index = iface->rx.ready_idx; for (;;) { next_index = index + 1; seg = uct_rc_mlx5_iface_get_srq_wqe(iface, next_index & iface->rx.mask); if (UCS_CIRCULAR_COMPARE16(next_index, >, iface->rx.free_idx)) { if (!seg->srq.ooo) { break; } ucs_assert(next_index == (uint16_t)(iface->rx.free_idx + 1)); seg->srq.ooo = 0; iface->rx.free_idx = next_index; } if (seg->srq.desc == NULL) { UCT_TL_IFACE_GET_RX_DESC(&iface->super.super.super, &iface->super.rx.mp, desc, break); /* Set receive data segment pointer. Length is pre-initialized. */ hdr = uct_ib_iface_recv_desc_hdr(&iface->super.super, desc); seg->srq.desc = desc; seg->dptr.lkey = htonl(desc->lkey); seg->dptr.addr = htonll((uintptr_t)hdr); VALGRIND_MAKE_MEM_NOACCESS(hdr, iface->super.super.config.seg_size); } index = next_index; }
/**
 * Post up to @a max receive descriptors on the interface's SRQ.
 *
 * Starting from iface->rx.head, each iteration obtains a receive descriptor
 * through UCT_TL_IFACE_GET_RX_DESC, fills the data segment located right after
 * the WQE's "next" segment, pushes the descriptor onto iface->rx.desc_q and
 * advances to the WQE index returned by uct_rc_mlx5_srq_next_wqe_ind().
 * The loop ends early if the descriptor macro takes its failure action.
 *
 * When at least one descriptor was posted, rx.head, rx.sw_pi and
 * super.rx.available are updated and htonl(rx.sw_pi) is stored through
 * iface->rx.db.
 *
 * @param iface  Interface whose receive queue is replenished.
 * @param max    Upper bound on the number of descriptors to post.
 *
 * @return Number of descriptors actually posted (0 if none).
 */
static unsigned uct_rc_mlx5_iface_post_recv(uct_rc_mlx5_iface_t *iface, unsigned max)
{
    const unsigned seg_size = iface->super.super.config.seg_size;
    unsigned wqe_index      = iface->rx.head;
    struct mlx5_wqe_srq_next_seg *wqe;
    uct_rc_mlx5_recv_desc_t *rdesc;
    uct_rc_hdr_t *payload;
    unsigned posted;

    /* 'posted' is incremented only after a full iteration, so leaving the
     * loop through the macro's 'break' does not count the failed attempt. */
    for (posted = 0; posted < max; ++posted) {
        ucs_assert(wqe_index != iface->rx.tail);

        UCT_TL_IFACE_GET_RX_DESC(&iface->super.super.super, iface->super.rx.mp,
                                 rdesc, break);

        wqe     = uct_rc_mlx5_iface_get_srq_wqe(iface, wqe_index);
        payload = uct_ib_iface_recv_desc_hdr(&iface->super.super, &rdesc->super);

        /* The data segment immediately follows the next-segment header.
         * TODO pre-init length */
        uct_ib_mlx5_set_data_seg((void*)(wqe + 1), payload, seg_size,
                                 rdesc->super.lkey);
        VALGRIND_MAKE_MEM_NOACCESS(payload, seg_size);

        ucs_queue_push(&iface->rx.desc_q, &rdesc->queue);
        wqe_index = uct_rc_mlx5_srq_next_wqe_ind(wqe);
    }

    if (posted == 0) {
        /* Nothing was posted: leave the queue state and db untouched */
        return 0;
    }

    iface->rx.head             = wqe_index;
    iface->rx.sw_pi           += posted;
    iface->super.rx.available -= posted;

    /* Store fence precedes the db write; presumably this orders the WQE
     * contents ahead of the producer index update - confirm against HW spec */
    ucs_memory_cpu_store_fence();
    *iface->rx.db = htonl(iface->rx.sw_pi);

    return posted;
}