/* Post a batch of receive WQEs to the UD mlx5 receive queue and ring the doorbell */
static UCS_F_NOINLINE void
uct_ud_mlx5_iface_post_recv(uct_ud_mlx5_iface_t *iface)
{
    unsigned batch = iface->super.config.rx_max_batch;
    struct mlx5_wqe_data_seg *rx_wqes;
    uint16_t pi, next_pi, count;
    uct_ib_iface_recv_desc_t *desc;

    rx_wqes = iface->rx.wq.wqes;
    pi      = iface->rx.wq.rq_wqe_counter & iface->rx.wq.mask;

    for (count = 0; count < batch; count++) {
        next_pi = (pi + 1) & iface->rx.wq.mask;
        ucs_prefetch(rx_wqes + next_pi);
        UCT_TL_IFACE_GET_RX_DESC(&iface->super.super.super, &iface->super.rx.mp,
                                 desc, break);
        rx_wqes[pi].lkey = htonl(desc->lkey);
        rx_wqes[pi].addr = htonll((uintptr_t)
                                  uct_ib_iface_recv_desc_hdr(&iface->super.super, desc));
        pi = next_pi;
    }

    if (ucs_unlikely(count == 0)) {
        ucs_error("iface(%p) failed to post receive wqes", iface);
        return;
    }

    pi = iface->rx.wq.rq_wqe_counter + count;
    iface->rx.wq.rq_wqe_counter = pi;
    iface->super.rx.available  -= count;
    ucs_memory_cpu_fence();
    *iface->rx.wq.dbrec = htonl(pi);
}
/* Fill up to n receive work requests from the memory pool and chain them into
 * a NULL-terminated list ready for posting with the verbs API */
int uct_ib_iface_prepare_rx_wrs(uct_ib_iface_t *iface, ucs_mpool_t *mp,
                                uct_ib_recv_wr_t *wrs, unsigned n)
{
    uct_ib_iface_recv_desc_t *desc;
    unsigned count;

    count = 0;
    while (count < n) {
        UCT_TL_IFACE_GET_RX_DESC(&iface->super, mp, desc, break);
        wrs[count].sg.addr      = (uintptr_t)uct_ib_iface_recv_desc_hdr(iface, desc);
        wrs[count].sg.length    = iface->config.rx_payload_offset +
                                  iface->config.seg_size;
        wrs[count].sg.lkey      = desc->lkey;
        wrs[count].ibwr.num_sge = 1;
        wrs[count].ibwr.wr_id   = (uintptr_t)desc;
        wrs[count].ibwr.sg_list = &wrs[count].sg;
        wrs[count].ibwr.next    = &wrs[count + 1].ibwr;
        ++count;
    }

    if (count > 0) {
        wrs[count - 1].ibwr.next = NULL;
    }

    return count;
}
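/*
 * A minimal usage sketch (not from the original source): how a caller might
 * hand the WR chain built by uct_ib_iface_prepare_rx_wrs() to a shared receive
 * queue. Only ibv_post_srq_recv() and the wrs[].ibwr field come from the code
 * above; the function name, the 'srq' handle and the 'max' batch size are
 * illustrative assumptions.
 */
static void example_srq_post_recv(uct_ib_iface_t *iface, struct ibv_srq *srq,
                                  ucs_mpool_t *mp, unsigned max)
{
    uct_ib_recv_wr_t wrs[max];          /* scratch array for the chained WRs */
    struct ibv_recv_wr *bad_wr;
    int count, ret;

    /* Build a NULL-terminated chain of up to 'max' receive work requests */
    count = uct_ib_iface_prepare_rx_wrs(iface, mp, wrs, max);
    if (count == 0) {
        return;                         /* memory pool is empty - nothing to post */
    }

    /* Hand the whole chain to the HCA in a single verbs call */
    ret = ibv_post_srq_recv(srq, &wrs[0].ibwr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_post_srq_recv() failed: %d", ret);
    }
}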
/* Poll the RC receive CQ and dispatch active-message completions */
static UCS_F_ALWAYS_INLINE ucs_status_t
uct_rc_verbs_iface_poll_rx(uct_rc_verbs_iface_t *iface)
{
    uct_ib_iface_recv_desc_t *desc;
    uct_rc_hdr_t *hdr;
    struct ibv_wc wc[UCT_IB_MAX_WC];
    int i, ret;

    ret = ibv_poll_cq(iface->super.super.recv_cq, UCT_IB_MAX_WC, wc);
    if (ret > 0) {
        for (i = 0; i < ret; ++i) {
            if (ucs_unlikely(wc[i].status != IBV_WC_SUCCESS)) {
                ucs_fatal("Receive completion with error: %s",
                          ibv_wc_status_str(wc[i].status));
            }

            UCS_STATS_UPDATE_COUNTER(iface->super.stats,
                                     UCT_RC_IFACE_STAT_RX_COMPLETION, 1);

            desc = (void*)wc[i].wr_id;
            uct_ib_iface_desc_received(&iface->super.super, desc, wc[i].byte_len, 1);

            hdr = uct_ib_iface_recv_desc_hdr(&iface->super.super, desc);
            uct_ib_log_recv_completion(IBV_QPT_RC, &wc[i], hdr,
                                       uct_rc_ep_am_packet_dump);
            uct_rc_iface_invoke_am(&iface->super, hdr, wc[i].byte_len, desc);
        }
        iface->super.rx.available += ret;
        return UCS_OK;
    } else if (ret == 0) {
        uct_rc_verbs_iface_post_recv(iface, 0);
        return UCS_ERR_NO_PROGRESS;
    } else {
        ucs_fatal("Failed to poll receive CQ");
    }
}
/* Poll the UD receive CQ, process incoming packets and repost receive buffers */
static inline ucs_status_t
uct_ud_verbs_iface_poll_rx(uct_ud_verbs_iface_t *iface)
{
    uct_ib_iface_recv_desc_t *desc;
    struct ibv_wc wc[UCT_IB_MAX_WC];
    int i, ret;
    char *packet;

    ret = ibv_poll_cq(iface->super.super.recv_cq, UCT_IB_MAX_WC, wc);
    if (ret == 0) {
        return UCS_ERR_NO_PROGRESS;
    }
    if (ucs_unlikely(ret < 0)) {
        ucs_fatal("Failed to poll receive CQ");
    }

    for (i = 0; i < ret; ++i) {
        if (ucs_unlikely(wc[i].status != IBV_WC_SUCCESS)) {
            ucs_fatal("Receive completion with error: %s",
                      ibv_wc_status_str(wc[i].status));
        }

        desc = (void*)wc[i].wr_id;
        ucs_trace_data("pkt rcvd: buf=%p len=%d", desc, wc[i].byte_len);
        packet = uct_ib_iface_recv_desc_hdr(&iface->super.super, desc);
        VALGRIND_MAKE_MEM_DEFINED(packet, wc[i].byte_len);

        uct_ud_ep_process_rx(&iface->super,
                             (uct_ud_neth_t *)(packet + UCT_IB_GRH_LEN),
                             wc[i].byte_len - UCT_IB_GRH_LEN,
                             (uct_ud_recv_skb_t *)desc);
    }
    iface->super.rx.available += ret;
    uct_ud_verbs_iface_post_recv(iface);
    return UCS_OK;
}
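/*
 * A hedged sketch (not part of the original code) of how the poll_rx routines
 * above are typically driven: a progress callback keeps consuming completions
 * until the CQ is drained. Only uct_ud_verbs_iface_poll_rx() and its
 * UCS_OK / UCS_ERR_NO_PROGRESS convention come from the snippet above; the
 * function name is an illustrative assumption.
 */
static void example_ud_verbs_iface_progress(uct_ud_verbs_iface_t *iface)
{
    ucs_status_t status;

    /* Drain receive completions; poll_rx() reposts receive buffers internally
     * and returns UCS_ERR_NO_PROGRESS once the CQ has nothing left. */
    do {
        status = uct_ud_verbs_iface_poll_rx(iface);
    } while (status == UCS_OK);
}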
/* Replenish the mlx5 SRQ with receive descriptors from the memory pool */
unsigned uct_rc_mlx5_iface_srq_post_recv(uct_rc_iface_t *iface, uct_ib_mlx5_srq_t *srq)
{
    uct_ib_mlx5_srq_seg_t *seg;
    uct_ib_iface_recv_desc_t *desc;
    uint16_t count, index, next_index;
    uct_rc_hdr_t *hdr;

    /* Make sure the union is right */
    UCS_STATIC_ASSERT(ucs_offsetof(uct_ib_mlx5_srq_seg_t, mlx5_srq.next_wqe_index) ==
                      ucs_offsetof(uct_ib_mlx5_srq_seg_t, srq.next_wqe_index));
    UCS_STATIC_ASSERT(ucs_offsetof(uct_ib_mlx5_srq_seg_t, dptr) ==
                      sizeof(struct mlx5_wqe_srq_next_seg));

    ucs_assert(UCS_CIRCULAR_COMPARE16(srq->ready_idx, <=, srq->free_idx));

    index = srq->ready_idx;
    for (;;) {
        next_index = index + 1;
        seg = uct_ib_mlx5_srq_get_wqe(srq, next_index & srq->mask);
        if (UCS_CIRCULAR_COMPARE16(next_index, >, srq->free_idx)) {
            if (!seg->srq.ooo) {
                break;
            }
            ucs_assert(next_index == (uint16_t)(srq->free_idx + 1));
            seg->srq.ooo  = 0;
            srq->free_idx = next_index;
        }

        if (seg->srq.desc == NULL) {
            UCT_TL_IFACE_GET_RX_DESC(&iface->super.super, &iface->rx.mp, desc, break);

            /* Set receive data segment pointer. Length is pre-initialized. */
            hdr            = uct_ib_iface_recv_desc_hdr(&iface->super, desc);
            seg->srq.desc  = desc;
            seg->dptr.lkey = htonl(desc->lkey);
            seg->dptr.addr = htonll((uintptr_t)hdr);
            VALGRIND_MAKE_MEM_NOACCESS(hdr, iface->super.config.seg_size);
        }

        index = next_index;
    }

    /* The tail of this function is missing in the source; the lines below are
     * a reconstruction following the counter/fence/doorbell pattern used by
     * the other post_recv functions here, so the exact field names
     * (srq->sw_pi, srq->db) are assumptions. */
    count = index - srq->sw_pi;
    if (count > 0) {
        srq->ready_idx       = index;
        srq->sw_pi           = index;
        iface->rx.available -= count;
        ucs_memory_cpu_store_fence();
        *srq->db = htonl(srq->sw_pi);
    }
    return count;
}
/* Post up to 'max' receive descriptors to the mlx5 SRQ and ring the doorbell */
static unsigned uct_rc_mlx5_iface_post_recv(uct_rc_mlx5_iface_t *iface, unsigned max)
{
    struct mlx5_wqe_srq_next_seg *seg;
    uct_rc_mlx5_recv_desc_t *desc;
    unsigned count, head;
    uct_rc_hdr_t *hdr;
    unsigned length;

    head   = iface->rx.head;
    length = iface->super.super.config.seg_size;
    count  = 0;
    while (count < max) {
        ucs_assert(head != iface->rx.tail);

        UCT_TL_IFACE_GET_RX_DESC(&iface->super.super.super, iface->super.rx.mp,
                                 desc, break);

        seg = uct_rc_mlx5_iface_get_srq_wqe(iface, head);
        hdr = uct_ib_iface_recv_desc_hdr(&iface->super.super, &desc->super);
        uct_ib_mlx5_set_data_seg((void*)(seg + 1), hdr, length, /* TODO pre-init length */
                                 desc->super.lkey);
        VALGRIND_MAKE_MEM_NOACCESS(hdr, length);

        ucs_queue_push(&iface->rx.desc_q, &desc->queue);
        head = uct_rc_mlx5_srq_next_wqe_ind(seg);
        ++count;
    }

    if (count > 0) {
        iface->rx.head             = head;
        iface->rx.sw_pi           += count;
        iface->super.rx.available -= count;
        ucs_memory_cpu_store_fence();
        *iface->rx.db = htonl(iface->rx.sw_pi);
    }
    return count;
}