int mvdev_post_srq_buffers(mv_rpool *rp, mv_srq *srq, int num_bufs)
{
    int i = 0, total = 1;
    mv_rbuf *v, *first_v, *last_v;
    struct ibv_recv_wr *bad_wr;

    /* Build a chained list of receive WRs so one post covers all buffers */
    first_v = last_v = get_mv_rbuf(srq->buffer_size);
    prepare_rc_recv(first_v, rp);

    for (i = 1; i < num_bufs; i++) {
        ++total;
        v = get_mv_rbuf(srq->buffer_size);
        prepare_rc_recv(v, rp);
        last_v->desc.rr.next = &v->desc.rr;
        last_v = v;
    }

    if (MVDEV_UNLIKELY(ibv_post_srq_recv(srq->srq, &first_v->desc.rr, &bad_wr))) {
        fprintf(stderr, "Cannot post to SRQ!\n");
        return 0; /* we should know if this happens */
    }

    D_PRINT("Posted %d recvs to SRQ\n", i);
    return total;
}
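One detail the example above glosses over: when a chained list is posted, ibv_post_srq_recv() stops at the first failing work request and sets *bad_wr to it, leaving the rest of the chain unposted. A minimal sketch of walking the unposted tail, not taken from the code above, with reclaim_buffer() as a hypothetical pool-return helper:

static int post_chain_or_reclaim(struct ibv_srq *srq, struct ibv_recv_wr *head)
{
    struct ibv_recv_wr *bad_wr = NULL;
    int ret = ibv_post_srq_recv(srq, head, &bad_wr);

    if (ret != 0) {
        /* bad_wr points at the first WR that was not posted; everything
         * from there to the end of the chain still belongs to the caller. */
        for (struct ibv_recv_wr *wr = bad_wr; wr != NULL; wr = wr->next) {
            reclaim_buffer(wr); /* hypothetical: return the buffer to the pool */
        }
    }
    return ret;
}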
unsigned uct_rc_verbs_iface_post_recv_always(uct_rc_iface_t *iface, unsigned max)
{
    struct ibv_recv_wr *bad_wr;
    uct_ib_recv_wr_t *wrs;
    unsigned count;
    int ret;

    wrs = ucs_alloca(sizeof *wrs * max);

    count = uct_ib_iface_prepare_rx_wrs(&iface->super, &iface->rx.mp, wrs, max);
    if (ucs_unlikely(count == 0)) {
        return 0;
    }

    UCT_IB_INSTRUMENT_RECORD_RECV_WR_LEN("uct_rc_iface_post_recv_always",
                                         &wrs[0].ibwr);
    ret = ibv_post_srq_recv(iface->rx.srq, &wrs[0].ibwr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_post_srq_recv() returned %d: %m", ret);
    }
    iface->rx.available -= count;

    return count;
}
void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
                                uint32_t num_sge, void *ctx)
{
    BackendCtx *bctx;
    struct ibv_sge new_sge[MAX_SGE];
    uint32_t bctx_id;
    int rc;
    struct ibv_recv_wr wr = {}, *bad_wr;

    bctx = g_malloc0(sizeof(*bctx));
    bctx->up_ctx = ctx;
    bctx->backend_srq = srq;

    rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
    if (unlikely(rc)) {
        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
        goto err_free_bctx;
    }

    rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);

    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
                              &backend_dev->rdma_dev_res->stats.rx_bufs_len);
    if (rc) {
        complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
        goto err_dealloc_cqe_ctx;
    }

    wr.num_sge = num_sge;
    wr.sg_list = new_sge;
    wr.wr_id = bctx_id;

    rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
    if (rc) {
        rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
                          srq->ibsrq->handle, rc, errno);
        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
        goto err_dealloc_cqe_ctx;
    }

    atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
    backend_dev->rdma_dev_res->stats.rx_bufs++;
    backend_dev->rdma_dev_res->stats.rx_srq++;

    return;

err_dealloc_cqe_ctx:
    backend_dev->rdma_dev_res->stats.rx_bufs_err++;
    rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);

err_free_bctx:
    g_free(bctx);
}
void SharedReceiveQueue::postBuffer(InfinibandBuffer& buffer, std::error_code& ec) {
    if (!buffer.valid()) {
        ec = error::invalid_buffer;
        return;
    }

    WorkRequestId workId(0x0u, buffer.id(), WorkType::RECEIVE);

    // Prepare work request
    struct ibv_recv_wr wr;
    memset(&wr, 0, sizeof(wr));
    wr.wr_id = workId.id();
    wr.sg_list = buffer.handle();
    wr.num_sge = 1;

    // Repost receives on shared queue
    struct ibv_recv_wr* bad_wr = nullptr;
    if (ibv_post_srq_recv(mQueue, &wr, &bad_wr)) {
        ec = std::error_code(errno, std::generic_category());
        return;
    }
}
static ssize_t
fi_ibv_srq_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
{
    struct fi_ibv_srq_ep *_ep;
    struct ibv_recv_wr wr, *bad;
    struct ibv_sge *sge = NULL;
    ssize_t ret;
    size_t i;

    _ep = container_of(ep, struct fi_ibv_srq_ep, ep_fid);
    assert(_ep->srq);

    wr.wr_id = (uintptr_t) msg->context;
    wr.next = NULL;
    if (msg->iov_count) {
        sge = alloca(sizeof(*sge) * msg->iov_count);
        for (i = 0; i < msg->iov_count; i++) {
            sge[i].addr = (uintptr_t) msg->msg_iov[i].iov_base;
            sge[i].length = (uint32_t) msg->msg_iov[i].iov_len;
            sge[i].lkey = (uint32_t) (uintptr_t) (msg->desc[i]);
        }
    }
    wr.sg_list = sge;
    wr.num_sge = msg->iov_count;

    ret = ibv_post_srq_recv(_ep->srq, &wr, &bad);
    switch (ret) {
    case ENOMEM:
        return -FI_EAGAIN;
    case -1:
        /* Deal with non-compliant libibverbs drivers which set errno
         * instead of directly returning the error value */
        return (errno == ENOMEM) ? -FI_EAGAIN : -errno;
    default:
        return -ret;
    }
}
static int pp_post_recv(int cnt)
{
    struct ibv_sge sge;
    struct ibv_recv_wr wr, *bad_wr;

    sge.addr = (uintptr_t) ctx.buf;
    sge.length = ctx.size;
    sge.lkey = ctx.mr->lkey;

    wr.next = NULL;
    wr.wr_id = (uintptr_t) &ctx;
    wr.sg_list = &sge;
    wr.num_sge = 1;

    while (cnt--) {
        if (ibv_post_srq_recv(ctx.srq, &wr, &bad_wr)) {
            fprintf(stderr, "Failed to post receive to SRQ\n");
            return 1;
        }
    }

    return 0;
}
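Note that this loop posts the same ibv_recv_wr (and the same buffer) cnt times. Reusing the work-request structure is safe because the verbs layer copies it into the receive queue before ibv_post_srq_recv() returns; reusing one data buffer for every posted receive is a deliberate simplification common in pingpong-style benchmarks, where the payload contents do not matter.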
static UCS_F_NOINLINE unsigned
uct_rc_verbs_iface_post_recv_always(uct_rc_verbs_iface_t *iface, unsigned max)
{
    struct ibv_recv_wr *bad_wr;
    uct_ib_recv_wr_t *wrs;
    unsigned count;
    int ret;

    wrs = ucs_alloca(sizeof *wrs * max);

    count = uct_ib_iface_prepare_rx_wrs(&iface->super.super, iface->super.rx.mp,
                                        wrs, max);
    if (count == 0) {
        return 0;
    }

    ret = ibv_post_srq_recv(iface->super.rx.srq, &wrs[0].ibwr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_post_srq_recv() returned %d: %m", ret);
    }
    iface->super.rx.available -= count;

    return count;
}
/***************************************************************************//**
 * Description
 *      Init rdma global resources
 ******************************************************************************/
static struct thread_context *init_rdma_thread_resources()
{
    struct thread_context *ctx = calloc(1, sizeof(struct thread_context));
    ctx->qp_hash = hashtable_create(1024);

    int num_device;
    if (!(ctx->device_ctx_list = rdma_get_devices(&num_device))) {
        perror("rdma_get_devices()");
        return NULL;
    }
    ctx->device_ctx = *ctx->device_ctx_list;
    if (verbose) {
        printf("Got %d device(s)\n", num_device);
    }

    if (!(ctx->pd = ibv_alloc_pd(ctx->device_ctx))) {
        perror("ibv_alloc_pd()");
        return NULL;
    }
    if (!(ctx->comp_channel = ibv_create_comp_channel(ctx->device_ctx))) {
        perror("ibv_create_comp_channel()");
        return NULL;
    }

    struct ibv_srq_init_attr srq_init_attr;
    srq_init_attr.srq_context = NULL;
    srq_init_attr.attr.max_sge = 16;
    srq_init_attr.attr.max_wr = srq_size;
    srq_init_attr.attr.srq_limit = srq_size; /* RDMA TODO: what is srq_limit? */
    if (!(ctx->srq = ibv_create_srq(ctx->pd, &srq_init_attr))) {
        perror("ibv_create_srq()");
        return NULL;
    }

    if (!(ctx->send_cq = ibv_create_cq(ctx->device_ctx, cq_size, NULL,
                                       ctx->comp_channel, 0))) {
        perror("ibv_create_cq()");
        return NULL;
    }
    if (0 != ibv_req_notify_cq(ctx->send_cq, 0)) {
        perror("ibv_req_notify_cq()");
        return NULL;
    }
    if (!(ctx->recv_cq = ibv_create_cq(ctx->device_ctx, cq_size, NULL,
                                       ctx->comp_channel, 0))) {
        perror("ibv_create_cq()");
        return NULL;
    }
    if (0 != ibv_req_notify_cq(ctx->recv_cq, 0)) {
        perror("ibv_req_notify_cq()");
        return NULL;
    }

    ctx->rsize = BUFF_SIZE;
    ctx->rbuf_list = calloc(buff_per_thread, sizeof(char *));
    ctx->rmr_list = calloc(buff_per_thread, sizeof(struct ibv_mr *));
    ctx->poll_wc = calloc(poll_wc_size, sizeof(struct ibv_wc));

    int i = 0;
    for (i = 0; i < buff_per_thread; ++i) {
        ctx->rbuf_list[i] = malloc(ctx->rsize);
        if (ctx->rbuf_list[i] == 0) {
            break;
        }
    }
    if (i != buff_per_thread) {
        /* Partial allocation: release what we got and flag the failure */
        int j = 0;
        for (j = 0; j < i; ++j) {
            free(ctx->rbuf_list[j]);
        }
        free(ctx->rbuf_list);
        ctx->rbuf_list = 0;
    }
    if (!ctx->rmr_list || !ctx->rbuf_list) {
        fprintf(stderr, "out of memory in init_rdma_thread_resources()\n");
        return NULL;
    }

    /* Register each buffer and pre-post it to the SRQ */
    struct ibv_recv_wr *bad = NULL;
    struct ibv_sge sge;
    struct ibv_recv_wr rwr;
    for (i = 0; i < buff_per_thread; ++i) {
        ctx->rmr_list[i] = ibv_reg_mr(ctx->pd, ctx->rbuf_list[i], ctx->rsize,
                                      IBV_ACCESS_LOCAL_WRITE);
        sge.addr = (uintptr_t)ctx->rbuf_list[i];
        sge.length = ctx->rsize;
        sge.lkey = ctx->rmr_list[i]->lkey;
        rwr.wr_id = (uintptr_t)ctx->rmr_list[i];
        rwr.next = NULL;
        rwr.sg_list = &sge;
        rwr.num_sge = 1;
        if (0 != ibv_post_srq_recv(ctx->srq, &rwr, &bad)) {
            perror("ibv_post_srq_recv()");
            return NULL;
        }
    }

    return ctx;
}
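The TODO in the example above asks what srq_limit is: it is the SRQ's low watermark. Once the number of outstanding receives in the SRQ drops below the limit, the device generates the asynchronous event IBV_EVENT_SRQ_LIMIT_REACHED (delivered through ibv_get_async_event()) so the application can replenish buffers. The srq_limit value passed to ibv_create_srq() is ignored by at least some providers, so the limit is normally armed afterwards via ibv_modify_srq(). A minimal sketch of arming it, where watermark is an assumed parameter:

static int arm_srq_limit(struct ibv_srq *srq, uint32_t watermark)
{
    struct ibv_srq_attr attr = {
        .srq_limit = watermark,
    };

    /* Arm the SRQ: IBV_EVENT_SRQ_LIMIT_REACHED fires when fewer than
     * `watermark` receives remain posted. The event disarms the limit,
     * so re-arm after handling it. */
    return ibv_modify_srq(srq, &attr, IBV_SRQ_LIMIT);
}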
gaspi_return_t
pgaspi_passive_receive (const gaspi_segment_id_t segment_id_local,
                        const gaspi_offset_t offset_local,
                        gaspi_rank_t * const rem_rank, const gaspi_size_t size,
                        const gaspi_timeout_t timeout_ms)
{
#ifdef DEBUG
  if (glb_gaspi_ctx_ib.rrmd[segment_id_local] == NULL)
    {
      gaspi_printf ("Debug: Invalid local segment (gaspi_passive_receive)\n");
      return GASPI_ERROR;
    }

  if (rem_rank == NULL)
    {
      gaspi_printf ("Debug: Invalid pointer parameter: rem_rank (gaspi_passive_receive)\n");
      return GASPI_ERROR;
    }

  if (offset_local > glb_gaspi_ctx_ib.rrmd[segment_id_local][glb_gaspi_ctx.rank].size)
    {
      gaspi_printf ("Debug: Invalid offsets (gaspi_passive_receive)\n");
      return GASPI_ERROR;
    }

  if (size < 1 || size > GASPI_MAX_TSIZE_P)
    {
      gaspi_printf ("Debug: Invalid size (gaspi_passive_receive)\n");
      return GASPI_ERROR;
    }
#endif

  struct ibv_recv_wr *bad_wr;
  struct ibv_wc wc_recv;
  struct ibv_sge rlist;
  struct ibv_recv_wr rwr;
  struct ibv_cq *ev_cq;
  void *ev_ctx;
  int i;
  fd_set rfds;
  struct timeval tout;

  lock_gaspi_tout (&glb_gaspi_ctx.lockPR, timeout_ms);

  rlist.addr =
    (uintptr_t) (glb_gaspi_ctx_ib.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr +
                 NOTIFY_OFFSET + offset_local);
  rlist.length = size;
  rlist.lkey = glb_gaspi_ctx_ib.rrmd[segment_id_local][glb_gaspi_ctx.rank].mr->lkey;

  rwr.wr_id = glb_gaspi_ctx.rank;
  rwr.sg_list = &rlist;
  rwr.num_sge = 1;
  rwr.next = NULL;

  if (ibv_post_srq_recv (glb_gaspi_ctx_ib.srqP, &rwr, &bad_wr))
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  /* Wait for the completion event, bounded by the caller's timeout */
  FD_ZERO (&rfds);
  FD_SET (glb_gaspi_ctx_ib.channelP->fd, &rfds);

  const long ts = (timeout_ms / 1000);
  const long tus = (timeout_ms - ts * 1000) * 1000;

  tout.tv_sec = ts;
  tout.tv_usec = tus;

  const int selret = select (FD_SETSIZE, &rfds, NULL, NULL, &tout);
  if (selret < 0)
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }
  else if (selret == 0)
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_TIMEOUT;
    }

  if (ibv_get_cq_event (glb_gaspi_ctx_ib.channelP, &ev_cq, &ev_ctx))
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  ibv_ack_cq_events (ev_cq, 1);

  if (ev_cq != glb_gaspi_ctx_ib.rcqP)
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  if (ibv_req_notify_cq (glb_gaspi_ctx_ib.rcqP, 0))
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  int ne = 0;
  do
    {
      ne = ibv_poll_cq (glb_gaspi_ctx_ib.rcqP, 1, &wc_recv);
    }
  while (ne == 0);

  if ((ne < 0) || (wc_recv.status != IBV_WC_SUCCESS))
    {
      glb_gaspi_ctx.qp_state_vec[GASPI_PASSIVE_QP][wc_recv.wr_id] = 1;
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  /* Map the completed QP number back to the sender's rank */
  *rem_rank = 0xffff;
  for (i = 0; i < glb_gaspi_ctx.tnc; i++)
    {
      if (glb_gaspi_ctx_ib.qpP[i]->qp_num == wc_recv.qp_num)
        {
          *rem_rank = i;
          break;
        }
    }

  unlock_gaspi (&glb_gaspi_ctx.lockPR);
  return GASPI_SUCCESS;
}