struct ibv_srq_1_0 *__ibv_create_srq_1_0(struct ibv_pd_1_0 *pd, struct ibv_srq_init_attr *srq_init_attr) { struct ibv_srq *real_srq; struct ibv_srq_1_0 *srq; srq = malloc(sizeof *srq); if (!srq) return NULL; real_srq = ibv_create_srq(pd->real_pd, srq_init_attr); if (!real_srq) { free(srq); return NULL; } srq->context = pd->context; srq->srq_context = srq_init_attr->srq_context; srq->pd = pd; srq->real_srq = real_srq; real_srq->srq_context = srq; return srq; }
struct ibv_srq_1_0 *__ibv_create_srq_1_0(struct ibv_pd_1_0 *pd, struct ibv_srq_init_attr *srq_init_attr) { fprintf(stderr, "%s:%s:%d \n", __func__, __FILE__, __LINE__); struct ibv_srq *real_srq; struct ibv_srq_1_0 *srq; srq = malloc(sizeof *srq); if (!srq) return NULL; real_srq = ibv_create_srq(pd->real_pd, srq_init_attr); if (!real_srq) { free(srq); return NULL; } srq->context = pd->context; srq->srq_context = srq_init_attr->srq_context; srq->pd = pd; srq->real_srq = real_srq; real_srq->srq_context = srq; return srq; }
/**
 * Creates a shared receive queue of the given capacity on the domain.
 *
 * @param domain Protection domain the SRQ is allocated from.
 * @param length Maximum number of outstanding receive work requests.
 * @throws std::system_error carrying the current errno if creation fails.
 */
SharedReceiveQueue::SharedReceiveQueue(const ProtectionDomain& domain, uint32_t length) {
    // Value-initialization zeroes every field (srq_context, srq_limit, ...).
    struct ibv_srq_init_attr srq_attr = {};
    srq_attr.attr.max_wr = length;
    srq_attr.attr.max_sge = 1;

    mQueue = ibv_create_srq(domain.get(), &srq_attr);
    if (mQueue == nullptr) {
        throw std::system_error(errno, std::generic_category());
    }
    LOG_TRACE("Created shared receive queue");
}
/** * Create a srq using process info data. */ struct ibv_srq *create_srq(int hca_num) { struct ibv_srq_init_attr srq_init_attr; struct ibv_srq *srq_ptr = NULL; memset(&srq_init_attr, 0, sizeof(srq_init_attr)); srq_init_attr.srq_context = hca_list[hca_num].nic_context; srq_init_attr.attr.max_wr = viadev_srq_alloc_size; srq_init_attr.attr.max_sge = 1; /* The limit value should be ignored during SRQ create */ srq_init_attr.attr.srq_limit = viadev_srq_limit; srq_ptr = ibv_create_srq(hca_list[hca_num].ptag, &srq_init_attr); if (!srq_ptr) { ibv_error_abort(-1, "Error creating SRQ\n"); } return srq_ptr; }
int fi_ibv_srq_context(struct fid_domain *domain, struct fi_rx_attr *attr, struct fid_ep **rx_ep, void *context) { struct ibv_srq_init_attr srq_init_attr = {}; struct fi_ibv_domain *dom; struct fi_ibv_srq_ep *_rx_ep; if (!domain) return -FI_EINVAL; _rx_ep = calloc(1, sizeof *_rx_ep); if (!_rx_ep) return -FI_ENOMEM; dom = container_of(domain, struct fi_ibv_domain, domain_fid); _rx_ep->ep_fid.fid.fclass = FI_CLASS_SRX_CTX; _rx_ep->ep_fid.fid.context = context; _rx_ep->ep_fid.fid.ops = &fi_ibv_srq_ep_ops; _rx_ep->ep_fid.ops = &fi_ibv_srq_ep_base_ops; _rx_ep->ep_fid.msg = &fi_ibv_srq_msg_ops; _rx_ep->ep_fid.cm = &fi_ibv_srq_cm_ops; _rx_ep->ep_fid.rma = &fi_ibv_srq_rma_ops; _rx_ep->ep_fid.atomic = &fi_ibv_srq_atomic_ops; srq_init_attr.attr.max_wr = attr->size; srq_init_attr.attr.max_sge = attr->iov_limit; _rx_ep->srq = ibv_create_srq(dom->pd, &srq_init_attr); if (!_rx_ep->srq) { free(_rx_ep); return -errno; } *rx_ep = &_rx_ep->ep_fid; return 0; }
/***************************************************************************//** * Description * Init rdma global resources * ******************************************************************************/ static struct thread_context* init_rdma_thread_resources() { struct thread_context *ctx = calloc(1, sizeof(struct thread_context)); ctx->qp_hash = hashtable_create(1024); int num_device; if ( !(ctx->device_ctx_list = rdma_get_devices(&num_device)) ) { perror("rdma_get_devices()"); return NULL; } ctx->device_ctx = *ctx->device_ctx_list; if (verbose) { printf("Get device: %d\n", num_device); } if ( !(ctx->pd = ibv_alloc_pd(ctx->device_ctx)) ) { perror("ibv_alloc_pd()"); return NULL; } if ( !(ctx->comp_channel = ibv_create_comp_channel(ctx->device_ctx)) ) { perror("ibv_create_comp_channel()"); return NULL; } struct ibv_srq_init_attr srq_init_attr; srq_init_attr.srq_context = NULL; srq_init_attr.attr.max_sge = 16; srq_init_attr.attr.max_wr = srq_size; srq_init_attr.attr.srq_limit = srq_size; /* RDMA TODO: what is srq_limit? 
*/ if ( !(ctx->srq = ibv_create_srq(ctx->pd, &srq_init_attr)) ) { perror("ibv_create_srq()"); return NULL; } if ( !(ctx->send_cq = ibv_create_cq(ctx->device_ctx, cq_size, NULL, ctx->comp_channel, 0)) ) { perror("ibv_create_cq()"); return NULL; } if (0 != ibv_req_notify_cq(ctx->send_cq, 0)) { perror("ibv_reg_notify_cq()"); return NULL; } if ( !(ctx->recv_cq = ibv_create_cq(ctx->device_ctx, cq_size, NULL, ctx->comp_channel, 0)) ) { perror("ibv_create_cq()"); return NULL; } if (0 != ibv_req_notify_cq(ctx->recv_cq, 0)) { perror("ibv_reg_notify_cq()"); return NULL; } ctx->rsize = BUFF_SIZE; ctx->rbuf_list = calloc(buff_per_thread, sizeof(char *)); ctx->rmr_list = calloc(buff_per_thread, sizeof(struct ibv_mr*)); ctx->poll_wc = calloc(poll_wc_size, sizeof(struct ibv_wc)); int i = 0; for (i = 0; i < buff_per_thread; ++i) { ctx->rbuf_list[i] = malloc(ctx->rsize); if (ctx->rbuf_list[i] == 0) { break; } } if (i != buff_per_thread) { int j = 0; for (j = 0; j < i; ++j) { free(ctx->rbuf_list[j]); } free(ctx->rbuf_list); ctx->rbuf_list = 0; } if (!ctx->rmr_list || !ctx->rbuf_list) { fprintf(stderr, "out of ctxmory in init_rdma_thread_resources()\n"); return NULL; } struct ibv_recv_wr *bad = NULL; struct ibv_sge sge; struct ibv_recv_wr rwr; for (i = 0; i < buff_per_thread; ++i) { ctx->rmr_list[i] = ibv_reg_mr(ctx->pd, ctx->rbuf_list[i], ctx->rsize, IBV_ACCESS_LOCAL_WRITE); sge.addr = (uintptr_t)ctx->rbuf_list[i]; sge.length = ctx->rsize; sge.lkey = ctx->rmr_list[i]->lkey; rwr.wr_id = (uintptr_t)ctx->rmr_list[i]; rwr.next = NULL; rwr.sg_list = &sge; rwr.num_sge = 1; if (0 != ibv_post_srq_recv(ctx->srq, &rwr, &bad)) { perror("ibv_post_srq_recv()"); return NULL; } } return ctx; }
/*
 * Create both the high- and low-priority completion queues and, when SRQ
 * usage is enabled, the two shared receive queues for this openib BTL.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR after reporting the failing verb via
 * show_init_error().
 *
 * BUG FIX: struct ibv_srq_init_attr was passed to ibv_create_srq() with
 * srq_context and srq_limit uninitialized (stack garbage handed to the
 * driver); the structure is now zero-initialized before the used fields
 * are filled in.
 */
int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
{
    openib_btl->poll_cq = false;

    if (mca_btl_openib_component.use_srq) {
        struct ibv_srq_init_attr attr = {0};  /* zero srq_context/srq_limit */

        attr.attr.max_wr = mca_btl_openib_component.srq_rd_max;
        attr.attr.max_sge = mca_btl_openib_component.ib_sg_list_size;
        openib_btl->srd_posted[BTL_OPENIB_HP_QP] = 0;
        openib_btl->srd_posted[BTL_OPENIB_LP_QP] = 0;

        openib_btl->srq[BTL_OPENIB_HP_QP] =
            ibv_create_srq(openib_btl->hca->ib_pd, &attr);
        if (NULL == openib_btl->srq[BTL_OPENIB_HP_QP]) {
            show_init_error(__FILE__, __LINE__, "ibv_create_srq",
                            ibv_get_device_name(openib_btl->hca->ib_dev));
            return OMPI_ERROR;
        }

        openib_btl->srq[BTL_OPENIB_LP_QP] =
            ibv_create_srq(openib_btl->hca->ib_pd, &attr);
        if (NULL == openib_btl->srq[BTL_OPENIB_LP_QP]) {
            show_init_error(__FILE__, __LINE__, "ibv_create_srq",
                            ibv_get_device_name(openib_btl->hca->ib_dev));
            return OMPI_ERROR;
        }
    } else {
        openib_btl->srq[BTL_OPENIB_HP_QP] = NULL;
        openib_btl->srq[BTL_OPENIB_LP_QP] = NULL;
    }

    /* Create the low and high priority queue pairs; the _ARGS macro selects
     * the 3-argument signature of older libibverbs releases. */
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
    openib_btl->ib_cq[BTL_OPENIB_LP_QP] =
        ibv_create_cq(openib_btl->hca->ib_dev_context,
                      mca_btl_openib_component.ib_cq_size, NULL);
#else
    openib_btl->ib_cq[BTL_OPENIB_LP_QP] =
        ibv_create_cq(openib_btl->hca->ib_dev_context,
                      mca_btl_openib_component.ib_cq_size, NULL, NULL, 0);
#endif
    if (NULL == openib_btl->ib_cq[BTL_OPENIB_LP_QP]) {
        show_init_error(__FILE__, __LINE__, "ibv_create_cq",
                        ibv_get_device_name(openib_btl->hca->ib_dev));
        return OMPI_ERROR;
    }

#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
    openib_btl->ib_cq[BTL_OPENIB_HP_QP] =
        ibv_create_cq(openib_btl->hca->ib_dev_context,
                      mca_btl_openib_component.ib_cq_size, NULL);
#else
    openib_btl->ib_cq[BTL_OPENIB_HP_QP] =
        ibv_create_cq(openib_btl->hca->ib_dev_context,
                      mca_btl_openib_component.ib_cq_size, NULL, NULL, 0);
#endif
    if (NULL == openib_btl->ib_cq[BTL_OPENIB_HP_QP]) {
        show_init_error(__FILE__, __LINE__, "ibv_create_cq",
                        ibv_get_device_name(openib_btl->hca->ib_dev));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, uint8_t sgid_idx, union ibv_gid *dgid, uint32_t dqpn, uint32_t rq_psn, uint32_t qkey, bool use_qkey) { struct ibv_qp_attr attr = {}; union ibv_gid ibv_gid = { .global.interface_id = dgid->global.interface_id, .global.subnet_prefix = dgid->global.subnet_prefix }; int rc, attr_mask; attr.qp_state = IBV_QPS_RTR; attr_mask = IBV_QP_STATE; qp->sgid_idx = sgid_idx; switch (qp_type) { case IBV_QPT_RC: attr.path_mtu = IBV_MTU_1024; attr.dest_qp_num = dqpn; attr.max_dest_rd_atomic = 1; attr.min_rnr_timer = 12; attr.ah_attr.port_num = backend_dev->port_num; attr.ah_attr.is_global = 1; attr.ah_attr.grh.hop_limit = 1; attr.ah_attr.grh.dgid = ibv_gid; attr.ah_attr.grh.sgid_index = qp->sgid_idx; attr.rq_psn = rq_psn; attr_mask |= IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; trace_rdma_backend_rc_qp_state_rtr(qp->ibqp->qp_num, be64_to_cpu(ibv_gid.global. subnet_prefix), be64_to_cpu(ibv_gid.global. interface_id), qp->sgid_idx, dqpn, rq_psn); break; case IBV_QPT_UD: if (use_qkey) { attr.qkey = qkey; attr_mask |= IBV_QP_QKEY; } trace_rdma_backend_ud_qp_state_rtr(qp->ibqp->qp_num, use_qkey ? 
qkey : 0); break; } rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask); if (rc) { rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", rc, errno); return -EIO; } return 0; } int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type, uint32_t sq_psn, uint32_t qkey, bool use_qkey) { struct ibv_qp_attr attr = {}; int rc, attr_mask; attr.qp_state = IBV_QPS_RTS; attr.sq_psn = sq_psn; attr_mask = IBV_QP_STATE | IBV_QP_SQ_PSN; switch (qp_type) { case IBV_QPT_RC: attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; attr.max_rd_atomic = 1; attr_mask |= IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC; trace_rdma_backend_rc_qp_state_rts(qp->ibqp->qp_num, sq_psn); break; case IBV_QPT_UD: if (use_qkey) { attr.qkey = qkey; attr_mask |= IBV_QP_QKEY; } trace_rdma_backend_ud_qp_state_rts(qp->ibqp->qp_num, sq_psn, use_qkey ? qkey : 0); break; } rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask); if (rc) { rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", rc, errno); return -EIO; } return 0; } int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr) { if (!qp->ibqp) { attr->qp_state = IBV_QPS_RTS; return 0; } return ibv_query_qp(qp->ibqp, attr, attr_mask, init_attr); } void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res) { if (qp->ibqp) { ibv_destroy_qp(qp->ibqp); } g_slist_foreach(qp->cqe_ctx_list.list, free_cqe_ctx, dev_res); rdma_protected_gslist_destroy(&qp->cqe_ctx_list); } int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd, uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit) { struct ibv_srq_init_attr srq_init_attr = {}; srq_init_attr.attr.max_wr = max_wr; srq_init_attr.attr.max_sge = max_sge; srq_init_attr.attr.srq_limit = srq_limit; srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr); if (!srq->ibsrq) { rdma_error_report("ibv_create_srq failed, errno=%d", errno); return -EIO; } 
rdma_protected_gslist_init(&srq->cqe_ctx_list); return 0; } int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr) { if (!srq->ibsrq) { return -EINVAL; } return ibv_query_srq(srq->ibsrq, srq_attr); } int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr, int srq_attr_mask) { if (!srq->ibsrq) { return -EINVAL; } return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask); } void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res) { if (srq->ibsrq) { ibv_destroy_srq(srq->ibsrq); } g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res); rdma_protected_gslist_destroy(&srq->cqe_ctx_list); } #define CHK_ATTR(req, dev, member, fmt) ({ \ trace_rdma_check_dev_attr(#member, dev.member, req->member); \ if (req->member > dev.member) { \ rdma_warn_report("%s = "fmt" is higher than host device capability "fmt, \ #member, req->member, dev.member); \ req->member = dev.member; \ } \ }) static int init_device_caps(RdmaBackendDev *backend_dev, struct ibv_device_attr *dev_attr) { struct ibv_device_attr bk_dev_attr; int rc; rc = ibv_query_device(backend_dev->context, &bk_dev_attr); if (rc) { rdma_error_report("ibv_query_device fail, rc=%d, errno=%d", rc, errno); return -EIO; } dev_attr->max_sge = MAX_SGE; dev_attr->max_srq_sge = MAX_SGE; CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64); CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_sge, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_cq, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_mr, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_pd, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d"); return 0; } static inline void build_mad_hdr(struct ibv_grh *grh, union ibv_gid *sgid, union ibv_gid *my_gid, int paylen) { grh->paylen = htons(paylen); grh->sgid = *sgid; 
grh->dgid = *my_gid; }