/*
 * Grow the receive-queue / completion-queue sizing of an openib BTL module
 * to account for `nprocs` additional peers, resizing the existing SRQs and
 * CQs in place when the module has already been set up, or creating them
 * on the first call.
 *
 * @param openib_btl  BTL module whose queues are being sized.
 * @param nprocs      Number of peers being added to this module.
 * @return OMPI_SUCCESS on success, OMPI_ERROR if a verbs resize call fails,
 *         or the return of mca_btl_openib_create_cq_srq() on first use.
 *
 * Fix vs. original: srq_attr was passed to ibv_modify_srq() with only
 * max_wr assigned, leaving max_sge/srq_limit indeterminate.  The
 * IBV_SRQ_MAX_WR mask should restrict which fields the provider reads,
 * but handing indeterminate stack data to a driver is fragile; the struct
 * is now zero-initialized.
 */
int mca_btl_openib_size_queues(struct mca_btl_openib_module_t* openib_btl,
                               size_t nprocs)
{
    int min_cq_size;
    int first_time = (0 == openib_btl->num_peers);  /* queues not created yet */
    int rc;

    openib_btl->num_peers += nprocs;

    if (mca_btl_openib_component.use_srq) {
        /* Scale SRQ depth logarithmically with peer count, clamped at
         * srq_rd_max.  NOTE(review): log2() is the libm double version;
         * assumes nprocs >= 1 so the result is non-negative — confirm
         * callers never pass 0. */
        openib_btl->rd_num = mca_btl_openib_component.rd_num +
            log2(nprocs) * mca_btl_openib_component.srq_rd_per_peer;
        if (openib_btl->rd_num > mca_btl_openib_component.srq_rd_max)
            openib_btl->rd_num = mca_btl_openib_component.srq_rd_max;
        openib_btl->rd_low = openib_btl->rd_num - 1;
        min_cq_size = openib_btl->rd_num * 2 * openib_btl->num_peers;

        if (!first_time) {
            /* Resize both existing SRQs to the new depth.  Zero-init so
             * fields outside the IBV_SRQ_MAX_WR mask are defined. */
            struct ibv_srq_attr srq_attr = {0};
            srq_attr.max_wr = openib_btl->rd_num;

            rc = ibv_modify_srq(openib_btl->srq[BTL_OPENIB_HP_QP],
                                &srq_attr, IBV_SRQ_MAX_WR);
            if (rc) {
                BTL_ERROR(("cannot resize high priority shared receive queue, error: %d", rc));
                return OMPI_ERROR;
            }
            rc = ibv_modify_srq(openib_btl->srq[BTL_OPENIB_LP_QP],
                                &srq_attr, IBV_SRQ_MAX_WR);
            if (rc) {
                BTL_ERROR(("cannot resize low priority shared receive queue, error: %d", rc));
                return OMPI_ERROR;
            }
        }
    } else {
        /* Per-peer receive queues: size the CQ for the larger of the
         * normal and eager-RDMA receive descriptor counts. */
        min_cq_size = (mca_btl_openib_component.rd_num >
                       (int32_t) mca_btl_openib_component.eager_rdma_num ?
                       mca_btl_openib_component.rd_num :
                       (int32_t) mca_btl_openib_component.eager_rdma_num)
                      * 2 * openib_btl->num_peers;
    }

    if (min_cq_size > (int32_t) mca_btl_openib_component.ib_cq_size) {
        /* Clamp the requested CQ size to the device maximum. */
        mca_btl_openib_component.ib_cq_size =
            min_cq_size > openib_btl->hca->ib_dev_attr.max_cq ?
            openib_btl->hca->ib_dev_attr.max_cq : min_cq_size;
#if OMPI_MCA_BTL_OPENIB_HAVE_RESIZE_CQ
        if (!first_time) {
            rc = ibv_resize_cq(openib_btl->ib_cq[BTL_OPENIB_LP_QP],
                               mca_btl_openib_component.ib_cq_size);
            if (rc) {
                BTL_ERROR(("cannot resize low priority completion queue, error: %d", rc));
                return OMPI_ERROR;
            }
            rc = ibv_resize_cq(openib_btl->ib_cq[BTL_OPENIB_HP_QP],
                               mca_btl_openib_component.ib_cq_size);
            if (rc) {
                BTL_ERROR(("cannot resize high priority completion queue, error: %d", rc));
                return OMPI_ERROR;
            }
        }
#endif
    }

    if (first_time) {
        /* never been here before, setup cq and srq */
        mca_btl_openib_component.ib_cq_size =
            (int) mca_btl_openib_component.ib_cq_size >
            openib_btl->hca->ib_dev_attr.max_cq ?
            openib_btl->hca->ib_dev_attr.max_cq :
            (int) mca_btl_openib_component.ib_cq_size;
        return mca_btl_openib_create_cq_srq(openib_btl);
    }

    return OMPI_SUCCESS;
}
/*
 * ABI-compatibility wrapper: forwards a modify-SRQ request made through
 * the old 1.0-era SRQ handle type to the current implementation by
 * dereferencing the embedded real_srq pointer.
 * NOTE(review): presumably exported under the IBVERBS_1.0 symbol version
 * for binaries linked against the old ABI — confirm against the library's
 * version script.
 *
 * Returns whatever ibv_modify_srq() returns (0 on success, errno value
 * on failure per the verbs convention).
 */
int __ibv_modify_srq_1_0(struct ibv_srq_1_0 *srq,
                         struct ibv_srq_attr *srq_attr,
                         int srq_attr_mask)
{
    return ibv_modify_srq(srq->real_srq, srq_attr, srq_attr_mask);
}
/*
 * Transition a backend QP to RTR (Ready To Receive).
 *
 * Builds an ibv_qp_attr/attr_mask pair appropriate for the QP type:
 * RC QPs get path/AV, destination QPN, RQ PSN, RD-atomic and RNR-timer
 * attributes; UD QPs optionally get a qkey.  The destination GID is
 * copied field-by-field into a local union before use.
 * Returns 0 on success, -EIO if ibv_modify_qp() fails.
 */
int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
                              uint8_t qp_type, uint8_t sgid_idx,
                              union ibv_gid *dgid, uint32_t dqpn,
                              uint32_t rq_psn, uint32_t qkey, bool use_qkey)
{
    struct ibv_qp_attr attr = {};
    union ibv_gid ibv_gid = {
        .global.interface_id = dgid->global.interface_id,
        .global.subnet_prefix = dgid->global.subnet_prefix
    };
    int rc, attr_mask;

    attr.qp_state = IBV_QPS_RTR;
    attr_mask = IBV_QP_STATE;

    /* Remember the source GID index on the QP for later use. */
    qp->sgid_idx = sgid_idx;

    switch (qp_type) {
    case IBV_QPT_RC:
        attr.path_mtu = IBV_MTU_1024;
        attr.dest_qp_num = dqpn;
        attr.max_dest_rd_atomic = 1;
        attr.min_rnr_timer = 12;
        attr.ah_attr.port_num = backend_dev->port_num;
        attr.ah_attr.is_global = 1;
        attr.ah_attr.grh.hop_limit = 1;
        attr.ah_attr.grh.dgid = ibv_gid;
        attr.ah_attr.grh.sgid_index = qp->sgid_idx;
        attr.rq_psn = rq_psn;

        attr_mask |= IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN |
                     IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC |
                     IBV_QP_MIN_RNR_TIMER;

        trace_rdma_backend_rc_qp_state_rtr(qp->ibqp->qp_num,
                                           be64_to_cpu(ibv_gid.global.subnet_prefix),
                                           be64_to_cpu(ibv_gid.global.interface_id),
                                           qp->sgid_idx, dqpn, rq_psn);
        break;

    case IBV_QPT_UD:
        if (use_qkey) {
            attr.qkey = qkey;
            attr_mask |= IBV_QP_QKEY;
        }
        trace_rdma_backend_ud_qp_state_rtr(qp->ibqp->qp_num,
                                           use_qkey ? qkey : 0);
        break;
    }
    /* NOTE(review): no default case — other QP types fall through and
     * only IBV_QP_STATE is applied. */

    rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask);
    if (rc) {
        rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", rc, errno);
        return -EIO;
    }

    return 0;
}

/*
 * Transition a backend QP to RTS (Ready To Send).
 *
 * RC QPs get timeout/retry/RNR-retry/RD-atomic attributes; UD QPs
 * optionally get a qkey.  Returns 0 on success, -EIO on failure.
 */
int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type,
                              uint32_t sq_psn, uint32_t qkey, bool use_qkey)
{
    struct ibv_qp_attr attr = {};
    int rc, attr_mask;

    attr.qp_state = IBV_QPS_RTS;
    attr.sq_psn = sq_psn;
    attr_mask = IBV_QP_STATE | IBV_QP_SQ_PSN;

    switch (qp_type) {
    case IBV_QPT_RC:
        attr.timeout = 14;
        attr.retry_cnt = 7;
        attr.rnr_retry = 7;
        attr.max_rd_atomic = 1;

        attr_mask |= IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY |
                     IBV_QP_MAX_QP_RD_ATOMIC;

        trace_rdma_backend_rc_qp_state_rts(qp->ibqp->qp_num, sq_psn);
        break;

    case IBV_QPT_UD:
        if (use_qkey) {
            attr.qkey = qkey;
            attr_mask |= IBV_QP_QKEY;
        }
        trace_rdma_backend_ud_qp_state_rts(qp->ibqp->qp_num, sq_psn,
                                           use_qkey ? qkey : 0);
        break;
    }

    rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask);
    if (rc) {
        rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", rc, errno);
        return -EIO;
    }

    return 0;
}

/*
 * Query a backend QP's attributes.  If the QP has no underlying ibv QP,
 * report it as RTS and succeed (see qp->ibqp NULL check); otherwise
 * forward to ibv_query_qp().
 */
int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr,
                          int attr_mask, struct ibv_qp_init_attr *init_attr)
{
    if (!qp->ibqp) {
        attr->qp_state = IBV_QPS_RTS;
        return 0;
    }

    return ibv_query_qp(qp->ibqp, attr, attr_mask, init_attr);
}

/*
 * Destroy a backend QP: tear down the ibv QP (if any) and release every
 * pending CQE context on its list before destroying the list itself.
 */
void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
{
    if (qp->ibqp) {
        ibv_destroy_qp(qp->ibqp);
    }
    g_slist_foreach(qp->cqe_ctx_list.list, free_cqe_ctx, dev_res);
    rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
}

/*
 * Create a shared receive queue on protection domain `pd` with the given
 * work-request depth, SGE count and limit.  Initializes the SRQ's CQE
 * context list on success.  Returns 0 on success, -EIO on failure.
 */
int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
                            uint32_t max_wr, uint32_t max_sge,
                            uint32_t srq_limit)
{
    struct ibv_srq_init_attr srq_init_attr = {};

    srq_init_attr.attr.max_wr = max_wr;
    srq_init_attr.attr.max_sge = max_sge;
    srq_init_attr.attr.srq_limit = srq_limit;

    srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
    if (!srq->ibsrq) {
        rdma_error_report("ibv_create_srq failed, errno=%d", errno);
        return -EIO;
    }

    rdma_protected_gslist_init(&srq->cqe_ctx_list);

    return 0;
}

/* Query an SRQ's attributes; -EINVAL if it was never created. */
int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
{
    if (!srq->ibsrq) {
        return -EINVAL;
    }

    return ibv_query_srq(srq->ibsrq, srq_attr);
}

/* Modify an SRQ's attributes; -EINVAL if it was never created. */
int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
                            int srq_attr_mask)
{
    if (!srq->ibsrq) {
        return -EINVAL;
    }

    return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
}

/*
 * Destroy a backend SRQ: tear down the ibv SRQ (if any) and release
 * every pending CQE context, mirroring rdma_backend_destroy_qp().
 */
void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
{
    if (srq->ibsrq) {
        ibv_destroy_srq(srq->ibsrq);
    }
    g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
    rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
}

/*
 * Clamp a requested device attribute `member` to the host device's
 * capability: traces the pair, warns and overwrites the request when it
 * exceeds the host value.  `fmt` is the printf format for the member.
 */
#define CHK_ATTR(req, dev, member, fmt) ({ \
    trace_rdma_check_dev_attr(#member, dev.member, req->member); \
    if (req->member > dev.member) { \
        rdma_warn_report("%s = "fmt" is higher than host device capability "fmt, \
                         #member, req->member, dev.member); \
        req->member = dev.member; \
    } \
})

/*
 * Populate/validate the emulated device capabilities: query the host
 * device, force the SGE limits to MAX_SGE, then clamp each requested
 * attribute to the host's capability via CHK_ATTR.
 * Returns 0 on success, -EIO if the host query fails.
 */
static int init_device_caps(RdmaBackendDev *backend_dev,
                            struct ibv_device_attr *dev_attr)
{
    struct ibv_device_attr bk_dev_attr;
    int rc;

    rc = ibv_query_device(backend_dev->context, &bk_dev_attr);
    if (rc) {
        rdma_error_report("ibv_query_device fail, rc=%d, errno=%d", rc, errno);
        return -EIO;
    }

    dev_attr->max_sge = MAX_SGE;
    dev_attr->max_srq_sge = MAX_SGE;

    CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
    CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_sge, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_cq, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_mr, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_pd, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");

    return 0;
}

/*
 * Fill the GRH header of an outgoing MAD: payload length (network byte
 * order) plus source and destination GIDs.  Note the destination is the
 * local gid (`my_gid`) — responses are addressed back to this device.
 */
static inline void build_mad_hdr(struct ibv_grh *grh, union ibv_gid *sgid,
                                 union ibv_gid *my_gid, int paylen)
{
    grh->paylen = htons(paylen);
    grh->sgid = *sgid;
    grh->dgid = *my_gid;
}
/*
 * ABI-compatibility wrapper: forwards a modify-SRQ request made through
 * the old 1.0-era SRQ handle type to the current implementation by
 * dereferencing the embedded real_srq pointer.
 *
 * Fix: removed a leftover debug fprintf that dumped __func__/__FILE__/
 * __LINE__ to stderr on every call through the compat path — pure
 * tracing noise with no effect on the result.
 *
 * Returns whatever ibv_modify_srq() returns (0 on success, errno value
 * on failure per the verbs convention).
 */
int __ibv_modify_srq_1_0(struct ibv_srq_1_0 *srq,
                         struct ibv_srq_attr *srq_attr,
                         int srq_attr_mask)
{
    return ibv_modify_srq(srq->real_srq, srq_attr, srq_attr_mask);
}