Пример #1
0
/*
 * Compatibility shim: create a shared receive queue for a libibverbs-1.0
 * caller.  Allocates a 1.0 wrapper object, creates the real SRQ on the
 * wrapped protection domain, and cross-links the two so later calls can
 * translate between the old and new layouts.  Returns NULL on allocation
 * or SRQ-creation failure; nothing is leaked in either case.
 */
struct ibv_srq_1_0 *__ibv_create_srq_1_0(struct ibv_pd_1_0 *pd,
					 struct ibv_srq_init_attr *srq_init_attr)
{
	struct ibv_srq_1_0 *wrapper = malloc(sizeof *wrapper);

	if (!wrapper)
		return NULL;

	wrapper->real_srq = ibv_create_srq(pd->real_pd, srq_init_attr);
	if (!wrapper->real_srq) {
		free(wrapper);
		return NULL;
	}

	wrapper->context     = pd->context;
	wrapper->srq_context = srq_init_attr->srq_context;
	wrapper->pd          = pd;

	/* Back-pointer: lets the real SRQ be mapped to its 1.0 wrapper. */
	wrapper->real_srq->srq_context = wrapper;

	return wrapper;
}
Пример #2
0
/*
 * Compatibility shim: create a shared receive queue for a libibverbs-1.0
 * caller.  Allocates a 1.0 wrapper object, creates the real SRQ on the
 * wrapped protection domain, and cross-links the two.  Returns NULL on
 * allocation or SRQ-creation failure; nothing is leaked in either case.
 */
struct ibv_srq_1_0 *__ibv_create_srq_1_0(struct ibv_pd_1_0 *pd,
					 struct ibv_srq_init_attr *srq_init_attr)
{
	/* Fix: removed leftover debug instrumentation --
	 * fprintf(stderr, "%s:%s:%d \n", __func__, __FILE__, __LINE__);
	 * traced every call to stderr in production code. */
	struct ibv_srq *real_srq;
	struct ibv_srq_1_0 *srq;

	srq = malloc(sizeof *srq);
	if (!srq)
		return NULL;

	real_srq = ibv_create_srq(pd->real_pd, srq_init_attr);
	if (!real_srq) {
		free(srq);
		return NULL;
	}

	srq->context     = pd->context;
	srq->srq_context = srq_init_attr->srq_context;
	srq->pd          = pd;
	srq->real_srq    = real_srq;

	/* Back-pointer: lets the real SRQ be mapped to its 1.0 wrapper. */
	real_srq->srq_context = srq;

	return srq;
}
Пример #3
0
SharedReceiveQueue::SharedReceiveQueue(const ProtectionDomain& domain, uint32_t length) {
    // Build the SRQ parameters: `length` outstanding work requests, one
    // scatter/gather element each (value-initialization zeroes the rest).
    struct ibv_srq_init_attr init_attr = {};
    init_attr.attr.max_sge = 1;
    init_attr.attr.max_wr = length;

    // Create the queue on the caller's protection domain; on failure the
    // verbs layer reports the cause through errno.
    mQueue = ibv_create_srq(domain.get(), &init_attr);
    if (mQueue == nullptr) {
        throw std::system_error(errno, std::generic_category());
    }
    LOG_TRACE("Created shared receive queue");
}
Пример #4
0
/**
 * Create a srq using process info data.
 */
struct ibv_srq *create_srq(int hca_num)
{
    struct ibv_srq_init_attr srq_init_attr;
    struct ibv_srq *srq_ptr = NULL;

    memset(&srq_init_attr, 0, sizeof(srq_init_attr));

    srq_init_attr.srq_context    = hca_list[hca_num].nic_context;
    srq_init_attr.attr.max_wr    = viadev_srq_alloc_size;
    srq_init_attr.attr.max_sge   = 1;
    /* The limit value should be ignored during SRQ create */
    srq_init_attr.attr.srq_limit = viadev_srq_limit;

    srq_ptr = ibv_create_srq(hca_list[hca_num].ptag, &srq_init_attr);

    if (!srq_ptr) {
        ibv_error_abort(-1, "Error creating SRQ\n");
    }

    return srq_ptr;
}
Пример #5
0
/*
 * fi_ibv_srq_context() - create a shared-RX (SRX) endpoint backed by an
 * ibverbs SRQ on the given domain.
 *
 * On success stores the new endpoint's fid in *rx_ep and returns 0; on
 * failure returns a negative fi_errno value and leaves nothing allocated.
 */
int fi_ibv_srq_context(struct fid_domain *domain, struct fi_rx_attr *attr,
		struct fid_ep **rx_ep, void *context)
{
	struct ibv_srq_init_attr srq_init_attr = {};
	struct fi_ibv_srq_ep *srq_ep;
	struct fi_ibv_domain *dom;

	if (!domain)
		return -FI_EINVAL;

	srq_ep = calloc(1, sizeof *srq_ep);
	if (!srq_ep)
		return -FI_ENOMEM;

	dom = container_of(domain, struct fi_ibv_domain, domain_fid);

	/* Wire up the fid and all of the SRX operation tables. */
	srq_ep->ep_fid.fid.fclass = FI_CLASS_SRX_CTX;
	srq_ep->ep_fid.fid.context = context;
	srq_ep->ep_fid.fid.ops = &fi_ibv_srq_ep_ops;
	srq_ep->ep_fid.ops = &fi_ibv_srq_ep_base_ops;
	srq_ep->ep_fid.msg = &fi_ibv_srq_msg_ops;
	srq_ep->ep_fid.cm = &fi_ibv_srq_cm_ops;
	srq_ep->ep_fid.rma = &fi_ibv_srq_rma_ops;
	srq_ep->ep_fid.atomic = &fi_ibv_srq_atomic_ops;

	/* SRQ sizing comes straight from the caller's RX attributes. */
	srq_init_attr.attr.max_sge = attr->iov_limit;
	srq_init_attr.attr.max_wr = attr->size;

	srq_ep->srq = ibv_create_srq(dom->pd, &srq_init_attr);
	if (!srq_ep->srq) {
		free(srq_ep);
		return -errno;
	}

	*rx_ep = &srq_ep->ep_fid;
	return 0;
}
Пример #6
0
/***************************************************************************//**
 * Description 
 * Init rdma global resources for one worker thread: device context,
 * protection domain, completion channel, SRQ, send/recv CQs, and a pool
 * of receive buffers that is registered and pre-posted to the SRQ.
 *
 * Returns the initialized context, or NULL on any failure.
 * NOTE(review): as in the original code, resources acquired before a
 * failure are not released; callers are expected to treat NULL as fatal.
 ******************************************************************************/
static struct thread_context*
init_rdma_thread_resources() {

    struct thread_context *ctx = calloc(1, sizeof(struct thread_context));
    /* Fix: ctx was dereferenced without a NULL check. */
    if (!ctx) {
        fprintf(stderr, "out of memory in init_rdma_thread_resources()\n");
        return NULL;
    }

    ctx->qp_hash = hashtable_create(1024);

    int num_device;
    if ( !(ctx->device_ctx_list = rdma_get_devices(&num_device)) ) {
        perror("rdma_get_devices()");
        return NULL;
    }
    /* Use the first device found. */
    ctx->device_ctx = *ctx->device_ctx_list;
    if (verbose) {
        printf("Get device: %d\n", num_device); 
    }

    if ( !(ctx->pd = ibv_alloc_pd(ctx->device_ctx)) ) {
        perror("ibv_alloc_pd()");
        return NULL;
    }

    if ( !(ctx->comp_channel = ibv_create_comp_channel(ctx->device_ctx)) ) {
        perror("ibv_create_comp_channel()");
        return NULL;
    }

    struct ibv_srq_init_attr srq_init_attr;
    srq_init_attr.srq_context = NULL;
    srq_init_attr.attr.max_sge = 16;
    srq_init_attr.attr.max_wr = srq_size;
    srq_init_attr.attr.srq_limit = srq_size; /* RDMA TODO: what is srq_limit? */

    if ( !(ctx->srq = ibv_create_srq(ctx->pd, &srq_init_attr)) ) {
        perror("ibv_create_srq()");
        return NULL;
    }

    if ( !(ctx->send_cq = ibv_create_cq(ctx->device_ctx, 
                    cq_size, NULL, ctx->comp_channel, 0)) ) {
        perror("ibv_create_cq()");
        return NULL;
    }

    if (0 != ibv_req_notify_cq(ctx->send_cq, 0)) {
        perror("ibv_reg_notify_cq()");
        return NULL;
    }

    if ( !(ctx->recv_cq = ibv_create_cq(ctx->device_ctx, 
                    cq_size, NULL, ctx->comp_channel, 0)) ) {
        perror("ibv_create_cq()");
        return NULL;
    }

    if (0 != ibv_req_notify_cq(ctx->recv_cq, 0)) {
        perror("ibv_reg_notify_cq()");
        return NULL;
    }

    ctx->rsize = BUFF_SIZE;
    ctx->rbuf_list = calloc(buff_per_thread, sizeof(char *));
    ctx->rmr_list = calloc(buff_per_thread, sizeof(struct ibv_mr*));
    ctx->poll_wc = calloc(poll_wc_size, sizeof(struct ibv_wc));

    int i = 0;
    /* Fix: the buffer loop previously wrote through rbuf_list even when
     * its calloc had failed (NULL dereference). */
    if (ctx->rbuf_list != NULL) {
        for (i = 0; i < buff_per_thread; ++i) {
            ctx->rbuf_list[i] = malloc(ctx->rsize);
            if (ctx->rbuf_list[i] == 0) {
                break;
            }
        }
        if (i != buff_per_thread) {
            /* Partial allocation: release what we got and flag failure. */
            int j = 0;
            for (j = 0; j < i; ++j) {
                free(ctx->rbuf_list[j]);
            }
            free(ctx->rbuf_list);
            ctx->rbuf_list = 0;
        }
    }
    /* Fix: also check poll_wc (previously unchecked) and correct the
     * "ctxmory" typo in the message. */
    if (!ctx->rmr_list || !ctx->rbuf_list || !ctx->poll_wc) {
        fprintf(stderr, "out of memory in init_rdma_thread_resources()\n");
        return NULL;
    }

    /* Register every buffer and pre-post it to the SRQ. */
    struct ibv_recv_wr *bad = NULL;
    struct ibv_sge sge;
    struct ibv_recv_wr rwr;
    for (i = 0; i < buff_per_thread; ++i) {
        ctx->rmr_list[i] = ibv_reg_mr(ctx->pd, ctx->rbuf_list[i], ctx->rsize, IBV_ACCESS_LOCAL_WRITE);
        /* Fix: ibv_reg_mr() result was dereferenced (->lkey) unchecked. */
        if (!ctx->rmr_list[i]) {
            perror("ibv_reg_mr()");
            return NULL;
        }

        sge.addr = (uintptr_t)ctx->rbuf_list[i];
        sge.length = ctx->rsize;
        sge.lkey = ctx->rmr_list[i]->lkey;

        /* wr_id carries the MR pointer so completions can find the buffer. */
        rwr.wr_id = (uintptr_t)ctx->rmr_list[i];
        rwr.next = NULL;
        rwr.sg_list = &sge;
        rwr.num_sge = 1;

        if (0 != ibv_post_srq_recv(ctx->srq, &rwr, &bad)) {
            perror("ibv_post_srq_recv()");
            return NULL;
        }
    }

    return ctx;
}
Пример #7
0
/* 
 * create both the high and low priority completion queues 
 * and the shared receive queue (if requested)
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR if any verbs object cannot be
 * created.  NOTE(review): on a partial failure the objects created so
 * far are not destroyed (as in the original code); the caller tears the
 * module down.
 */ 
int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
{
    /* Allocate Protection Domain */ 
    openib_btl->poll_cq = false; 
    
    if (mca_btl_openib_component.use_srq) { 
        
        /* Fix: zero-initialize the whole attr struct -- srq_context and
         * attr.srq_limit were previously left uninitialized (undefined
         * behavior; ibv_create_srq may fail with EINVAL on a garbage
         * srq_limit). */
        struct ibv_srq_init_attr attr = {0};
        attr.attr.max_wr = mca_btl_openib_component.srq_rd_max;
        attr.attr.max_sge = mca_btl_openib_component.ib_sg_list_size;

        openib_btl->srd_posted[BTL_OPENIB_HP_QP] = 0; 
        openib_btl->srd_posted[BTL_OPENIB_LP_QP] = 0; 
        
        openib_btl->srq[BTL_OPENIB_HP_QP] =
            ibv_create_srq(openib_btl->hca->ib_pd, &attr); 
        if (NULL == openib_btl->srq[BTL_OPENIB_HP_QP]) { 
            show_init_error(__FILE__, __LINE__, "ibv_create_srq",
                            ibv_get_device_name(openib_btl->hca->ib_dev));
            return OMPI_ERROR; 
        }
        
        openib_btl->srq[BTL_OPENIB_LP_QP] =
            ibv_create_srq(openib_btl->hca->ib_pd, &attr); 
        if (NULL == openib_btl->srq[BTL_OPENIB_LP_QP]) { 
            show_init_error(__FILE__, __LINE__, "ibv_create_srq",
                            ibv_get_device_name(openib_btl->hca->ib_dev));
            return OMPI_ERROR; 
        }
        
        
    } else { 
        openib_btl->srq[BTL_OPENIB_HP_QP] = NULL; 
        openib_btl->srq[BTL_OPENIB_LP_QP] = NULL;
    } 
    
    /* Create the low and high priority queue pairs */ 
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
    openib_btl->ib_cq[BTL_OPENIB_LP_QP] =
        ibv_create_cq(openib_btl->hca->ib_dev_context,
                mca_btl_openib_component.ib_cq_size, NULL); 
#else
    openib_btl->ib_cq[BTL_OPENIB_LP_QP] =
        ibv_create_cq(openib_btl->hca->ib_dev_context,
                mca_btl_openib_component.ib_cq_size, NULL, NULL, 0); 
#endif
    
    if (NULL == openib_btl->ib_cq[BTL_OPENIB_LP_QP]) {
        show_init_error(__FILE__, __LINE__, "ibv_create_cq",
                        ibv_get_device_name(openib_btl->hca->ib_dev));
        return OMPI_ERROR;
    }

#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
    openib_btl->ib_cq[BTL_OPENIB_HP_QP] =
        ibv_create_cq(openib_btl->hca->ib_dev_context,
                mca_btl_openib_component.ib_cq_size, NULL); 
#else
    openib_btl->ib_cq[BTL_OPENIB_HP_QP] =
        ibv_create_cq(openib_btl->hca->ib_dev_context,
                mca_btl_openib_component.ib_cq_size, NULL, NULL, 0); 
#endif    

    if(NULL == openib_btl->ib_cq[BTL_OPENIB_HP_QP]) {
        show_init_error(__FILE__, __LINE__, "ibv_create_cq",
                        ibv_get_device_name(openib_btl->hca->ib_dev));
        return OMPI_ERROR;
    }
    
    return OMPI_SUCCESS;
}
Пример #8
0
/*
 * Transition a backend QP to RTR (ready-to-receive).
 *
 * RC QPs get the full path/address vector programmed (MTU, destination
 * QPN, GRH with the destination GID, RNR timer, expected RQ PSN); UD QPs
 * optionally get the Q_Key.  Returns 0 on success, -EIO if
 * ibv_modify_qp() rejects the transition.
 */
int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
                              uint8_t qp_type, uint8_t sgid_idx,
                              union ibv_gid *dgid, uint32_t dqpn,
                              uint32_t rq_psn, uint32_t qkey, bool use_qkey)
{
    struct ibv_qp_attr attr = {};
    union ibv_gid ibv_gid = {
        .global.interface_id = dgid->global.interface_id,
        .global.subnet_prefix = dgid->global.subnet_prefix
    };
    int ret;
    int mask = IBV_QP_STATE;

    attr.qp_state = IBV_QPS_RTR;
    qp->sgid_idx = sgid_idx;

    switch (qp_type) {
    case IBV_QPT_RC:
        /* Full connected-path setup for reliable-connection QPs. */
        attr.path_mtu = IBV_MTU_1024;
        attr.dest_qp_num = dqpn;
        attr.max_dest_rd_atomic = 1;
        attr.min_rnr_timer = 12;
        attr.rq_psn = rq_psn;
        attr.ah_attr.port_num = backend_dev->port_num;
        attr.ah_attr.is_global = 1;
        attr.ah_attr.grh.hop_limit = 1;
        attr.ah_attr.grh.dgid = ibv_gid;
        attr.ah_attr.grh.sgid_index = qp->sgid_idx;

        mask |= IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN |
                IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC |
                IBV_QP_MIN_RNR_TIMER;

        trace_rdma_backend_rc_qp_state_rtr(qp->ibqp->qp_num,
                                           be64_to_cpu(ibv_gid.global.subnet_prefix),
                                           be64_to_cpu(ibv_gid.global.interface_id),
                                           qp->sgid_idx, dqpn, rq_psn);
        break;

    case IBV_QPT_UD:
        /* Datagram QPs only need the Q_Key (when one is in use). */
        if (use_qkey) {
            attr.qkey = qkey;
            mask |= IBV_QP_QKEY;
        }
        trace_rdma_backend_ud_qp_state_rtr(qp->ibqp->qp_num,
                                           use_qkey ? qkey : 0);
        break;
    }

    ret = ibv_modify_qp(qp->ibqp, &attr, mask);
    if (ret) {
        rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", ret, errno);
        return -EIO;
    }

    return 0;
}

/*
 * Transition a backend QP to RTS (ready-to-send).
 *
 * RC QPs additionally get timeout/retry parameters; UD QPs optionally get
 * the Q_Key.  Returns 0 on success, -EIO if ibv_modify_qp() fails.
 */
int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type,
                              uint32_t sq_psn, uint32_t qkey, bool use_qkey)
{
    struct ibv_qp_attr attr = {};
    int ret;
    int mask = IBV_QP_STATE | IBV_QP_SQ_PSN;

    attr.qp_state = IBV_QPS_RTS;
    attr.sq_psn = sq_psn;

    switch (qp_type) {
    case IBV_QPT_RC:
        /* Retry/timeout tuning for reliable-connection QPs. */
        attr.timeout = 14;
        attr.retry_cnt = 7;
        attr.rnr_retry = 7;
        attr.max_rd_atomic = 1;

        mask |= IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY |
                IBV_QP_MAX_QP_RD_ATOMIC;
        trace_rdma_backend_rc_qp_state_rts(qp->ibqp->qp_num, sq_psn);
        break;

    case IBV_QPT_UD:
        if (use_qkey) {
            attr.qkey = qkey;
            mask |= IBV_QP_QKEY;
        }
        trace_rdma_backend_ud_qp_state_rts(qp->ibqp->qp_num, sq_psn,
                                           use_qkey ? qkey : 0);
        break;
    }

    ret = ibv_modify_qp(qp->ibqp, &attr, mask);
    if (ret) {
        rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", ret, errno);
        return -EIO;
    }

    return 0;
}

/*
 * Query a backend QP's attributes.  A QP that was never materialized in
 * the backend (no ibqp) is simply reported as being in the RTS state.
 */
int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr,
                          int attr_mask, struct ibv_qp_init_attr *init_attr)
{
    if (qp->ibqp) {
        return ibv_query_qp(qp->ibqp, attr, attr_mask, init_attr);
    }

    attr->qp_state = IBV_QPS_RTS;
    return 0;
}

/*
 * Tear down a backend QP: destroy the ibverbs QP (if one was created) and
 * release every completion-queue-entry context still tracked for it.
 * The order matters: the hardware QP is destroyed before the CQE context
 * list is freed and the protected list itself is disposed of.
 */
void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
{
    if (qp->ibqp) {
        ibv_destroy_qp(qp->ibqp);
    }
    g_slist_foreach(qp->cqe_ctx_list.list, free_cqe_ctx, dev_res);
    rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
}

/*
 * Create a backend shared receive queue on the given protection domain
 * with the requested capacity (max_wr), per-WR SGE count (max_sge), and
 * limit-event watermark (srq_limit).  Initializes the SRQ's CQE context
 * list on success.  Returns 0 on success, -EIO on failure.
 */
int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
                            uint32_t max_wr, uint32_t max_sge,
                            uint32_t srq_limit)
{
    struct ibv_srq_init_attr init_attr = {
        .attr = {
            .max_wr = max_wr,
            .max_sge = max_sge,
            .srq_limit = srq_limit,
        },
    };

    srq->ibsrq = ibv_create_srq(pd->ibpd, &init_attr);
    if (!srq->ibsrq) {
        rdma_error_report("ibv_create_srq failed, errno=%d", errno);
        return -EIO;
    }

    rdma_protected_gslist_init(&srq->cqe_ctx_list);
    return 0;
}

/*
 * Query a backend SRQ's current attributes into *srq_attr.
 * Returns -EINVAL when no underlying ibverbs SRQ exists.
 */
int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
{
    return srq->ibsrq ? ibv_query_srq(srq->ibsrq, srq_attr) : -EINVAL;
}

/*
 * Modify a backend SRQ using the attributes selected by srq_attr_mask.
 * Returns -EINVAL when no underlying ibverbs SRQ exists.
 */
int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
                int srq_attr_mask)
{
    return srq->ibsrq ? ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask)
                      : -EINVAL;
}

/*
 * Tear down a backend SRQ: destroy the ibverbs SRQ (if one was created)
 * and release every completion-queue-entry context still tracked for it.
 * Mirrors rdma_backend_destroy_qp(); the hardware object goes first, then
 * the CQE context list is freed and the protected list disposed of.
 */
void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
{
    if (srq->ibsrq) {
        ibv_destroy_srq(srq->ibsrq);
    }
    g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
    rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
}

/*
 * CHK_ATTR - clamp one requested device attribute to the host capability.
 *
 * Traces the requested vs. host value of `member`; if the request exceeds
 * what the host device reports, warns and lowers req->member to the host
 * value.  `fmt` is the printf format specifier matching the member's
 * type.  Implemented as a GNU statement expression (QEMU code, so the
 * extension is available).
 */
#define CHK_ATTR(req, dev, member, fmt) ({ \
    trace_rdma_check_dev_attr(#member, dev.member, req->member); \
    if (req->member > dev.member) { \
        rdma_warn_report("%s = "fmt" is higher than host device capability "fmt, \
                         #member, req->member, dev.member); \
        req->member = dev.member; \
    } \
})

/*
 * Clamp the guest-requested device attributes in *dev_attr to the host
 * device's actual capabilities (and to the emulated MAX_SGE limit).
 * Returns 0 on success, -EIO if the host device cannot be queried.
 */
static int init_device_caps(RdmaBackendDev *backend_dev,
                            struct ibv_device_attr *dev_attr)
{
    struct ibv_device_attr host_attr;
    int ret = ibv_query_device(backend_dev->context, &host_attr);

    if (ret) {
        rdma_error_report("ibv_query_device fail, rc=%d, errno=%d", ret, errno);
        return -EIO;
    }

    /* The emulated device advertises a fixed SGE limit. */
    dev_attr->max_sge = MAX_SGE;
    dev_attr->max_srq_sge = MAX_SGE;

    CHK_ATTR(dev_attr, host_attr, max_mr_size, "%" PRId64);
    CHK_ATTR(dev_attr, host_attr, max_qp, "%d");
    CHK_ATTR(dev_attr, host_attr, max_sge, "%d");
    CHK_ATTR(dev_attr, host_attr, max_cq, "%d");
    CHK_ATTR(dev_attr, host_attr, max_mr, "%d");
    CHK_ATTR(dev_attr, host_attr, max_pd, "%d");
    CHK_ATTR(dev_attr, host_attr, max_qp_rd_atom, "%d");
    CHK_ATTR(dev_attr, host_attr, max_qp_init_rd_atom, "%d");
    CHK_ATTR(dev_attr, host_attr, max_ah, "%d");
    CHK_ATTR(dev_attr, host_attr, max_srq, "%d");

    return 0;
}

/*
 * Fill the GRH fields of an outgoing MAD: source GID, destination GID
 * (the local device's own GID), and the payload length converted to
 * network byte order.
 */
static inline void build_mad_hdr(struct ibv_grh *grh, union ibv_gid *sgid,
                                 union ibv_gid *my_gid, int paylen)
{
    grh->sgid = *sgid;
    grh->dgid = *my_gid;
    grh->paylen = htons(paylen);
}