/* Set up the per-connection verbs resources: protection domain, completion
 * channel, a CQ bound to that channel, and a dedicated thread running
 * poll_cq() on this connection. TEST_Z / TEST_NZ are project check macros
 * (definitions not visible here) — presumably fatal on failure; confirm. */
static void build_verbs(IbvConnection *conn, struct ibv_context *verbs)
{
    conn->ibvctx = verbs;
    TEST_Z(conn->pd = ibv_alloc_pd(conn->ibvctx));
    TEST_Z(conn->comp_channel = ibv_create_comp_channel(conn->ibvctx));
    TEST_Z(conn->cq = ibv_create_cq(conn->ibvctx, 10, NULL, conn->comp_channel, 0)); /* cqe=10 is arbitrary */
    /* Arm the CQ so the first completion generates a channel event. */
    TEST_NZ(ibv_req_notify_cq(conn->cq, 0));
    TEST_NZ(pthread_create(&conn->cq_poller_thread, NULL, poll_cq, conn));
}
/// Create a one-entry completion queue on the given device context.
///
/// @param ctx  owning wrapper around the ibv_context
/// @param cc   optional completion channel wrapper; defaults to "no channel"
/// @return     owning CqPtr whose deleter is ibv_destroy_cq
/// @throws std::runtime_error when ibv_create_cq returns NULL
static CqPtr make_cq(CtxPtr ctx, CcPtr cc = CcPtr(nullptr))
{
    ibv_cq* raw = ibv_create_cq(ctx.get(), 1, nullptr, cc.get(), 0);
    if (raw == nullptr)
        throw std::runtime_error("cannot create cq");
    return CqPtr(raw, ibv_destroy_cq);
}
// Build the per-connection RDMA resources for this channel: a private
// write CQ, an RC queue pair (sends complete on write_cq_, receives on
// the adapter's shared CQ), the INIT state transition, and the local
// address (lid/qpn/psn) exchanged during connection setup. Finally
// registers MAX_BUFFERS receive regions and pre-posts two receives
// (one for data, one for control signals).
RDMAChannel::RDMAChannel(const RDMAAdapter& adapter)
    : adapter_(adapter),
      buffers_(),
      memory_regions_(MAX_BUFFERS),
      region_regions_(MAX_BUFFERS),
      memory_regions_received_() {
  // Create write completion queue
  write_cq_ = ibv_create_cq(adapter_.context_, 1, NULL, NULL, 0);
  CHECK(write_cq_) << "Failed to create completion queue";

  // Create queue pair
  {
    struct ibv_qp_init_attr attr;
    // NOTE: caffe_memset takes (count, value, dst) — argument order
    // differs from libc memset.
    caffe_memset(sizeof(ibv_qp_init_attr), 0, &attr);
    attr.send_cq = write_cq_;
    attr.recv_cq = adapter.cq_;  // receives complete on the shared adapter CQ
    attr.cap.max_send_wr = RDMAAdapter::MAX_CONCURRENT_WRITES;
    attr.cap.max_recv_wr = RDMAAdapter::MAX_CONCURRENT_WRITES;
    attr.cap.max_send_sge = 1;
    attr.cap.max_recv_sge = 1;
    attr.qp_type = IBV_QPT_RC;

    qp_ = ibv_create_qp(adapter.pd_, &attr);
    CHECK(qp_) << "Failed to create queue pair";
  }

  // Init queue pair
  {
    struct ibv_qp_attr attr;
    caffe_memset(sizeof(ibv_qp_attr), 0, &attr);
    attr.qp_state = IBV_QPS_INIT;
    attr.pkey_index = 0;
    attr.port_num = 1;  // assumes HCA port 1 — TODO confirm for multi-port devices
    attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;

    int mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
    CHECK(!ibv_modify_qp(qp_, &attr, mask)) << "Failed to set QP to INIT";
  }

  // Local address
  {
    struct ibv_port_attr attr;
    CHECK(!ibv_query_port(adapter.context_, (uint8_t) 1, &attr)) << "Query port";
    self_.lid = attr.lid;
    self_.qpn = qp_->qp_num;
    self_.psn = caffe_rng_rand() & 0xffffff;  // PSN is a 24-bit field
  }

  for (int i = 0; i < MAX_BUFFERS; ++i) {
    RecvMR(i);
  }

  // Create initial recv request for data.
  recv();
  // Create initial recv request for ctrl signals.
  recv();
}
/// Build a completion queue of `length` entries bound to `channel` on the
/// given device context.
/// @throws std::system_error carrying errno when ibv_create_cq fails.
CompletionQueue::CompletionQueue(ibv_context* context, const CompletionChannel& channel, int length)
        : mQueue(ibv_create_cq(context, length, nullptr, channel.get(), 0))
{
    if (!mQueue) {
        throw std::system_error(errno, std::generic_category());
    }
    LOG_TRACE("Created completion queue");
}
/* Create the verbs resources for one cmatose endpoint: PD, send and recv
 * CQs (optionally created through the experimental API with hardware
 * timestamping when set_ts is enabled), an RC QP on the CM id, and the
 * message buffers. Returns 0 on success or a negative error code. */
static int init_node(struct cmatest_node *node)
{
	struct ibv_qp_init_attr init_qp_attr;
	int cqe, ret;
	int i;
	/* Addresses of both CQ slots so one loop can run the (possibly
	 * experimental) creation call for send and recv alike. */
	struct ibv_cq **cqs[] = {&node->cq[SEND_CQ_INDEX], &node->cq[RECV_CQ_INDEX]};

	node->pd = ibv_alloc_pd(node->cma_id->verbs);
	if (!node->pd) {
		ret = -ENOMEM;
		printf("cmatose: unable to allocate PD\n");
		goto out;
	}

	/* One CQ entry per outstanding message (at least 1). */
	cqe = message_count ? message_count : 1;
	for (i = 0; i < sizeof(cqs)/sizeof(cqs[0]); i++) {
		if (set_ts) {
			/* Request raw completion timestamps (experimental verbs). */
			struct ibv_exp_cq_init_attr cq_init_attr;
			memset(&cq_init_attr, 0, sizeof(cq_init_attr));
			cq_init_attr.flags = IBV_EXP_CQ_TIMESTAMP;
			cq_init_attr.comp_mask = IBV_EXP_CQ_INIT_ATTR_FLAGS;
			*cqs[i] = (struct ibv_cq *)ibv_exp_create_cq(node->cma_id->verbs, cqe, node, NULL, 0, &cq_init_attr);
		} else {
			*cqs[i] = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0);
		}
	}
	if (!node->cq[SEND_CQ_INDEX] || !node->cq[RECV_CQ_INDEX]) {
		ret = -ENOMEM;
		printf("cmatose: unable to create CQ\n");
		goto out;
	}

	memset(&init_qp_attr, 0, sizeof init_qp_attr);
	init_qp_attr.cap.max_send_wr = cqe;
	init_qp_attr.cap.max_recv_wr = cqe;
	init_qp_attr.cap.max_send_sge = 1;
	init_qp_attr.cap.max_recv_sge = 1;
	init_qp_attr.qp_context = node;
	init_qp_attr.sq_sig_all = 1;  /* every send generates a completion */
	init_qp_attr.qp_type = IBV_QPT_RC;
	init_qp_attr.send_cq = node->cq[SEND_CQ_INDEX];
	init_qp_attr.recv_cq = node->cq[RECV_CQ_INDEX];
	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
	if (ret) {
		perror("cmatose: unable to create QP");
		goto out;
	}

	ret = create_message(node);
	if (ret) {
		printf("cmatose: failed to create messages: %d\n", ret);
		goto out;
	}
out:
	return ret;
}
struct ibv_cq_1_0 *__ibv_create_cq_1_0(struct ibv_context_1_0 *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, int comp_vector) { struct ibv_cq *real_cq; struct ibv_cq_1_0 *cq; cq = malloc(sizeof *cq); if (!cq) return NULL; real_cq = ibv_create_cq(context->real_context, cqe, cq_context, channel, comp_vector); if (!real_cq) { free(cq); return NULL; } cq->context = context; cq->cq_context = cq_context; cq->cqe = cqe; cq->real_cq = real_cq; real_cq->cq_context = cq; return cq; }
/* Create the global completion queue with opts.num_cqe entries on the
 * global HCA context. Aborts via assert() if creation fails; always
 * returns 0 otherwise. */
int create_cq(void)
{
	hca.cq = ibv_create_cq(hca.context, opts.num_cqe, NULL, NULL, 0);
	assert(hca.cq != NULL);
	return 0;
}
struct ibv_cq_1_0 *__ibv_create_cq_1_0(struct ibv_context_1_0 *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, int comp_vector) { fprintf(stderr, "%s:%s:%d \n", __func__, __FILE__, __LINE__); struct ibv_cq *real_cq; struct ibv_cq_1_0 *cq; cq = malloc(sizeof *cq); if (!cq) return NULL; real_cq = ibv_create_cq(context->real_context, cqe, cq_context, channel, comp_vector); if (!real_cq) { free(cq); return NULL; } cq->context = context; cq->cq_context = cq_context; cq->cqe = cqe; cq->real_cq = real_cq; real_cq->cq_context = cq; return cq; }
/* Probe the largest usable max_inline_data by repeatedly creating a
 * throwaway RC QP: grow the requested size geometrically until creation
 * fails, then binary-search between the last success and first failure.
 * Returns the largest size that worked, or 0 if even the smallest probe
 * failed. The scratch CQ and any surviving QP are destroyed on exit. */
static int fi_ibv_rdm_tagged_find_max_inline_size(struct ibv_pd *pd, struct ibv_context *context)
{
	struct ibv_qp_init_attr qp_attr;
	struct ibv_qp *qp = NULL;
	struct ibv_cq *cq = ibv_create_cq(context, 1, NULL, NULL, 0);
	assert(cq);
	int max_inline = 2;
	int rst = 0;  /* largest inline size known to work */

	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.send_cq = cq;
	qp_attr.recv_cq = cq;
	qp_attr.qp_type = IBV_QPT_RC;
	qp_attr.cap.max_send_wr = 1;
	qp_attr.cap.max_recv_wr = 1;
	qp_attr.cap.max_send_sge = 1;
	qp_attr.cap.max_recv_sge = 1;

	/* Phase 1: double max_inline until QP creation fails. */
	do {
		if (qp)
			ibv_destroy_qp(qp);
		qp_attr.cap.max_inline_data = max_inline;
		qp = ibv_create_qp(pd, &qp_attr);
		if (qp)
			rst = max_inline;
	} while (qp && (max_inline *= 2));

	/* Phase 2: binary search in (rst, max_inline) to tighten the bound;
	 * pos tracks known-good sizes, neg known-bad ones. */
	if (rst != 0) {
		int pos = rst, neg = max_inline;
		do {
			max_inline = pos + (neg - pos) / 2;
			if (qp)
				ibv_destroy_qp(qp);
			qp_attr.cap.max_inline_data = max_inline;
			qp = ibv_create_qp(pd, &qp_attr);
			if (qp)
				pos = max_inline;
			else
				neg = max_inline;
		} while (neg - pos > 2);
		rst = pos;
	}

	if (qp) {
		ibv_destroy_qp(qp);
	}
	if (cq) {
		ibv_destroy_cq(cq);
	}
	return rst;
}
/* Create the verbs resources for one cmatose endpoint: PD, separate send
 * and receive CQs, an RC QP on the CM id, and the message buffers.
 * Returns 0 on success or a negative error code. */
static int init_node(struct cmatest_node *node)
{
	struct ibv_qp_init_attr init_qp_attr;
	int cqe, ret;

	node->pd = ibv_alloc_pd(node->cma_id->verbs);
	if (!node->pd) {
		ret = -ENOMEM;
		printf("cmatose: unable to allocate PD\n");
		goto out;
	}

	/* One CQ entry per outstanding message (at least 1). */
	cqe = message_count ? message_count : 1;
	node->cq[SEND_CQ_INDEX] = ibv_create_cq(node->cma_id->verbs, cqe, node, NULL, 0);
	node->cq[RECV_CQ_INDEX] = ibv_create_cq(node->cma_id->verbs, cqe, node, NULL, 0);
	if (!node->cq[SEND_CQ_INDEX] || !node->cq[RECV_CQ_INDEX]) {
		ret = -ENOMEM;
		printf("cmatose: unable to create CQ\n");
		goto out;
	}

	memset(&init_qp_attr, 0, sizeof init_qp_attr);
	init_qp_attr.cap.max_send_wr = cqe;
	init_qp_attr.cap.max_recv_wr = cqe;
	init_qp_attr.cap.max_send_sge = 1;
	init_qp_attr.cap.max_recv_sge = 1;
	init_qp_attr.qp_context = node;
	init_qp_attr.sq_sig_all = 1;  /* every send generates a completion */
	init_qp_attr.qp_type = IBV_QPT_RC;
	init_qp_attr.send_cq = node->cq[SEND_CQ_INDEX];
	init_qp_attr.recv_cq = node->cq[RECV_CQ_INDEX];
	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
	if (ret) {
		perror("cmatose: unable to create QP");
		goto out;
	}

	ret = create_message(node);
	if (ret) {
		printf("cmatose: failed to create messages: %d\n", ret);
		goto out;
	}
out:
	return ret;
}
// Construct the CQ manager for one ring: allocate the underlying ibv CQ
// on the supplied completion channel (with `this` as the CQ context so
// completions route back here), reset all counters/statistics, and derive
// the transport header length from the ring's transport type.
cq_mgr::cq_mgr(ring* p_ring, ib_ctx_handler* p_ib_ctx_handler, int cq_size, struct ibv_comp_channel* p_comp_event_channel, bool is_rx) :
	m_p_ring(p_ring), m_p_ib_ctx_handler(p_ib_ctx_handler), m_b_is_rx(is_rx), m_comp_event_channel(p_comp_event_channel), m_p_next_rx_desc_poll(NULL)
{
	cq_logfunc("");

	m_n_wce_counter = 0;
	m_b_was_drained = false;
	m_b_notification_armed = false;
	m_n_out_of_free_bufs_warning = 0;
	m_n_cq_poll_sn = 0;
	m_cq_id = atomic_fetch_and_inc(&m_n_cq_id_counter); // cq id is nonzero

	m_transport_type = m_p_ring->get_transport_type();

	m_p_ibv_cq = ibv_create_cq(m_p_ib_ctx_handler->get_ibv_context(), cq_size, (void*)this, m_comp_event_channel, 0);
	BULLSEYE_EXCLUDE_BLOCK_START
	if (!m_p_ibv_cq) {
		cq_logpanic("ibv_create_cq failed (errno=%d %m)", errno);
	}
	BULLSEYE_EXCLUDE_BLOCK_END

	// use local copy of stats by default (on rx cq get shared memory stats)
	m_p_cq_stat = &m_cq_stat_static;
	memset(m_p_cq_stat , 0, sizeof(*m_p_cq_stat));
	/*
	m_p_cq_stat->n_rx_sw_queue_len = 0;
	m_p_cq_stat->n_rx_pkt_drop = 0;
	m_p_cq_stat->n_rx_drained_at_once_max = 0;
	m_p_cq_stat->n_buffer_pool_len = 0;
	m_p_cq_stat->buffer_miss_rate = 0.0;
	//*/
	m_buffer_miss_count = 0;
	m_buffer_total_count = 0;
	m_buffer_prev_id = 0;

	// Per-packet transport header size depends on the link type.
	m_sz_transport_header = 0;
	switch (m_transport_type) {
	case VMA_TRANSPORT_IB:
		m_sz_transport_header = GRH_HDR_LEN;
		break;
	case VMA_TRANSPORT_ETH:
		m_sz_transport_header = ETH_HDR_LEN;
		break;
	BULLSEYE_EXCLUDE_BLOCK_START
	default:
		cq_logpanic("Unknown transport type: %d", m_transport_type);
		break;
	BULLSEYE_EXCLUDE_BLOCK_END
	}

	if (m_b_is_rx)
		vma_stats_instance_create_cq_block(m_p_cq_stat);

	cq_logdbg("Created CQ as %s with fd[%d] and of size %d elements (ibv_cq_hndl=%p)", (m_b_is_rx?"Rx":"Tx"), get_channel_fd(), cq_size, m_p_ibv_cq);
}
// Open the default RDMA device and allocate its protection domain, then
// create the completion channel and the shared CQ used by all channels on
// this adapter (sized 2x MAX_CONCURRENT_WRITES). The CQ is armed for
// notifications before the internal polling thread is started.
RDMAAdapter::RDMAAdapter()
    : context_(open_default_device()),
      pd_(alloc_protection_domain(context_)) {
  channel_ = ibv_create_comp_channel(context_);
  CHECK(channel_) << "Failed to create completion channel";
  cq_ = ibv_create_cq(context_, MAX_CONCURRENT_WRITES * 2, NULL, channel_, 0);
  CHECK(cq_) << "Failed to create completion queue";
  CHECK(!ibv_req_notify_cq(cq_, 0)) << "Failed to request CQ notification";
  StartInternalThread();
}
/* Test whether the device can create the QP type(s) requested via flags
 * (RC and/or UD). Allocates a scratch PD and CQ, attempts the QP
 * creations, and releases everything before returning an OPAL status
 * code. */
int opal_common_verbs_qp_test(struct ibv_context *device_context, int flags)
{
	int rc = OPAL_SUCCESS;
	struct ibv_pd *pd = NULL;
	struct ibv_cq *cq = NULL;

	/* Bozo check */
	if (NULL == device_context || (0 == (flags & (OPAL_COMMON_VERBS_FLAGS_RC | OPAL_COMMON_VERBS_FLAGS_UD)))) {
		return OPAL_ERR_BAD_PARAM;
	}

	/* Try to make both the PD and CQ */
	pd = ibv_alloc_pd(device_context);
	if (NULL == pd) {
		return OPAL_ERR_OUT_OF_RESOURCE;
	}
	cq = ibv_create_cq(device_context, 2, NULL, NULL, 0);
	if (NULL == cq) {
		rc = OPAL_ERR_OUT_OF_RESOURCE;
		goto out;
	}

	/* Now try to make the QP(s) of the desired type(s) */
	if (flags & OPAL_COMMON_VERBS_FLAGS_RC && !make_qp(pd, cq, IBV_QPT_RC)) {
		rc = OPAL_ERR_NOT_SUPPORTED;
		goto out;
	}
	/* NOT_RC asks for a device that can NOT do RC, so successfully
	 * creating an RC QP is a mismatch here. */
	if (flags & OPAL_COMMON_VERBS_FLAGS_NOT_RC && make_qp(pd, cq, IBV_QPT_RC)) {
		rc = OPAL_ERR_TYPE_MISMATCH;
		goto out;
	}
	if (flags & OPAL_COMMON_VERBS_FLAGS_UD && !make_qp(pd, cq, IBV_QPT_UD)) {
		rc = OPAL_ERR_NOT_SUPPORTED;
		goto out;
	}

out:
	/* Free the PD and/or CQ */
	if (NULL != pd) {
		ibv_dealloc_pd(pd);
	}
	if (NULL != cq) {
		ibv_destroy_cq(cq);
	}
	return rc;
}
static int rping_setup_qp(struct rping_cb *cb, struct rdma_cm_id *cm_id) { int ret; cb->pd = ibv_alloc_pd(cm_id->verbs); if (!cb->pd) { fprintf(stderr, "ibv_alloc_pd failed\n"); return errno; } DEBUG_LOG("created pd %p\n", cb->pd); cb->channel = ibv_create_comp_channel(cm_id->verbs); if (!cb->channel) { fprintf(stderr, "ibv_create_comp_channel failed\n"); ret = errno; goto err1; } DEBUG_LOG("created channel %p\n", cb->channel); cb->cq = ibv_create_cq(cm_id->verbs, RPING_SQ_DEPTH * 2, cb, cb->channel, 0); if (!cb->cq) { fprintf(stderr, "ibv_create_cq failed\n"); ret = errno; goto err2; } DEBUG_LOG("created cq %p\n", cb->cq); ret = ibv_req_notify_cq(cb->cq, 0); if (ret) { fprintf(stderr, "ibv_create_cq failed\n"); ret = errno; goto err3; } ret = rping_create_qp(cb); if (ret) { perror("rdma_create_qp"); goto err3; } DEBUG_LOG("created qp %p\n", cb->qp); return 0; err3: ibv_destroy_cq(cb->cq); err2: ibv_destroy_comp_channel(cb->channel); err1: ibv_dealloc_pd(cb->pd); return ret; }
/* Discover the inline-data capability fi_ibv advertises: build a
 * throwaway PD/CQ/RC-QP with the configured default sizes and read back
 * the max_inline_data the driver actually granted, which becomes the
 * endpoint's inject_size. Returns 0 or -errno; the scratch objects are
 * always released. */
static inline int fi_ibv_get_qp_cap(struct ibv_context *ctx, struct fi_info *info)
{
	struct ibv_pd *pd;
	struct ibv_cq *cq;
	struct ibv_qp *qp;
	struct ibv_qp_init_attr init_attr;
	int ret = 0;

	pd = ibv_alloc_pd(ctx);
	if (!pd) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_alloc_pd", errno);
		return -errno;
	}

	cq = ibv_create_cq(ctx, 1, NULL, NULL, 0);
	if (!cq) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_create_cq", errno);
		ret = -errno;
		goto err1;
	}

	memset(&init_attr, 0, sizeof init_attr);
	init_attr.send_cq = cq;
	init_attr.recv_cq = cq;
	init_attr.cap.max_send_wr = verbs_default_tx_size;
	init_attr.cap.max_recv_wr = verbs_default_rx_size;
	init_attr.cap.max_send_sge = verbs_default_tx_iov_limit;
	init_attr.cap.max_recv_sge = verbs_default_rx_iov_limit;
	init_attr.cap.max_inline_data = verbs_default_inline_size;
	init_attr.qp_type = IBV_QPT_RC;

	qp = ibv_create_qp(pd, &init_attr);
	if (!qp) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_create_qp", errno);
		ret = -errno;
		goto err2;
	}

	/* On success ibv_create_qp updates init_attr.cap with granted values. */
	info->tx_attr->inject_size = init_attr.cap.max_inline_data;

	ibv_destroy_qp(qp);
err2:
	ibv_destroy_cq(cq);
err1:
	ibv_dealloc_pd(pd);
	return ret;
}
/* Allocate a completion queue of cqe_num entries, shared by the send and
 * receive queues. On failure logs the errno value and returns NULL;
 * errno is cleared first so the logged value is unambiguous. */
static struct ibv_cq *psofed_open_cq(struct ibv_context *ctx, int cqe_num)
{
	struct ibv_cq *res;

	errno = 0;
	res = ibv_create_cq(ctx, cqe_num, NULL, NULL, 0);
	if (res == NULL)
		psofed_err_errno("ibv_create_cq() failed", errno);

	return res;
}
/* Detect support for the DC (Dynamically Connected) transport by trying
 * to create a DCI QP through the mlx5 direct-verbs API; success sets
 * UCT_IB_DEVICE_FLAG_DC on the device. Failure to create the QP itself
 * is not an error (just "no DC") — only scratch PD/CQ creation failures
 * return UCS_ERR_IO_ERROR. */
static ucs_status_t uct_ib_mlx5_check_dc(uct_ib_device_t *dev)
{
	ucs_status_t status = UCS_OK;
	struct ibv_context *ctx = dev->ibv_context;
	struct ibv_qp_init_attr_ex qp_attr = {};
	struct mlx5dv_qp_init_attr dv_attr = {};
	struct ibv_pd *pd;
	struct ibv_cq *cq;
	struct ibv_qp *qp;

	pd = ibv_alloc_pd(ctx);
	if (pd == NULL) {
		ucs_error("ibv_alloc_pd() failed: %m");
		return UCS_ERR_IO_ERROR;
	}

	cq = ibv_create_cq(ctx, 1, NULL, NULL, 0);
	if (cq == NULL) {
		ucs_error("ibv_create_cq() failed: %m");
		status = UCS_ERR_IO_ERROR;
		goto err_cq;
	}

	qp_attr.send_cq = cq;
	qp_attr.recv_cq = cq;
	qp_attr.cap.max_send_wr = 1;
	qp_attr.cap.max_send_sge = 1;
	qp_attr.qp_type = IBV_QPT_DRIVER;  /* vendor-specific QP type */
	qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
	qp_attr.pd = pd;
	dv_attr.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_DC;
	dv_attr.dc_init_attr.dc_type = MLX5DV_DCTYPE_DCI;

	/* create DCI qp successful means DC is supported */
	qp = mlx5dv_create_qp(ctx, &qp_attr, &dv_attr);
	if (qp) {
		ibv_destroy_qp(qp);
		dev->flags |= UCT_IB_DEVICE_FLAG_DC;
	}

	ibv_destroy_cq(cq);
err_cq:
	ibv_dealloc_pd(pd);
	return status;
}
/// Initialize the InfiniBand verbs context. void init_context(struct ibv_context* context) { context_ = context; L_(debug) << "create verbs objects"; pd_ = ibv_alloc_pd(context); if (!pd_) throw InfinibandException("ibv_alloc_pd failed"); cq_ = ibv_create_cq(context, num_cqe_, nullptr, nullptr, 0); if (!cq_) throw InfinibandException("ibv_create_cq failed"); if (ibv_req_notify_cq(cq_, 0)) throw InfinibandException("ibv_req_notify_cq failed"); }
/* Lazily create the process-wide verbs context (PD, completion channel,
 * CQ). A repeated call with the same device context is a no-op; a call
 * with a different device context is fatal, since this code polls only
 * one context. TEST_Z / TEST_NZ are project check macros used for
 * fatal-on-failure verbs calls.
 * Fix: the malloc() result is now checked instead of being dereferenced
 * unconditionally. */
void build_context(struct ibv_context *verbs)
{
	if (s_ctx) {
		if (s_ctx->ctx != verbs) {
			die("cannot handle events in more than one context.");
		}
		return;
	}

	s_ctx = (rdma_ctx_t *)malloc(sizeof(rdma_ctx_t));
	if (!s_ctx)
		die("failed to allocate rdma context.");
	s_ctx->ctx = verbs;

	TEST_Z(s_ctx->pd = ibv_alloc_pd(s_ctx->ctx));
	TEST_Z(s_ctx->comp_channel = ibv_create_comp_channel(s_ctx->ctx));
	TEST_Z(s_ctx->cq = ibv_create_cq(s_ctx->ctx, 10, NULL, s_ctx->comp_channel, 0)); /* cqe=10 is arbitrary */
	TEST_NZ(ibv_req_notify_cq(s_ctx->cq, 0));
}
/* Create the verbs resources for one multicast endpoint: PD, a single
 * CQ shared by sends and receives, a UD QP on the CM id (multicast
 * requires UD), and the message buffers. Returns 0 on success or a
 * negative error code. */
static int init_node(struct cmatest_node *node)
{
	struct ibv_qp_init_attr init_qp_attr;
	int cqe, ret;

	node->pd = ibv_alloc_pd(node->cma_id->verbs);
	if (!node->pd) {
		ret = -ENOMEM;
		printf("rxe_send_mc: unable to allocate PD\n");
		goto out;
	}

	/* Shared CQ: 2x the buffer count to cover sends and receives. */
	cqe = message_buffer ? message_buffer * 2 : 2;
	node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0);
	if (!node->cq) {
		ret = -ENOMEM;
		printf("rxe_send_mc: unable to create CQ\n");
		goto out;
	}

	memset(&init_qp_attr, 0, sizeof init_qp_attr);
	init_qp_attr.cap.max_send_wr = message_buffer ? message_buffer : 1;
	init_qp_attr.cap.max_recv_wr = message_buffer ? message_buffer : 1;
	init_qp_attr.cap.max_send_sge = 1;
	init_qp_attr.cap.max_recv_sge = 1;
	init_qp_attr.qp_context = node;
	init_qp_attr.sq_sig_all = 1; /* signal all sends */
	init_qp_attr.qp_type = IBV_QPT_UD;
	init_qp_attr.send_cq = node->cq;
	init_qp_attr.recv_cq = node->cq;
	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
	if (ret) {
		perror("rxe_send_mc: unable to create QP");
		goto out;
	}

	ret = create_message(node);
	if (ret) {
		printf("rxe_send_mc: failed to create messages: %d\n", ret);
		goto out;
	}
out:
	return ret;
}
/* Lazily create the shared verbs context (PD, completion channel, CQ).
 * A repeated call with the same device context is a no-op; a different
 * device context is fatal, since only one context can be handled.
 * TEST_Z / TEST_NZ are project fatal-on-failure check macros.
 * Fix: the malloc() result is now checked instead of being dereferenced
 * unconditionally. */
static void build_context(struct ibv_context *verbs)
{
	if (s_ctx) {
		if (s_ctx->ctx != verbs)
			die("cannot handle events in more than one context.");
		return;
	}

	s_ctx = (struct context *)malloc(sizeof(struct context));
	if (!s_ctx)
		die("failed to allocate context.");
	s_ctx->ctx = verbs;

	TEST_Z(s_ctx->pd = ibv_alloc_pd(s_ctx->ctx));
	TEST_Z(s_ctx->comp_channel = ibv_create_comp_channel(s_ctx->ctx));
	TEST_Z(s_ctx->cq = ibv_create_cq(s_ctx->ctx, 10, NULL, s_ctx->comp_channel, 0)); /* cqe=10 is arbitrary */
	TEST_NZ(ibv_req_notify_cq(s_ctx->cq, 0));

	// TEST_NZ(pthread_create(&s_ctx->cq_poller_thread, NULL, poll_cq, NULL));
}
void Connector::build_context(struct ibv_context* verb_) { if (s_ctx_ && s_ctx_->ctx_ != verb_) { log_(ERROR, "cannot handle events in more than one context.") exit(EXIT_FAILURE); } s_ctx_ = (struct context*)malloc(sizeof(struct context) ); s_ctx_->ctx_ = verb_; TEST_Z(s_ctx_->pd_ = ibv_alloc_pd(s_ctx_->ctx_) ); TEST_Z(s_ctx_->comp_channel_ = ibv_create_comp_channel(s_ctx_->ctx_) ); TEST_Z(s_ctx_->cq_ = ibv_create_cq(s_ctx_->ctx_, MAX_QP__CQ_LENGTH, NULL, s_ctx_->comp_channel_, 0) ); TEST_NZ(ibv_req_notify_cq(s_ctx_->cq_, 0) ) // TODO // TEST_NZ(pthread_create(pthread_v.back(), NULL, &Connector::bst_poll_cq, (void*)(this) ) ) pthread_v.push_back(new pthread_t() ); wrap_Connector* wrap_ = new wrap_Connector(this, s_ctx_); TEST_NZ(pthread_create(pthread_v.back(), NULL, call_poll_cq_w_wrap, wrap_) ) }
/* Create a backend completion queue of cqe+1 entries on the device's
 * completion channel and arm it for notification. A notify failure is
 * only warned about; CQ-creation failure returns -EIO. */
int rdma_backend_create_cq(RdmaBackendDev *backend_dev, RdmaBackendCQ *cq, int cqe)
{
	int ret;

	cq->ibcq = ibv_create_cq(backend_dev->context, cqe + 1, NULL, backend_dev->channel, 0);
	if (!cq->ibcq) {
		rdma_error_report("ibv_create_cq fail, errno=%d", errno);
		return -EIO;
	}

	ret = ibv_req_notify_cq(cq->ibcq, 0);
	if (ret)
		rdma_warn_report("ibv_req_notify_cq fail, rc=%d, errno=%d", ret, errno);

	cq->backend_dev = backend_dev;
	return 0;
}
/* Create the data CQs (one per HCA) and the UD data QPs (num_qps per
 * HCA), then attach a receive pool to each QP. Returns 1 on success,
 * 0 after a CQ-creation failure (error_abort_all is also invoked on
 * failures — presumably fatal; confirm). */
int MV_Setup_QPs()
{
    int i = 0;
    int port = 0;

    D_PRINT("Num HCAs: %d\n", mvdev.num_hcas);

    /* NOTE(review): malloc results are not checked before use. */
    mvdev.cq = (struct ibv_cq **) malloc(sizeof(struct ibv_cq *) * mvdev.num_hcas);
    mvdev.ud_qp = (mv_qp *) malloc(sizeof(mv_qp) * mvdev.num_hcas * mvparams.num_qps);
    mvdev.num_cqs = mvdev.num_hcas;
    mvdev.num_ud_qps = mvdev.num_hcas * mvparams.num_qps;

    /* create one data cq for each HCA */
    for(i = 0; i < mvdev.num_hcas; i++) {
        mvdev.cq[i] = ibv_create_cq(mvdev.hca[i].context, mvparams.cq_size, NULL, NULL, 0);
        if (!mvdev.cq[i]) {
            error_abort_all(IBV_RETURN_ERR, "Couldn't create Data CQ");
            return 0;
        }
    }

    for(port = 0; port < mvparams.num_qps; port++) {
        for(i = 0; i < mvdev.num_hcas; i++) {
            /* QPs are laid out port-major: one slot per (port, hca) pair. */
            int index = (port * mvdev.num_hcas) + i;
            D_PRINT("index is %d\n", index);

            /* Setup the UD QP for normal data transfer */
            mv_qp_setup_information si;
            si.send_cq = si.recv_cq = mvdev.cq[i];
            si.sq_psn = mvparams.psn;
            si.pd = mvdev.hca[i].pd;
            si.cap.max_send_wr = mvparams.ud_sq_size;
            si.cap.max_recv_wr = mvparams.ud_rq_size;
            si.cap.max_send_sge = 1;
            si.cap.max_recv_sge = 1;
            if(mvparams.ud_max_inline != -1) {
                si.cap.max_inline_data = mvparams.ud_max_inline;
            } else {
                si.cap.max_inline_data = 0;
            }

            mvdev.ud_qp[index].qp = MV_Setup_UD_QP(&si);
            if(!mvdev.ud_qp[index].qp) {
                error_abort_all(IBV_RETURN_ERR, "Couldn't create data QP");
            }

            /* Keep 50 send WQEs of headroom below the configured size. */
            mvdev.ud_qp[index].send_wqes_avail = mvparams.ud_sq_size - 50;
            mvdev.ud_qp[index].send_wqes_total = mvparams.ud_sq_size - 50;
            mvdev.ud_qp[index].ext_sendq_head = mvdev.ud_qp[index].ext_sendq_tail = NULL;
            mvdev.ud_qp[index].hca = &(mvdev.hca[i]);
            mvdev.ud_qp[index].ext_sendq_size = 0;
            mvdev.ud_qp[index].unsignaled_count = 0;
            mvdev.ud_qp[index].type = MVDEV_CH_UD_RQ;

            {
                /* Read back the inline limit the driver actually granted. */
                struct ibv_qp_attr attr;
                struct ibv_qp_init_attr init_attr;
                ibv_query_qp(mvdev.ud_qp[index].qp, &attr, 0, &init_attr);
                mvdev.ud_qp[index].max_inline = init_attr.cap.max_inline_data;
            }

            /* get a receive pool setup for this qp */
            mvdev.ud_qp[index].rpool = MV_Create_RPool(mvparams.recvq_size, 100, mvparams.mtu, NULL, &(mvdev.ud_qp[index]));

            /* NOTE(review): this debug line prints ud_qp[i] but the QP
             * just configured is ud_qp[index] — likely a stale index. */
            D_PRINT("Finished setting up UD QP %d, num: %u\n", i, mvdev.ud_qp[i].qp->qp_num);
        }
    }

    return 1;
}
/* Build the rendezvous (zcopy) resources: a free-list pool of rndv_qps
 * UD QPs each with its own CQ, one send-side rendezvous CQ per HCA with
 * a registered 40-byte GRH scratch buffer, and one zcopy-send UD QP per
 * HCA. Returns 0 on success. NOTE(review): the failure paths also
 * return 0, so callers cannot distinguish success from CQ failure here
 * (error_abort_all is presumably fatal — confirm). */
int MV_Setup_Rndv_QPs()
{
    int i;

    /* NOTE(review): malloc results are not checked before use. */
    mvdev.rndv_pool_qps = (mv_qp_pool_entry *) malloc(sizeof(mv_qp_pool_entry) * mvparams.rndv_qps);
    mvdev.grh_buf = (char *) malloc(sizeof(char) * 40);  /* GRH is 40 bytes */
    mvdev.rndv_cq = (struct ibv_cq **) malloc(sizeof(struct ibv_cq *) * mvdev.num_hcas);
    mvdev.rndv_qp = (mv_qp *) malloc(sizeof(mv_qp) * mvdev.num_hcas);

    /* setup the pool of QPs */
    for(i = 0; i < mvparams.rndv_qps; i++) {
        int hca = i % mvdev.num_hcas;  /* round-robin pool entries over HCAs */

        mvdev.rndv_pool_qps[i].ud_cq = ibv_create_cq(mvdev.hca[hca].context, 8192 * 2, NULL, NULL, 0);
        if(!mvdev.rndv_pool_qps[i].ud_cq) {
            error_abort_all(IBV_RETURN_ERR, "Couldn't create RNDV CQ %d", i);
            return 0;
        }

        mvdev.rndv_si.recv_cq = mvdev.rndv_si.send_cq = mvdev.rndv_pool_qps[i].ud_cq;
        mvdev.rndv_si.sq_psn = mvparams.psn;
        mvdev.rndv_si.pd = mvdev.hca[hca].pd;
        mvdev.rndv_si.cap.max_send_wr = 1;
        mvdev.rndv_si.cap.max_recv_wr = 4096;
        mvdev.rndv_si.cap.max_send_sge = 1;
        mvdev.rndv_si.cap.max_recv_sge = 2;
        mvdev.rndv_si.cap.max_inline_data = 0;

        mvdev.rndv_pool_qps[i].ud_qp = MV_Setup_UD_QP(&mvdev.rndv_si);
        if(!mvdev.rndv_pool_qps[i].ud_qp) {
            error_abort_all(IBV_RETURN_ERR, "Couldn't create RNDV UD QP %d", i);
        }

        mvdev.rndv_pool_qps[i].associated_qpn = -1;   /* not yet bound to a peer */
        mvdev.rndv_pool_qps[i].associated_rank = -1;
        mvdev.rndv_pool_qps[i].seqnum = 0;
        mvdev.rndv_pool_qps[i].ptr.next = NULL;
        mvdev.rndv_pool_qps[i].ptr.prev = NULL;
        mvdev.rndv_pool_qps[i].hca = &(mvdev.hca[hca]);
    }

    /* Thread the pool entries into a singly-linked free list. */
    for(i = 0; i < mvparams.rndv_qps - 1; i++) {
        mvdev.rndv_pool_qps[i].ptr.next = &(mvdev.rndv_pool_qps[i + 1]);
    }
    mvdev.rndv_pool_qps_free_head = &(mvdev.rndv_pool_qps[0]);

    /* setup the cqs for completions of send ops */
    for(i = 0; i < mvdev.num_hcas; i++) {
        mvdev.rndv_cq[i] = ibv_create_cq(mvdev.hca[i].context, 16384, NULL, NULL, 0);
        if (!mvdev.rndv_cq[i]) {
            error_abort_all(IBV_RETURN_ERR, "Couldn't create RNDV CQ");
            return 0;
        }

        /* register the GRH buffer for each HCA */
        mvdev.grh_mr[i] = register_memory(i, mvdev.grh_buf, 40);
    }

    /* setup the qps we send zcopy messages on */
    for(i = 0; i < mvdev.num_hcas; i++) {
        mv_qp_setup_information si;
        si.send_cq = si.recv_cq = mvdev.rndv_cq[i];
        si.sq_psn = mvparams.psn;
        si.pd = mvdev.hca[i].pd;
        si.cap.max_send_wr = 15000;
        si.cap.max_recv_wr = 1;
        si.cap.max_send_sge = 1;
        si.cap.max_recv_sge = 1;
        si.cap.max_inline_data = 0;

        mvdev.rndv_qp[i].qp = MV_Setup_UD_QP(&si);
        mvdev.rndv_qp[i].send_wqes_avail = 15000;
        mvdev.rndv_qp[i].send_wqes_total = 15000;
        mvdev.rndv_qp[i].ext_sendq_head = mvdev.rndv_qp[i].ext_sendq_tail = NULL;
        mvdev.rndv_qp[i].hca = &(mvdev.hca[i]);
        mvdev.rndv_qp[i].type = MVDEV_CH_UD_RQ;
    }

    return 0;
}
/*
 * create both the high and low priority completion queues
 * and the shared receive queue (if requested)
 */
int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
{
    /* Allocate Protection Domain */
    openib_btl->poll_cq = false;

    if (mca_btl_openib_component.use_srq) {
        struct ibv_srq_init_attr attr;
        attr.attr.max_wr = mca_btl_openib_component.srq_rd_max;
        attr.attr.max_sge = mca_btl_openib_component.ib_sg_list_size;
        openib_btl->srd_posted[BTL_OPENIB_HP_QP] = 0;
        openib_btl->srd_posted[BTL_OPENIB_LP_QP] = 0;
        openib_btl->srq[BTL_OPENIB_HP_QP] = ibv_create_srq(openib_btl->hca->ib_pd, &attr);
        if (NULL == openib_btl->srq[BTL_OPENIB_HP_QP]) {
            show_init_error(__FILE__, __LINE__, "ibv_create_srq", ibv_get_device_name(openib_btl->hca->ib_dev));
            return OMPI_ERROR;
        }
        openib_btl->srq[BTL_OPENIB_LP_QP] = ibv_create_srq(openib_btl->hca->ib_pd, &attr);
        if (NULL == openib_btl->srq[BTL_OPENIB_LP_QP]) {
            show_init_error(__FILE__, __LINE__, "ibv_create_srq", ibv_get_device_name(openib_btl->hca->ib_dev));
            return OMPI_ERROR;
        }
    } else {
        openib_btl->srq[BTL_OPENIB_HP_QP] = NULL;
        openib_btl->srq[BTL_OPENIB_LP_QP] = NULL;
    }

    /* Create the low and high priority queue pairs */
    /* OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS selects between the old
     * 3-argument and the current 5-argument ibv_create_cq() signature —
     * presumably set by configure; confirm in the build system. */
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
    openib_btl->ib_cq[BTL_OPENIB_LP_QP] = ibv_create_cq(openib_btl->hca->ib_dev_context, mca_btl_openib_component.ib_cq_size, NULL);
#else
    openib_btl->ib_cq[BTL_OPENIB_LP_QP] = ibv_create_cq(openib_btl->hca->ib_dev_context, mca_btl_openib_component.ib_cq_size, NULL, NULL, 0);
#endif
    if (NULL == openib_btl->ib_cq[BTL_OPENIB_LP_QP]) {
        show_init_error(__FILE__, __LINE__, "ibv_create_cq", ibv_get_device_name(openib_btl->hca->ib_dev));
        return OMPI_ERROR;
    }

#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
    openib_btl->ib_cq[BTL_OPENIB_HP_QP] = ibv_create_cq(openib_btl->hca->ib_dev_context, mca_btl_openib_component.ib_cq_size, NULL);
#else
    openib_btl->ib_cq[BTL_OPENIB_HP_QP] = ibv_create_cq(openib_btl->hca->ib_dev_context, mca_btl_openib_component.ib_cq_size, NULL, NULL, 0);
#endif
    if(NULL == openib_btl->ib_cq[BTL_OPENIB_HP_QP]) {
        show_init_error(__FILE__, __LINE__, "ibv_create_cq", ibv_get_device_name(openib_btl->hca->ib_dev));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
/***************************************************************************//**
 * Description
 *      Init rdma global resources
 *
 * Builds the per-thread RDMA state: device context (first device found),
 * PD, completion channel, SRQ, send and recv CQs (both armed for
 * notification), and buff_per_thread receive buffers that are registered
 * and pre-posted to the SRQ. Returns the context, or NULL on any failure.
 * NOTE(review): resources already created are not released on the error
 * paths, and the calloc() results are not NULL-checked before use.
 ******************************************************************************/
static struct thread_context* init_rdma_thread_resources()
{
	struct thread_context *ctx = calloc(1, sizeof(struct thread_context));
	ctx->qp_hash = hashtable_create(1024);

	int num_device;
	if ( !(ctx->device_ctx_list = rdma_get_devices(&num_device)) ) {
		perror("rdma_get_devices()");
		return NULL;
	}
	ctx->device_ctx = *ctx->device_ctx_list;  /* use the first device */
	if (verbose) {
		printf("Get device: %d\n", num_device);
	}

	if ( !(ctx->pd = ibv_alloc_pd(ctx->device_ctx)) ) {
		perror("ibv_alloc_pd()");
		return NULL;
	}
	if ( !(ctx->comp_channel = ibv_create_comp_channel(ctx->device_ctx)) ) {
		perror("ibv_create_comp_channel()");
		return NULL;
	}

	struct ibv_srq_init_attr srq_init_attr;
	srq_init_attr.srq_context = NULL;
	srq_init_attr.attr.max_sge = 16;
	srq_init_attr.attr.max_wr = srq_size;
	srq_init_attr.attr.srq_limit = srq_size; /* RDMA TODO: what is srq_limit? */
	if ( !(ctx->srq = ibv_create_srq(ctx->pd, &srq_init_attr)) ) {
		perror("ibv_create_srq()");
		return NULL;
	}

	/* Separate send/recv CQs, both delivering events on one channel. */
	if ( !(ctx->send_cq = ibv_create_cq(ctx->device_ctx, cq_size, NULL, ctx->comp_channel, 0)) ) {
		perror("ibv_create_cq()");
		return NULL;
	}
	if (0 != ibv_req_notify_cq(ctx->send_cq, 0)) {
		perror("ibv_reg_notify_cq()");
		return NULL;
	}
	if ( !(ctx->recv_cq = ibv_create_cq(ctx->device_ctx, cq_size, NULL, ctx->comp_channel, 0)) ) {
		perror("ibv_create_cq()");
		return NULL;
	}
	if (0 != ibv_req_notify_cq(ctx->recv_cq, 0)) {
		perror("ibv_reg_notify_cq()");
		return NULL;
	}

	ctx->rsize = BUFF_SIZE;
	ctx->rbuf_list = calloc(buff_per_thread, sizeof(char *));
	ctx->rmr_list = calloc(buff_per_thread, sizeof(struct ibv_mr*));
	ctx->poll_wc = calloc(poll_wc_size, sizeof(struct ibv_wc));

	/* Allocate all receive buffers; on partial failure free what was
	 * allocated and null the list so the check below reports it. */
	int i = 0;
	for (i = 0; i < buff_per_thread; ++i) {
		ctx->rbuf_list[i] = malloc(ctx->rsize);
		if (ctx->rbuf_list[i] == 0) {
			break;
		}
	}
	if (i != buff_per_thread) {
		int j = 0;
		for (j = 0; j < i; ++j) {
			free(ctx->rbuf_list[j]);
		}
		free(ctx->rbuf_list);
		ctx->rbuf_list = 0;
	}
	if (!ctx->rmr_list || !ctx->rbuf_list) {
		fprintf(stderr, "out of ctxmory in init_rdma_thread_resources()\n");
		return NULL;
	}

	/* Register each buffer and pre-post it to the SRQ; the MR pointer
	 * doubles as the work-request id. */
	struct ibv_recv_wr *bad = NULL;
	struct ibv_sge sge;
	struct ibv_recv_wr rwr;
	for (i = 0; i < buff_per_thread; ++i) {
		ctx->rmr_list[i] = ibv_reg_mr(ctx->pd, ctx->rbuf_list[i], ctx->rsize, IBV_ACCESS_LOCAL_WRITE);
		sge.addr = (uintptr_t)ctx->rbuf_list[i];
		sge.length = ctx->rsize;
		sge.lkey = ctx->rmr_list[i]->lkey;
		rwr.wr_id = (uintptr_t)ctx->rmr_list[i];
		rwr.next = NULL;
		rwr.sg_list = &sge;
		rwr.num_sge = 1;
		if (0 != ibv_post_srq_recv(ctx->srq, &rwr, &bad)) {
			perror("ibv_post_srq_recv()");
			return NULL;
		}
	}

	return ctx;
}
/* Probe the device for the QP capabilities fi_ibv advertises: read the
 * configured defaults from FI_CONF_DIR, clamp them to the device limits,
 * build a throwaway PD/CQ/RC-QP, and copy the granted values into info's
 * tx/rx attributes. Returns 0 or -errno; scratch objects are always
 * released. */
static inline int fi_ibv_get_qp_cap(struct ibv_context *ctx, struct ibv_device_attr *device_attr, struct fi_info *info)
{
	struct ibv_pd *pd;
	struct ibv_cq *cq;
	struct ibv_qp *qp;
	struct ibv_qp_init_attr init_attr;
	int ret = 0;

	pd = ibv_alloc_pd(ctx);
	if (!pd) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_alloc_pd", errno);
		return -errno;
	}

	cq = ibv_create_cq(ctx, 1, NULL, NULL, 0);
	if (!cq) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_create_cq", errno);
		ret = -errno;
		goto err1;
	}

	/* TODO: serialize access to string buffers */
	fi_read_file(FI_CONF_DIR, "def_tx_ctx_size", def_tx_ctx_size, sizeof def_tx_ctx_size);
	fi_read_file(FI_CONF_DIR, "def_rx_ctx_size", def_rx_ctx_size, sizeof def_rx_ctx_size);
	fi_read_file(FI_CONF_DIR, "def_tx_iov_limit", def_tx_iov_limit, sizeof def_tx_iov_limit);
	fi_read_file(FI_CONF_DIR, "def_rx_iov_limit", def_rx_iov_limit, sizeof def_rx_iov_limit);
	fi_read_file(FI_CONF_DIR, "def_inject_size", def_inject_size, sizeof def_inject_size);

	memset(&init_attr, 0, sizeof init_attr);
	init_attr.send_cq = cq;
	init_attr.recv_cq = cq;
	init_attr.cap.max_send_wr = MIN(atoi(def_tx_ctx_size), device_attr->max_qp_wr);
	init_attr.cap.max_recv_wr = MIN(atoi(def_rx_ctx_size), device_attr->max_qp_wr);
	init_attr.cap.max_send_sge = MIN(atoi(def_tx_iov_limit), device_attr->max_sge);
	init_attr.cap.max_recv_sge = MIN(atoi(def_rx_iov_limit), device_attr->max_sge);
	init_attr.cap.max_inline_data = atoi(def_inject_size);
	init_attr.qp_type = IBV_QPT_RC;

	qp = ibv_create_qp(pd, &init_attr);
	if (!qp) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_create_qp", errno);
		ret = -errno;
		goto err2;
	}

	/* On success ibv_create_qp updates init_attr.cap with granted values. */
	info->tx_attr->inject_size = init_attr.cap.max_inline_data;
	info->tx_attr->iov_limit = init_attr.cap.max_send_sge;
	info->tx_attr->size = init_attr.cap.max_send_wr;
	info->rx_attr->iov_limit = init_attr.cap.max_recv_sge;
	/*
	 * On some HW ibv_create_qp can increase max_recv_wr value more than
	 * it really supports. So, alignment with device capability is needed.
	 */
	info->rx_attr->size = MIN(init_attr.cap.max_recv_wr, device_attr->max_qp_wr);

	ibv_destroy_qp(qp);
err2:
	ibv_destroy_cq(cq);
err1:
	ibv_dealloc_pd(pd);
	return ret;
}
/* Set up the per-connection verbs objects for one ibw connection:
 * completion channel (its fd wired into the tevent loop), PD, memory
 * regions, a CQ sized for all send+recv work requests, and an RC QP via
 * the CM id. On failure writes a message into ibw_lasterr and returns
 * nonzero; on success returns ibw_fill_cq(conn)'s result. */
static int ibw_setup_cq_qp(struct ibw_conn *conn)
{
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	struct ibv_qp_init_attr init_attr;
	struct ibv_qp_attr attr;
	int rc;

	DEBUG(DEBUG_DEBUG, ("ibw_setup_cq_qp(cmid: %p)\n", pconn->cm_id));

	/* init verbs */
	pconn->verbs_channel = ibv_create_comp_channel(pconn->cm_id->verbs);
	if (!pconn->verbs_channel) {
		sprintf(ibw_lasterr, "ibv_create_comp_channel failed %d\n", errno);
		return -1;
	}
	DEBUG(DEBUG_DEBUG, ("created channel %p\n", pconn->verbs_channel));

	/* Watch the channel fd so CQ events are handled by the event loop. */
	pconn->verbs_channel_event = tevent_add_fd(pctx->ectx, NULL, /* not pconn or conn */
		pconn->verbs_channel->fd, TEVENT_FD_READ, ibw_event_handler_verbs, conn);

	pconn->pd = ibv_alloc_pd(pconn->cm_id->verbs);
	if (!pconn->pd) {
		sprintf(ibw_lasterr, "ibv_alloc_pd failed %d\n", errno);
		return -1;
	}
	DEBUG(DEBUG_DEBUG, ("created pd %p\n", pconn->pd));

	/* init mr */
	if (ibw_init_memory(conn))
		return -1;

	/* init cq */
	pconn->cq = ibv_create_cq(pconn->cm_id->verbs, pctx->opts.max_recv_wr + pctx->opts.max_send_wr, conn, pconn->verbs_channel, 0);
	if (pconn->cq==NULL) {
		sprintf(ibw_lasterr, "ibv_create_cq failed\n");
		return -1;
	}

	rc = ibv_req_notify_cq(pconn->cq, 0);
	if (rc) {
		sprintf(ibw_lasterr, "ibv_req_notify_cq failed with %d\n", rc);
		return rc;
	}

	/* init qp */
	memset(&init_attr, 0, sizeof(init_attr));
	init_attr.cap.max_send_wr = pctx->opts.max_send_wr;
	init_attr.cap.max_recv_wr = pctx->opts.max_recv_wr;
	init_attr.cap.max_recv_sge = 1;
	init_attr.cap.max_send_sge = 1;
	init_attr.qp_type = IBV_QPT_RC;
	init_attr.send_cq = pconn->cq;
	init_attr.recv_cq = pconn->cq;

	rc = rdma_create_qp(pconn->cm_id, pconn->pd, &init_attr);
	if (rc) {
		sprintf(ibw_lasterr, "rdma_create_qp failed with %d\n", rc);
		return rc;
	}
	/* elase result is in pconn->cm_id->qp */

	rc = ibv_query_qp(pconn->cm_id->qp, &attr, IBV_QP_PATH_MTU, &init_attr);
	if (rc) {
		sprintf(ibw_lasterr, "ibv_query_qp failed with %d\n", rc);
		return rc;
	}

	return ibw_fill_cq(conn);
}
/* Minimal RDMA-CM server example: listen on TCP port 20079, accept one
 * connection (handing the client this buffer's address and rkey in the
 * connection reply's private data), wait for the client's value to
 * arrive in buf[1], add the two 32-bit integers in buf, and send the sum
 * back. Returns 0 on success, nonzero on any failure (no cleanup on the
 * error paths — example code). */
int main(int argc, char *argv[])
{
	struct pdata rep_pdata;

	struct rdma_event_channel *cm_channel;
	struct rdma_cm_id *listen_id;
	struct rdma_cm_id *cm_id;
	struct rdma_cm_event *event;
	struct rdma_conn_param conn_param = { };

	struct ibv_pd *pd;
	struct ibv_comp_channel *comp_chan;
	struct ibv_cq *cq;
	struct ibv_cq *evt_cq;
	struct ibv_mr *mr;
	struct ibv_qp_init_attr qp_attr = { };
	struct ibv_sge sge;
	struct ibv_send_wr send_wr = { };
	struct ibv_send_wr *bad_send_wr;
	struct ibv_recv_wr recv_wr = { };
	struct ibv_recv_wr *bad_recv_wr;
	struct ibv_wc wc;
	void *cq_context;

	struct sockaddr_in sin;
	uint32_t *buf;
	int err;

	/* Set up RDMA CM structures */
	cm_channel = rdma_create_event_channel();
	if (!cm_channel)
		return 1;

	err = rdma_create_id(cm_channel, &listen_id, NULL, RDMA_PS_TCP);
	if (err)
		return err;

	sin.sin_family = AF_INET;
	sin.sin_port = htons(20079);
	sin.sin_addr.s_addr = INADDR_ANY;

	/* Bind to local port and listen for connection request */
	err = rdma_bind_addr(listen_id, (struct sockaddr *) &sin);
	if (err)
		return 1;
	err = rdma_listen(listen_id, 1);
	if (err)
		return 1;

	/* Block until a client connects. */
	err = rdma_get_cm_event(cm_channel, &event);
	if (err)
		return err;
	printf("after get_cm_event\n");
	if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST)
		return 1;
	cm_id = event->id;
	rdma_ack_cm_event(event);

	/* Create verbs objects now that we know which device to use */
	pd = ibv_alloc_pd(cm_id->verbs);
	if (!pd)
		return 1;
	comp_chan = ibv_create_comp_channel(cm_id->verbs);
	if (!comp_chan)
		return 1;
	cq = ibv_create_cq(cm_id->verbs, 2, NULL, comp_chan, 0);
	if (!cq)
		return 1;
	if (ibv_req_notify_cq(cq, 0))
		return 1;

	/* Two uint32 slots: buf[0] local operand/result, buf[1] receive slot. */
	buf = calloc(2, sizeof(uint32_t));
	if (!buf)
		return 1;
	mr = ibv_reg_mr(pd, buf, 2 * sizeof(uint32_t), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
	if (!mr)
		return 1;

	qp_attr.cap.max_send_wr = 1;
	qp_attr.cap.max_send_sge = 1;
	qp_attr.cap.max_recv_wr = 1;
	qp_attr.cap.max_recv_sge = 1;
	qp_attr.send_cq = cq;
	qp_attr.recv_cq = cq;
	qp_attr.qp_type = IBV_QPT_RC;
	err = rdma_create_qp(cm_id, pd, &qp_attr);
	if (err)
		return err;

	/* Post receive before accepting connection */
	sge.addr = (uintptr_t) buf + sizeof(uint32_t);
	sge.length = sizeof(uint32_t);
	sge.lkey = mr->lkey;
	recv_wr.sg_list = &sge;
	recv_wr.num_sge = 1;
	if (ibv_post_recv(cm_id->qp, &recv_wr, &bad_recv_wr))
		return 1;

	/* Hand the buffer address and rkey to the client via the connection
	 * reply's private data (network byte order). */
	rep_pdata.buf_va = htonll((uintptr_t) buf);
	rep_pdata.buf_rkey = htonl(mr->rkey);
	conn_param.responder_resources = 1;
	conn_param.private_data = &rep_pdata;
	conn_param.private_data_len = sizeof rep_pdata;

	/* Accept connection */
	printf("before accept\n");
	err = rdma_accept(cm_id, &conn_param);
	if (err)
		return 1;
	printf("after accept\n");
	err = rdma_get_cm_event(cm_channel, &event);
	if (err)
		return err;
	if (event->event != RDMA_CM_EVENT_ESTABLISHED)
		return 1;
	rdma_ack_cm_event(event);

	/* Wait for receive completion */
	if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context))
		return 1;
	if (ibv_req_notify_cq(cq, 0))
		return 1;
	if (ibv_poll_cq(cq, 1, &wc) < 1)
		return 1;
	if (wc.status != IBV_WC_SUCCESS)
		return 1;

	/* Add two integers and send reply back */
	buf[0] = htonl(ntohl(buf[0]) + ntohl(buf[1]));
	sge.addr = (uintptr_t) buf;
	sge.length = sizeof(uint32_t);
	sge.lkey = mr->lkey;
	send_wr.opcode = IBV_WR_SEND;
	send_wr.send_flags = IBV_SEND_SIGNALED;
	send_wr.sg_list = &sge;
	send_wr.num_sge = 1;
	if (ibv_post_send(cm_id->qp, &send_wr, &bad_send_wr))
		return 1;

	/* Wait for send completion */
	if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context))
		return 1;
	if (ibv_poll_cq(cq, 1, &wc) < 1)
		return 1;
	if (wc.status != IBV_WC_SUCCESS)
		return 1;

	/* Acknowledge both CQ events (recv + send) before exiting. */
	printf("before ack cq 2\n");
	ibv_ack_cq_events(cq, 2);

	return 0;
}