bool IBInterface::_createQueuePair( IBAdapter* adapter )
{
    // Attributes used to initialize a queue pair at creation time.
    ib_qp_create_t queuePairCreate;
    memset( &queuePairCreate, 0, sizeof( ib_qp_create_t ));

    queuePairCreate.h_sq_cq = _completionQueue->getWriteHandle();
    queuePairCreate.h_rq_cq = _completionQueue->getReadHandle();

    // Requested maximum number of work requests that may be outstanding on
    // the queue pair's send and receive queues. Must be less than or equal
    // to the maximum reported by the channel adapter.
    queuePairCreate.sq_depth = EQ_NUMBLOCKMEMORY;
    queuePairCreate.rq_depth = EQ_NUMBLOCKMEMORY;

    // Maximum number of scatter-gather elements per send/receive work
    // request. Must be less than or equal to the adapter's reported maximum.
    queuePairCreate.sq_sge = 1;
    queuePairCreate.rq_sge = 1;

    // Request a completion for every send work request.
    // BUG FIX: sq_signaled was assigned twice -- first the unrelated
    // IB_SEND_OPT_SIGNALED send-option flag, then 'true'. The field is a
    // boolean; keep the single correct assignment.
    queuePairCreate.sq_signaled = true;

    // Reliable-connected (RC) transport.
    queuePairCreate.qp_type = IB_QPT_RELIABLE_CONN;

    queuePairCreate.sq_max_inline = EQ_IB_MAXINLINE;

    // Create the queue pair on the adapter's protection domain.
    const ib_api_status_t ibStatus =
        ib_create_qp( adapter->getProtectionDomain(), &queuePairCreate,
                      0, 0, &_queuePair );
    if( ibStatus != IB_SUCCESS )
    {
        LBERROR << "cannot create a queue pair" << std::endl;
        return false;
    }
    return true;
}
/*
 * Create the RC queue pair for the passive (receive) side of an IPoIB
 * connected-mode connection.  Send side is sized for a single drain WR
 * only; receive resources come from the SRQ when one exists, otherwise
 * from a per-connection receive ring sized below.
 */
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
					   struct ipoib_cm_rx *p)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_init_attr attr = {
		.event_handler = ipoib_cm_rx_event_handler,
		.send_cq = priv->recv_cq, /* For drain WR */
		.recv_cq = priv->recv_cq,
		.srq = priv->cm.srq,
		.cap.max_send_wr = 1, /* For drain WR */
		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
		.sq_sig_type = IB_SIGNAL_ALL_WR,
		.qp_type = IB_QPT_RC,
		.qp_context = p,
	};

	/* Without an SRQ each RX QP carries its own receive queue. */
	if (!ipoib_cm_has_srq(dev)) {
		attr.cap.max_recv_wr = ipoib_recvq_size;
		attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
	}

	return ib_create_qp(priv->pd, &attr);
}

/*
 * Walk the receive QP through INIT -> RTR -> RTS using the attributes
 * supplied by the CM.  Failures up to RTR are fatal (non-zero return);
 * the RTS transition is best-effort -- see the firmware note below.
 */
static int ipoib_cm_modify_rx_qp(struct net_device *dev,
				 struct ib_cm_id *cm_id, struct ib_qp *qp,
				 unsigned psn)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
		return ret;
	}
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}
	/* Receiver starts at the PSN negotiated by the CM exchange. */
	qp_attr.rq_psn = psn;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

	/*
	 * Current Mellanox HCA firmware won't generate completions
	 * with error for drain WRs unless the QP has been moved to
	 * RTS first. This work-around leaves a window where a QP has
	 * moved to error asynchronously, but this will eventually get
	 * fixed in firmware, so let's not error out if modify QP
	 * fails.
	 */
	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return 0;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return 0;
	}
	return 0;
}

/*
 * Initialize a receive work request and its scatter list: one head
 * fragment of IPOIB_CM_HEAD_SIZE followed by page-sized fragments, all
 * registered under the device MR's lkey.
 */
static void ipoib_cm_init_rx_wr(struct net_device *dev,
				struct ib_recv_wr *wr,
				struct ib_sge *sge)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int i;

	for (i = 0; i < priv->cm.num_frags; ++i)
		sge[i].lkey = priv->mr->lkey;

	sge[0].length = IPOIB_CM_HEAD_SIZE;
	for (i = 1; i < priv->cm.num_frags; ++i)
		sge[i].length = PAGE_SIZE;

	wr->next = NULL;
	wr->sg_list = sge;
	wr->num_sge = priv->cm.num_frags;
}

/*
 * Set up the per-connection receive ring for the non-SRQ case.
 * NOTE(review): this function is truncated in this chunk and contains an
 * UNRESOLVED MERGE CONFLICT below (only the HEAD side is visible) -- it
 * must be resolved against the other branch before this file can build.
 */
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev,
				   struct ib_cm_id *cm_id,
				   struct ipoib_cm_rx *rx)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct {
		struct ib_recv_wr wr;
		struct ib_sge sge[IPOIB_CM_RX_SG];
	} *t;
	int ret;
	int i;

<<<<<<< HEAD
	rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
=======
static int roq_eth_init_qp(struct net_device *ndev) { struct roq_eth_priv *vdev = netdev_priv(ndev); struct ib_qp_init_attr create_qp_attrs; struct ib_device *ibdev = vdev->ibdev; struct ib_qp_attr qp_attr; enum ib_qp_attr_mask qp_attr_mask; struct ib_srq_init_attr srq_attr; int rank, i, size, ret = 0; /* initialize variables */ memset(&create_qp_attrs, 0, sizeof(struct ib_qp_init_attr)); rank = roq_tcoords_to_rank(vdev->netdesc, vdev->personality.Network_Config.Acoord, vdev->personality.Network_Config.Bcoord, vdev->personality.Network_Config.Ccoord, vdev->personality.Network_Config.Dcoord, vdev->personality.Network_Config.Ecoord); if (IS_ERR_VALUE(rank)) { ret = -EINVAL; pr_info("roq_eth_init_qp: invalid rank\n"); goto out; } if ((ret = roq_alloc_rx_buffers(vdev)) != 0) goto out; /* create completion queues */ vdev->send_cq = ib_create_cq(ibdev, roq_eth_tx_ib_compl, NULL, vdev->ndev, MAX_TX_SKBS, 0); if (IS_ERR(vdev->send_cq)) { pr_warn("roq_eth_init_qp: ib_create_cq failed"); ret = PTR_ERR(vdev->send_cq); vdev->send_cq = NULL; goto out; } vdev->recv_cq = ib_create_cq(ibdev, roq_eth_rx_ib_compl, NULL, vdev->ndev, MAX_RX_SKBS, 0); if (IS_ERR(vdev->recv_cq)) { pr_warn("roq_eth_init_qp: ib_create_cq failed"); ret = PTR_ERR(vdev->recv_cq); vdev->recv_cq = NULL; goto out; } create_qp_attrs.send_cq = vdev->send_cq; create_qp_attrs.recv_cq = vdev->recv_cq; /* allocate protection domain and qp array */ vdev->kpd = ib_alloc_pd(ibdev); if (IS_ERR(vdev->kpd)) { pr_warn("roq_eth_init_qp: ib_alloc_pd failed"); ret = PTR_ERR(vdev->kpd); vdev->kpd = NULL; goto out; } memset(&srq_attr, 0, sizeof(struct ib_srq_init_attr)); srq_attr.attr.max_wr = MAX_RX_SKBS; srq_attr.attr.max_sge = 1; vdev->srq = ib_create_srq(vdev->kpd, &srq_attr); if (IS_ERR(vdev->srq)) { pr_warn("roq_eth_init_qp: ib_create_srq failed"); ret = PTR_ERR(vdev->srq); vdev->srq = NULL; goto out; } /* set some more parameters */ create_qp_attrs.qp_type = IB_QPT_UD; create_qp_attrs.event_handler = NULL; 
create_qp_attrs.qp_context = NULL; create_qp_attrs.srq = vdev->srq; create_qp_attrs.cap.max_send_wr = MAX_TX_SKBS; create_qp_attrs.cap.max_recv_wr = 1; create_qp_attrs.cap.max_send_sge = 1; create_qp_attrs.cap.max_recv_sge = 1; create_qp_attrs.cap.max_inline_data = 0; size = sizeof(struct ib_qp *) * vdev->part_size; vdev->qps = kmalloc(size, GFP_KERNEL); if (!vdev->qps) { pr_warn("roq_eth_init_qp: kmalloc failed\n"); goto out; } for (i = 0; i < vdev->part_size; i++) { vdev->qps[i] = ib_create_qp(vdev->kpd, &create_qp_attrs); if (IS_ERR(vdev->qps[i])) { ret = PTR_ERR(vdev->qps[i]); pr_warn("roq_eth_init_qp: ib_create_qp failed: %d", ret); goto out; } } ret = ib_req_notify_cq(vdev->send_cq, IB_CQ_NEXT_COMP); if (ret) goto out; ret = ib_req_notify_cq(vdev->recv_cq, IB_CQ_NEXT_COMP); if (ret) goto out; for (i = 0; i < vdev->part_size; i++) { qp_attr_mask = 0; qp_attr_mask |= IB_QP_STATE; qp_attr.qp_state = IB_QPS_RTS; qp_attr_mask |= IB_QP_AV; /* this QP will send to peer rank i (zero based) */ qp_attr.ah_attr.dlid = i; qp_attr_mask |= IB_QP_DEST_QPN; /* * this QP will send to peer QP num rank + 1 * (QP zero is reserved) */ qp_attr.dest_qp_num = rank + 1; ib_modify_qp(vdev->qps[i], &qp_attr, qp_attr_mask); } /* SETUP RECEIVE QP */ for (i = 0; i < MAX_RX_SKBS; i++) roq_eth_post_recv(vdev, i); if (vdev->fix_rem == 1) { roq_eth_rem_init_qp(ndev); vdev->fix_rem = 0; } out: if (ret) { pr_warn("roq_eth_init_qp: rv = %d\n", ret); roq_eth_cleanup_ofa(vdev); } return ret; }
int roq_eth_rem_init_qp(struct net_device *ndev) { struct roq_eth_priv *vdev = netdev_priv(ndev); struct ib_qp_init_attr create_qp_attrs; struct ib_qp_attr qp_attr; enum ib_qp_attr_mask qp_attr_mask; char *argv[] = {"/etc/init.d/post_discovery", NULL}; char *env[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", "LD_LIBRARY_PATH=/lib:/usr/lib", NULL}; int i, rank, size, ret = 0; if (vdev->send_cq == NULL || vdev->recv_cq == NULL || vdev->srq == NULL || vdev->kpd == NULL) { vdev->fix_rem = 1; pr_warn("roq_eth_rem_init: return w/o discovery\n"); return 0; } /* clean old remote qps */ if (vdev->rem_part_size) { for (i = 0; i < vdev->rem_part_size; i++) ib_destroy_qp(vdev->qps_rem[i]); kfree(vdev->qps_rem); } vdev->rem_part_size = RoQ_NetworkSize(vdev->netdesc_rem); rank = roq_tcoords_to_rank(vdev->netdesc, vdev->personality.Network_Config.Acoord, vdev->personality.Network_Config.Bcoord, vdev->personality.Network_Config.Ccoord, vdev->personality.Network_Config.Dcoord, vdev->personality.Network_Config.Ecoord); if (IS_ERR_VALUE(rank)) { ret = -EINVAL; pr_warn("roq_eth_rem_init: invalid rank\n"); goto out; } memset(&create_qp_attrs, 0, sizeof(struct ib_qp_init_attr)); create_qp_attrs.send_cq = vdev->send_cq; create_qp_attrs.recv_cq = vdev->recv_cq; /* set some more parameters */ create_qp_attrs.qp_type = IB_QPT_UD; create_qp_attrs.event_handler = NULL; create_qp_attrs.qp_context = NULL; create_qp_attrs.srq = vdev->srq; create_qp_attrs.cap.max_send_wr = MAX_TX_SKBS; create_qp_attrs.cap.max_recv_wr = 1; create_qp_attrs.cap.max_send_sge = 1; create_qp_attrs.cap.max_recv_sge = 1; create_qp_attrs.cap.max_inline_data = 0; size = sizeof(struct ib_qp *) * vdev->rem_part_size; vdev->qps_rem = (struct ib_qp **)kmalloc(size, GFP_KERNEL); if (!vdev->qps_rem) { pr_warn("roq_eth_rem_init_qp: remote QP alloc failed"); ret = -ENOMEM; goto out; } for (i = 0; i < vdev->rem_part_size; i++) { vdev->qps_rem[i] = ib_create_qp(vdev->kpd, &create_qp_attrs); if 
(IS_ERR(vdev->qps_rem[i])) { pr_warn("roq_eth_rem_init_qp: error creating qp %p\n", vdev->qps_rem[i]); ret = PTR_ERR(vdev->qps_rem[i]); goto out; } } for (i = 0; i < vdev->rem_part_size; i++) { qp_attr_mask = 0; qp_attr_mask |= IB_QP_STATE; qp_attr.qp_state = IB_QPS_RTS; qp_attr_mask |= IB_QP_AV; /* this QP will send to peer rank i (zero based) */ qp_attr.ah_attr.dlid = 0x8000 | i; qp_attr_mask |= IB_QP_DEST_QPN; /* * this QP will send to peer qp num rank + 1 * (QP zero is reserved) */ qp_attr.dest_qp_num = rank + 1; ib_modify_qp(vdev->qps_rem[i], &qp_attr, qp_attr_mask); } ret = call_usermodehelper(argv[0], argv, env, UMH_WAIT_EXEC); out: return ret; }
rdma_ctx_t rdma_init(int npages, char* ip_addr, int port) { int retval; rdma_ctx_t ctx; LOG_KERN(LOG_INFO, ("RDMA_INIT. ip_addr: %s port: %d npages: %d", ip_addr, port, npages)); ctx = kmalloc(sizeof(struct rdma_ctx), GFP_KERNEL); if (!ctx) return NULL; memset(ctx, 0, sizeof(struct rdma_ctx)); ctx->rem_mem_size = npages * (1024 * 4); if (!rdma_ib_device.ib_device_initialized) { LOG_KERN(LOG_INFO, ("ERROR")); } ctx->pd = ib_alloc_pd(rdma_ib_device.dev); CHECK_MSG_RET(ctx->pd != 0, "Error creating pd", 0); // Note that we set the CQ context to our ctx structure ctx->send_cq = ib_create_cq(rdma_ib_device.dev, comp_handler_send, cq_event_handler_send, ctx, 10, 0); ctx->recv_cq = ib_create_cq(rdma_ib_device.dev, comp_handler_recv, cq_event_handler_recv, ctx, 10, 0); CHECK_MSG_RET(ctx->send_cq != 0, "Error creating CQ", 0); CHECK_MSG_RET(ctx->recv_cq != 0, "Error creating CQ", 0); CHECK_MSG_RET(ib_req_notify_cq(ctx->recv_cq, IB_CQ_NEXT_COMP) == 0, "Error ib_req_notify_cq", 0); CHECK_MSG_RET(ib_req_notify_cq(ctx->send_cq, IB_CQ_NEXT_COMP) == 0, "Error ib_req_notify_cq", 0); // initialize qp_attr memset(&ctx->qp_attr, 0, sizeof(struct ib_qp_init_attr)); ctx->qp_attr.send_cq = ctx->send_cq; ctx->qp_attr.recv_cq = ctx->recv_cq; ctx->qp_attr.cap.max_send_wr = 10; ctx->qp_attr.cap.max_recv_wr = 10; ctx->qp_attr.cap.max_send_sge = 1; ctx->qp_attr.cap.max_recv_sge = 1; ctx->qp_attr.cap.max_inline_data = 0; ctx->qp_attr.qp_type = IB_QPT_RC; ctx->qp_attr.sq_sig_type = IB_SIGNAL_ALL_WR; ctx->qp = ib_create_qp(ctx->pd, &ctx->qp_attr); // connect with server with TCP retval = connect(ctx, ip_addr, port); if (retval != 0) return 0; retval = populate_port_data(ctx); if (retval != 0) return 0; // some necessary stuff ctx->lid = rdma_ib_device.attr.lid; ctx->qpn = ctx->qp->qp_num; get_random_bytes(&ctx->psn, sizeof(ctx->psn)); ctx->psn &= 0xffffff; // exchange data to bootstrap RDMA retval = handshake(ctx); if (retval != 0) return 0; // create memory region // modify QP to RTS retval = 
rdma_setup(ctx); if (retval != 0) return 0; return ctx; }
static void verbs_add_device (struct ib_device *dev) { int ret; struct ib_qp_init_attr attrs; if (ib_dev) return; /* durty hack for ib_dma_map_single not to segfault */ dev->dma_ops = NULL; ib_dev = dev; printk (KERN_INFO "IB add device called. Name = %s\n", dev->name); ret = ib_query_device (dev, &dev_attr); if (ret) { printk (KERN_INFO "ib_quer_device failed: %d\n", ret); return; } printk (KERN_INFO "IB device caps: max_qp %d, max_mcast_grp: %d, max_pkeys: %d\n", dev_attr.max_qp, dev_attr.max_mcast_grp, (int)dev_attr.max_pkeys); /* We'll work with first port. It's a sample module, anyway. Who is that moron which decided * to count ports from one? */ ret = ib_query_port (dev, 1, &port_attr); if (ret) { printk (KERN_INFO "ib_query_port failed: %d\n", ret); return; } printk (KERN_INFO "Port info: lid: %u, sm_lid: %u, max_msg_size: %u\n", (unsigned)port_attr.lid, (unsigned)port_attr.sm_lid, port_attr.max_msg_sz); pd = ib_alloc_pd (dev); if (IS_ERR (pd)) { ret = PTR_ERR (pd); printk (KERN_INFO "pd allocation failed: %d\n", ret); return; } printk (KERN_INFO "PD allocated\n"); mr = ib_get_dma_mr (pd, IB_ACCESS_LOCAL_WRITE); if (IS_ERR (mr)) { ret = PTR_ERR (mr); printk (KERN_INFO "get_dma_mr failed: %d\n", ret); return; } send_cq = ib_create_cq (dev, NULL, NULL, NULL, 1, 1); if (IS_ERR (send_cq)) { ret = PTR_ERR (send_cq); printk (KERN_INFO "ib_create_cq failed: %d\n", ret); return; } recv_cq = ib_create_cq (dev, verbs_comp_handler_recv, NULL, NULL, 1, 1); if (IS_ERR (recv_cq)) { ret = PTR_ERR (recv_cq); printk (KERN_INFO "ib_create_cq failed: %d\n", ret); return; } ib_req_notify_cq (recv_cq, IB_CQ_NEXT_COMP); printk (KERN_INFO "CQs allocated\n"); ib_query_pkey (dev, 1, 0, &pkey); /* allocate memory */ send_buf = kmalloc (buf_size + 40, GFP_KERNEL); recv_buf = kmalloc (buf_size + 40, GFP_KERNEL); if (!send_buf || !recv_buf) { printk (KERN_INFO "Memory allocation error\n"); return; } printk (KERN_INFO "Trying to register regions\n"); if (ib_dev->dma_ops) printk 
(KERN_INFO "DMA ops are defined\n"); memset (send_buf, 0, buf_size+40); memset (send_buf, 0, buf_size+40); send_key = ib_dma_map_single (ib_dev, send_buf, buf_size, DMA_FROM_DEVICE); printk (KERN_INFO "send_key obtained %llx\n", send_key); recv_key = ib_dma_map_single (ib_dev, recv_buf, buf_size, DMA_TO_DEVICE); printk (KERN_INFO "recv_key obtained %llx\n", recv_key); if (ib_dma_mapping_error (ib_dev, send_key)) { printk (KERN_INFO "Error mapping send buffer\n"); return; } if (ib_dma_mapping_error (ib_dev, recv_key)) { printk (KERN_INFO "Error mapping recv buffer\n"); return; } memset (&attrs, 0, sizeof (attrs)); attrs.qp_type = IB_QPT_UD; attrs.sq_sig_type = IB_SIGNAL_ALL_WR; attrs.event_handler = verbs_qp_event; attrs.cap.max_send_wr = CQ_SIZE; attrs.cap.max_recv_wr = CQ_SIZE; attrs.cap.max_send_sge = 1; attrs.cap.max_recv_sge = 1; attrs.send_cq = send_cq; attrs.recv_cq = recv_cq; qp = ib_create_qp (pd, &attrs); if (IS_ERR (qp)) { ret = PTR_ERR (qp); printk (KERN_INFO "qp allocation failed: %d\n", ret); return; } printk (KERN_INFO "Create QP with num %x\n", qp->qp_num); if (init_qp (qp)) { printk (KERN_INFO "Failed to initialize QP\n"); return; } ret = ib_query_gid (ib_dev, 1, 0, &local_info.gid); if (ret) { printk (KERN_INFO "query_gid failed %d\n", ret); return; } local_info.qp_num = qp->qp_num; local_info.lid = port_attr.lid; /* now we are ready to send our QP number and other stuff to other party */ if (!server_addr) { schedule_work (&sock_accept); flush_scheduled_work (); } else exchange_info (server_addr); if (!have_remote_info) { printk (KERN_INFO "Have no remote info, give up\n"); return; } ret = path_rec_lookup_start (); if (ret) { printk (KERN_INFO "path_rec lookup start failed: %d\n", ret); return; } /* post receive request */ verbs_post_recv_req (); mod_timer (&verbs_timer, NEXTJIFF(1)); }