Example #1
0
// Create the reliable-connected queue pair for this interface on the
// given adapter's protection domain.  Returns true on success, false
// (after logging) if ib_create_qp fails.
bool IBInterface::_createQueuePair( IBAdapter* adapter )
{
    // Attributes used to initialize a queue pair at creation time.
    ib_qp_create_t    queuePairCreate;
    memset( &queuePairCreate, 0, sizeof( ib_qp_create_t ));

    queuePairCreate.h_sq_cq     = _completionQueue->getWriteHandle();
    queuePairCreate.h_rq_cq     = _completionQueue->getReadHandle();
    // Requested maximum number of work requests that may be outstanding
    // on the queue pair's send and receive queue.  This value must be
    // less than or equal to the maximum reported by the channel adapter
    // associated with the queue pair.
    queuePairCreate.sq_depth     = EQ_NUMBLOCKMEMORY;
    queuePairCreate.rq_depth     = EQ_NUMBLOCKMEMORY;
    // Maximum number of scatter-gather elements that may be given in a
    // send and receive work request.  This value must be less than or
    // equal to the maximum reported by the channel adapter associated
    // with the queue pair.
    queuePairCreate.sq_sge         = 1;
    queuePairCreate.rq_sge         = 1;

    // connection type RC
    queuePairCreate.qp_type       = IB_QPT_RELIABLE_CONN;
    // BUG FIX: sq_signaled was assigned twice -- first with
    // IB_SEND_OPT_SIGNALED (a send-option flag, not a boolean) and then
    // overwritten with true.  The dead, type-confused first assignment
    // has been removed; only the boolean remains.
    queuePairCreate.sq_signaled   = true;
    queuePairCreate.sq_max_inline = EQ_IB_MAXINLINE;

    // Creates a queue pair
    ib_api_status_t ibStatus = ib_create_qp( adapter->getProtectionDomain(), 
                                             &queuePairCreate, 
                                             0, 0, &_queuePair );
    if ( ibStatus != IB_SUCCESS )
    {
        LBERROR << "cannot create a queue pair" << std::endl; 
        return false;
    }
    return true;
}
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
					   struct ipoib_cm_rx *p)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_init_attr attr = {
		.event_handler = ipoib_cm_rx_event_handler,
		.send_cq = priv->recv_cq, /* For drain WR */
		.recv_cq = priv->recv_cq,
		.srq = priv->cm.srq,
		.cap.max_send_wr = 1, /* For drain WR */
		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
		.sq_sig_type = IB_SIGNAL_ALL_WR,
		.qp_type = IB_QPT_RC,
		.qp_context = p,
	};

	if (!ipoib_cm_has_srq(dev)) {
		attr.cap.max_recv_wr  = ipoib_recvq_size;
		attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
	}

	return ib_create_qp(priv->pd, &attr);
}

/*
 * Walk the passive-side RX QP through the INIT -> RTR -> RTS state
 * transitions, using the attributes the IB CM computes for each step.
 *
 * Returns 0 on success or a negative error from the INIT/RTR steps.
 * Failures in the final RTS step are logged but deliberately NOT
 * reported (see the firmware workaround comment below).
 */
static int ipoib_cm_modify_rx_qp(struct net_device *dev,
				 struct ib_cm_id *cm_id, struct ib_qp *qp,
				 unsigned psn)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	/* Step 1: RESET -> INIT, attributes supplied by the CM. */
	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
		return ret;
	}
	/* Step 2: INIT -> RTR, overriding the starting receive PSN. */
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}
	qp_attr.rq_psn = psn;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

	/*
	 * Current Mellanox HCA firmware won't generate completions
	 * with error for drain WRs unless the QP has been moved to
	 * RTS first. This work-around leaves a window where a QP has
	 * moved to error asynchronously, but this will eventually get
	 * fixed in firmware, so let's not error out if modify QP
	 * fails.
	 */
	/* Step 3: RTR -> RTS; errors here return 0 on purpose. */
	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return 0;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return 0;
	}

	return 0;
}

/*
 * Initialize a receive work request and its scatter-gather list for a
 * CM RX ring entry: every fragment uses the device's local MR key, the
 * first fragment carries the header, the remainder are page-sized.
 */
static void ipoib_cm_init_rx_wr(struct net_device *dev,
				struct ib_recv_wr *wr,
				struct ib_sge *sge)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int frag;

	/* All fragments are accessed through the local memory region. */
	for (frag = 0; frag < priv->cm.num_frags; ++frag)
		sge[frag].lkey = priv->mr->lkey;

	/* Header goes in fragment 0; the rest hold full pages. */
	sge[0].length = IPOIB_CM_HEAD_SIZE;
	for (frag = 1; frag < priv->cm.num_frags; ++frag)
		sge[frag].length = PAGE_SIZE;

	wr->num_sge = priv->cm.num_frags;
	wr->sg_list = sge;
	wr->next    = NULL;
}

static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
				   struct ipoib_cm_rx *rx)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct {
		struct ib_recv_wr wr;
		struct ib_sge sge[IPOIB_CM_RX_SG];
	} *t;
	int ret;
	int i;

<<<<<<< HEAD
	rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
=======
Example #3
0
/*
 * Set up all OFA resources for the RoQ Ethernet device: receive
 * buffers, send/recv completion queues, protection domain, SRQ, and
 * one UD queue pair per partition peer, finally moving every QP to
 * RTS and posting the initial receive buffers.
 *
 * Returns 0 on success or a negative errno; on failure every
 * partially-created resource is released via roq_eth_cleanup_ofa().
 */
static int
roq_eth_init_qp(struct net_device *ndev)
{
	struct roq_eth_priv *vdev = netdev_priv(ndev);
	struct ib_qp_init_attr create_qp_attrs;
	struct ib_device *ibdev = vdev->ibdev;
	struct ib_qp_attr qp_attr;
	enum ib_qp_attr_mask qp_attr_mask;
	struct ib_srq_init_attr srq_attr;
	int rank, i, size, rv, ret = 0;

	/* initialize variables */
	memset(&create_qp_attrs, 0, sizeof(struct ib_qp_init_attr));

	/* Translate our torus coordinates into a linear rank. */
	rank = roq_tcoords_to_rank(vdev->netdesc,
			vdev->personality.Network_Config.Acoord,
			vdev->personality.Network_Config.Bcoord,
			vdev->personality.Network_Config.Ccoord,
			vdev->personality.Network_Config.Dcoord,
			vdev->personality.Network_Config.Ecoord);
	if (IS_ERR_VALUE(rank)) {
		ret = -EINVAL;
		pr_info("roq_eth_init_qp: invalid rank\n");
		goto out;
	}

	if ((ret = roq_alloc_rx_buffers(vdev)) != 0)
		goto out;

	/* create completion queues */
	vdev->send_cq = ib_create_cq(ibdev, roq_eth_tx_ib_compl, NULL,
	                             vdev->ndev, MAX_TX_SKBS, 0);
	if (IS_ERR(vdev->send_cq)) {
		pr_warn("roq_eth_init_qp: ib_create_cq failed");
		ret = PTR_ERR(vdev->send_cq);
		vdev->send_cq = NULL;
		goto out;
	}
	vdev->recv_cq = ib_create_cq(ibdev, roq_eth_rx_ib_compl, NULL,
	                             vdev->ndev, MAX_RX_SKBS, 0);
	if (IS_ERR(vdev->recv_cq)) {
		pr_warn("roq_eth_init_qp: ib_create_cq failed");
		ret = PTR_ERR(vdev->recv_cq);
		vdev->recv_cq = NULL;
		goto out;
	}
	create_qp_attrs.send_cq = vdev->send_cq;
	create_qp_attrs.recv_cq = vdev->recv_cq;

	/* allocate protection domain and qp array */
	vdev->kpd = ib_alloc_pd(ibdev);
	if (IS_ERR(vdev->kpd)) {
		pr_warn("roq_eth_init_qp: ib_alloc_pd failed");
		ret = PTR_ERR(vdev->kpd);
		vdev->kpd = NULL;
		goto out;
	}

	/* Shared receive queue; all per-peer QPs hang off it. */
	memset(&srq_attr, 0, sizeof(struct ib_srq_init_attr));
	srq_attr.attr.max_wr  = MAX_RX_SKBS;
	srq_attr.attr.max_sge = 1;
	vdev->srq = ib_create_srq(vdev->kpd, &srq_attr);
	if (IS_ERR(vdev->srq)) {
		pr_warn("roq_eth_init_qp: ib_create_srq failed");
		ret = PTR_ERR(vdev->srq);
		vdev->srq = NULL;
		goto out;
	}

	/* set some more parameters */
	create_qp_attrs.qp_type       = IB_QPT_UD;
	create_qp_attrs.event_handler = NULL;
	create_qp_attrs.qp_context    = NULL;
	create_qp_attrs.srq           = vdev->srq;

	create_qp_attrs.cap.max_send_wr      = MAX_TX_SKBS;
	create_qp_attrs.cap.max_recv_wr      = 1;
	create_qp_attrs.cap.max_send_sge     = 1;
	create_qp_attrs.cap.max_recv_sge     = 1;
	create_qp_attrs.cap.max_inline_data  = 0;

	size = sizeof(struct ib_qp *) * vdev->part_size;
	/* kzalloc so cleanup never sees uninitialized QP pointers. */
	vdev->qps = kzalloc(size, GFP_KERNEL);
	if (!vdev->qps) {
		pr_warn("roq_eth_init_qp: kmalloc failed\n");
		/*
		 * BUG FIX: this path used to fall through with ret == 0,
		 * reporting success without a QP array and skipping the
		 * error cleanup below.
		 */
		ret = -ENOMEM;
		goto out;
	}

	for (i = 0; i < vdev->part_size; i++) {
		vdev->qps[i] = ib_create_qp(vdev->kpd, &create_qp_attrs);
		if (IS_ERR(vdev->qps[i])) {
			ret =  PTR_ERR(vdev->qps[i]);
			/* Don't leave an ERR_PTR for cleanup to destroy. */
			vdev->qps[i] = NULL;
			pr_warn("roq_eth_init_qp: ib_create_qp failed: %d",
				ret);
			goto out;
		}
	}

	ret = ib_req_notify_cq(vdev->send_cq, IB_CQ_NEXT_COMP);
	if (ret)
		goto out;
	ret = ib_req_notify_cq(vdev->recv_cq, IB_CQ_NEXT_COMP);
	if (ret)
		goto out;

	/* Zero qp_attr so unused fields are deterministic. */
	memset(&qp_attr, 0, sizeof(qp_attr));
	for (i = 0; i < vdev->part_size; i++) {
		qp_attr_mask          = 0;
		qp_attr_mask         |= IB_QP_STATE;
		qp_attr.qp_state      = IB_QPS_RTS;
		qp_attr_mask         |= IB_QP_AV;
		/* this QP will send to peer rank i (zero based) */
		qp_attr.ah_attr.dlid = i;

		qp_attr_mask         |= IB_QP_DEST_QPN;
		/*
		 * this QP will send to peer QP num rank + 1
		 * (QP zero is reserved)
		 */
		qp_attr.dest_qp_num   = rank + 1;

		/* Warn on failure; keep going to match prior behavior. */
		rv = ib_modify_qp(vdev->qps[i], &qp_attr, qp_attr_mask);
		if (rv)
			pr_warn("roq_eth_init_qp: ib_modify_qp failed: %d\n",
				rv);
	}

	/* SETUP RECEIVE QP */
	for (i = 0; i < MAX_RX_SKBS; i++)
		roq_eth_post_recv(vdev, i);

	if (vdev->fix_rem == 1) {
		roq_eth_rem_init_qp(ndev);
		vdev->fix_rem = 0;
	}

out:
	if (ret) {
		pr_warn("roq_eth_init_qp: rv = %d\n", ret);
		roq_eth_cleanup_ofa(vdev);
	}
	return ret;
}
Example #4
0
/*
 * (Re)build the UD queue pairs used to reach the remote partition and
 * kick off userspace post-discovery.  If the base resources (CQs, SRQ,
 * PD) are not up yet, defer by setting fix_rem and return 0.
 *
 * Returns 0 on success or a negative errno.
 */
int
roq_eth_rem_init_qp(struct net_device *ndev)
{
	struct roq_eth_priv	*vdev = netdev_priv(ndev);
	struct ib_qp_init_attr	create_qp_attrs;
	struct ib_qp_attr	qp_attr;
	enum ib_qp_attr_mask	qp_attr_mask;
	char *argv[] = {"/etc/init.d/post_discovery", NULL};
	char *env[] = {	"HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			"LD_LIBRARY_PATH=/lib:/usr/lib", NULL};
	int i, rank, size, rv, ret = 0;

	/* Base IB resources not ready yet: retry from roq_eth_init_qp. */
	if (vdev->send_cq == NULL || vdev->recv_cq == NULL ||
	    vdev->srq == NULL || vdev->kpd == NULL) {
		vdev->fix_rem = 1;
		pr_warn("roq_eth_rem_init: return w/o discovery\n");
		return 0;
	}


	/* clean old remote qps */
	if (vdev->rem_part_size) {
		for (i = 0; i < vdev->rem_part_size; i++)
			ib_destroy_qp(vdev->qps_rem[i]);
		kfree(vdev->qps_rem);
	}
	vdev->rem_part_size = RoQ_NetworkSize(vdev->netdesc_rem);

	rank = roq_tcoords_to_rank(vdev->netdesc,
			vdev->personality.Network_Config.Acoord,
			vdev->personality.Network_Config.Bcoord,
			vdev->personality.Network_Config.Ccoord,
			vdev->personality.Network_Config.Dcoord,
			vdev->personality.Network_Config.Ecoord);
	if (IS_ERR_VALUE(rank)) {
		ret = -EINVAL;
		pr_warn("roq_eth_rem_init: invalid rank\n");
		goto out;
	}

	memset(&create_qp_attrs, 0, sizeof(struct ib_qp_init_attr));
	create_qp_attrs.send_cq = vdev->send_cq;
	create_qp_attrs.recv_cq = vdev->recv_cq;

	/* set some more parameters */
	create_qp_attrs.qp_type       = IB_QPT_UD;
	create_qp_attrs.event_handler = NULL;
	create_qp_attrs.qp_context    = NULL;
	create_qp_attrs.srq           = vdev->srq;

	create_qp_attrs.cap.max_send_wr      = MAX_TX_SKBS;
	create_qp_attrs.cap.max_recv_wr      = 1;
	create_qp_attrs.cap.max_send_sge     = 1;
	create_qp_attrs.cap.max_recv_sge     = 1;
	create_qp_attrs.cap.max_inline_data  = 0;

	size = sizeof(struct ib_qp *) * vdev->rem_part_size;

	/* No cast needed on kmalloc in C. */
	vdev->qps_rem = kmalloc(size, GFP_KERNEL);
	if (!vdev->qps_rem) {
		pr_warn("roq_eth_rem_init_qp: remote QP alloc failed");
		ret = -ENOMEM;
		vdev->rem_part_size = 0;
		goto out;
	}

	for (i = 0; i < vdev->rem_part_size; i++) {
		vdev->qps_rem[i] = ib_create_qp(vdev->kpd, &create_qp_attrs);
		if (IS_ERR(vdev->qps_rem[i])) {
			pr_warn("roq_eth_rem_init_qp: error creating qp %p\n",
				vdev->qps_rem[i]);
			ret = PTR_ERR(vdev->qps_rem[i]);
			/*
			 * BUG FIX: previously this bailed out leaking the
			 * already-created QPs and the qps_rem array, and
			 * left rem_part_size/an ERR_PTR slot behind for
			 * the next cleanup pass to trip over.
			 */
			while (--i >= 0)
				ib_destroy_qp(vdev->qps_rem[i]);
			kfree(vdev->qps_rem);
			vdev->qps_rem = NULL;
			vdev->rem_part_size = 0;
			goto out;
		}
	}

	for (i = 0; i < vdev->rem_part_size; i++) {
		qp_attr_mask          = 0;
		qp_attr_mask         |= IB_QP_STATE;
		qp_attr.qp_state      = IB_QPS_RTS;
		qp_attr_mask         |= IB_QP_AV;
		/* this QP will send to peer rank i (zero based) */
		qp_attr.ah_attr.dlid = 0x8000 | i;
		qp_attr_mask         |= IB_QP_DEST_QPN;
		/*
		 * this QP will send to peer qp num rank + 1
		 * (QP zero is reserved)
		 */
		qp_attr.dest_qp_num   = rank + 1;

		/* Warn on failure; keep going to match prior behavior. */
		rv = ib_modify_qp(vdev->qps_rem[i], &qp_attr, qp_attr_mask);
		if (rv)
			pr_warn("roq_eth_rem_init_qp: ib_modify_qp: %d\n", rv);
	}

	ret = call_usermodehelper(argv[0], argv, env, UMH_WAIT_EXEC);
out:
	return ret;
}
Example #5
0
/*
 * Allocate and bootstrap an RDMA context: PD, send/recv CQs and an RC
 * QP, then a TCP side-channel to ip_addr:port for the bootstrap
 * handshake, ending with rdma_setup() (MR creation, QP -> RTS).
 * Returns the new context, or NULL on failure.
 *
 * BUG FIX: the explicit failure paths after allocating ctx used to
 * return without freeing it, leaking the context on every error.
 * NOTE(review): CQs/PD created before a later failure are still not
 * destroyed here -- presumably torn down by the module's cleanup
 * path; confirm.
 */
rdma_ctx_t rdma_init(int npages, char* ip_addr, int port)
{
    int retval;
    rdma_ctx_t ctx;

    LOG_KERN(LOG_INFO, ("RDMA_INIT. ip_addr: %s port: %d npages: %d", ip_addr, port, npages));
    
    ctx = kmalloc(sizeof(struct rdma_ctx), GFP_KERNEL);
    if (!ctx)
        return NULL;

    memset(ctx, 0, sizeof(struct rdma_ctx));
    ctx->rem_mem_size = npages * (1024 * 4);   /* 4 KiB pages */

    if (!rdma_ib_device.ib_device_initialized) {
        LOG_KERN(LOG_INFO, ("ERROR"));
    }

    ctx->pd = ib_alloc_pd(rdma_ib_device.dev);
    CHECK_MSG_RET(ctx->pd != 0, "Error creating pd", 0);
   
    // Note that we set the CQ context to our ctx structure 
    ctx->send_cq = ib_create_cq(rdma_ib_device.dev, 
            comp_handler_send, cq_event_handler_send, ctx, 10, 0);
    ctx->recv_cq = ib_create_cq(rdma_ib_device.dev, 
            comp_handler_recv, cq_event_handler_recv, ctx, 10, 0);
    CHECK_MSG_RET(ctx->send_cq != 0, "Error creating CQ", 0);
    CHECK_MSG_RET(ctx->recv_cq != 0, "Error creating CQ", 0);
    
    CHECK_MSG_RET(ib_req_notify_cq(ctx->recv_cq, IB_CQ_NEXT_COMP) == 0,
            "Error ib_req_notify_cq", 0);
    CHECK_MSG_RET(ib_req_notify_cq(ctx->send_cq, IB_CQ_NEXT_COMP) == 0,
            "Error ib_req_notify_cq", 0);

    // initialize qp_attr
    memset(&ctx->qp_attr, 0, sizeof(struct ib_qp_init_attr));
    ctx->qp_attr.send_cq = ctx->send_cq;
    ctx->qp_attr.recv_cq = ctx->recv_cq;
    ctx->qp_attr.cap.max_send_wr  = 10;
    ctx->qp_attr.cap.max_recv_wr  = 10;
    ctx->qp_attr.cap.max_send_sge = 1;
    ctx->qp_attr.cap.max_recv_sge = 1;
    ctx->qp_attr.cap.max_inline_data = 0;
    ctx->qp_attr.qp_type = IB_QPT_RC;
    ctx->qp_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
    
    ctx->qp = ib_create_qp(ctx->pd, &ctx->qp_attr);
    // BUG FIX: the result was never checked; ib_create_qp reports
    // failure via an ERR_PTR-encoded pointer, and ctx->qp->qp_num
    // below would dereference it.
    CHECK_MSG_RET(!IS_ERR(ctx->qp), "Error creating QP", 0);

    // connect with server with TCP
    retval = connect(ctx, ip_addr, port);
    if (retval != 0)
        goto err;
  
    retval = populate_port_data(ctx);
    if (retval != 0)
        goto err;

    // some necessary stuff 
    ctx->lid = rdma_ib_device.attr.lid;
    ctx->qpn = ctx->qp->qp_num;
    // PSN must fit in 24 bits per the IB spec.
    get_random_bytes(&ctx->psn, sizeof(ctx->psn));
    ctx->psn &= 0xffffff;
    
    // exchange data to bootstrap RDMA
    retval = handshake(ctx); 
    if (retval != 0)
        goto err;

    // create memory region
    // modify QP to RTS
    retval = rdma_setup(ctx);
    if (retval != 0)
        goto err;

    return ctx;

err:
    kfree(ctx);
    return NULL;
}
Example #6
0
/*
 * IB client "add device" callback: claim the first device seen, set up
 * PD, DMA MR, CQs, DMA-mapped send/recv buffers and a UD QP, exchange
 * connection info with the peer over a socket, start the path-record
 * lookup and post the first receive.  Errors are logged and the setup
 * is simply abandoned (sample-module error handling).
 */
static void verbs_add_device (struct ib_device *dev)
{
	int ret;
	struct ib_qp_init_attr attrs;

	/* Only ever attach to one device. */
	if (ib_dev)
		return;

	/* durty hack for ib_dma_map_single not to segfault */
	dev->dma_ops = NULL;
	ib_dev = dev;

	printk (KERN_INFO "IB add device called. Name = %s\n", dev->name);

	ret = ib_query_device (dev, &dev_attr);
	if (ret) {
		printk (KERN_INFO "ib_quer_device failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "IB device caps: max_qp %d, max_mcast_grp: %d, max_pkeys: %d\n",
		dev_attr.max_qp, dev_attr.max_mcast_grp, (int)dev_attr.max_pkeys);

	/* We'll work with first port. It's a sample module, anyway. Who is that moron which decided
	 * to count ports from one? */
	ret = ib_query_port (dev, 1, &port_attr);
	if (ret) {
		printk (KERN_INFO "ib_query_port failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "Port info: lid: %u, sm_lid: %u, max_msg_size: %u\n",
		(unsigned)port_attr.lid, (unsigned)port_attr.sm_lid, port_attr.max_msg_sz);

	pd = ib_alloc_pd (dev);
	if (IS_ERR (pd)) {
		ret = PTR_ERR (pd);
		printk (KERN_INFO "pd allocation failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "PD allocated\n");

	mr = ib_get_dma_mr (pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR (mr)) {
		ret = PTR_ERR (mr);
		printk (KERN_INFO "get_dma_mr failed: %d\n", ret);
		return;
	}

	send_cq = ib_create_cq (dev, NULL, NULL, NULL, 1, 1);
	if (IS_ERR (send_cq)) {
		ret = PTR_ERR (send_cq);
		printk (KERN_INFO "ib_create_cq failed: %d\n", ret);
		return;
	}

	recv_cq = ib_create_cq (dev, verbs_comp_handler_recv, NULL, NULL, 1, 1);
	if (IS_ERR (recv_cq)) {
		ret = PTR_ERR (recv_cq);
		printk (KERN_INFO "ib_create_cq failed: %d\n", ret);
		return;
	}

	ib_req_notify_cq (recv_cq, IB_CQ_NEXT_COMP);
	printk (KERN_INFO "CQs allocated\n");

	ib_query_pkey (dev, 1, 0, &pkey);

	/* allocate memory (+40 bytes for the UD Global Routing Header) */
	send_buf = kmalloc (buf_size + 40, GFP_KERNEL);
	recv_buf = kmalloc (buf_size + 40, GFP_KERNEL);

	if (!send_buf || !recv_buf) {
		printk (KERN_INFO "Memory allocation error\n");
		return;
	}

	printk (KERN_INFO "Trying to register regions\n");
	if (ib_dev->dma_ops)
		printk (KERN_INFO "DMA ops are defined\n");

	memset (send_buf, 0, buf_size+40);
	/* BUG FIX: copy-paste error zeroed send_buf twice and left
	 * recv_buf uninitialized. */
	memset (recv_buf, 0, buf_size+40);

	/* BUG FIX: DMA directions were swapped.  The send buffer is
	 * written by the CPU and read by the device (DMA_TO_DEVICE);
	 * the receive buffer is written by the device and read by the
	 * CPU (DMA_FROM_DEVICE). */
	send_key = ib_dma_map_single (ib_dev, send_buf, buf_size, DMA_TO_DEVICE);
	printk (KERN_INFO "send_key obtained %llx\n", send_key);
	recv_key = ib_dma_map_single (ib_dev, recv_buf, buf_size, DMA_FROM_DEVICE);
	printk (KERN_INFO "recv_key obtained %llx\n", recv_key);

	if (ib_dma_mapping_error (ib_dev, send_key)) {
		printk (KERN_INFO "Error mapping send buffer\n");
		return;
	}

	if (ib_dma_mapping_error (ib_dev, recv_key)) {
		printk (KERN_INFO "Error mapping recv buffer\n");
		return;
	}

	memset (&attrs, 0, sizeof (attrs));
	attrs.qp_type = IB_QPT_UD;
	attrs.sq_sig_type = IB_SIGNAL_ALL_WR;
	attrs.event_handler = verbs_qp_event;
	attrs.cap.max_send_wr = CQ_SIZE;
	attrs.cap.max_recv_wr = CQ_SIZE;
	attrs.cap.max_send_sge = 1;
	attrs.cap.max_recv_sge = 1;
	attrs.send_cq = send_cq;
	attrs.recv_cq = recv_cq;

	qp = ib_create_qp (pd, &attrs);
	if (IS_ERR (qp)) {
		ret = PTR_ERR (qp);
		printk (KERN_INFO "qp allocation failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "Create QP with num %x\n", qp->qp_num);

	if (init_qp (qp)) {
		printk (KERN_INFO "Failed to initialize QP\n");
		return;
	}

	ret = ib_query_gid (ib_dev, 1, 0, &local_info.gid);
	if (ret) {
		printk (KERN_INFO "query_gid failed %d\n", ret);
		return;
	}

	local_info.qp_num = qp->qp_num;
	local_info.lid = port_attr.lid;

	/* now we are ready to send our QP number and other stuff to other party */
	if (!server_addr) {
		schedule_work (&sock_accept);
		flush_scheduled_work ();
	}
	else
		exchange_info (server_addr);

	if (!have_remote_info) {
		printk (KERN_INFO "Have no remote info, give up\n");
		return;
	}

	ret = path_rec_lookup_start ();
	if (ret) {
		printk (KERN_INFO "path_rec lookup start failed: %d\n", ret);
		return;
	}

	/* post receive request */
	verbs_post_recv_req ();

	mod_timer (&verbs_timer, NEXTJIFF(1));
}