Esempio n. 1
0
static void krping_free_buffers(struct krping_cb *cb)
{
	DEBUG_LOG(PFX "krping_free_buffers called on cb %p\n", cb);
	
#if 0
	dma_unmap_single(cb->pd->device->dma_device,
			 pci_unmap_addr(cb, recv_mapping),
			 sizeof(cb->recv_buf), DMA_BIDIRECTIONAL);
	dma_unmap_single(cb->pd->device->dma_device,
			 pci_unmap_addr(cb, send_mapping),
			 sizeof(cb->send_buf), DMA_BIDIRECTIONAL);
	dma_unmap_single(cb->pd->device->dma_device,
			 pci_unmap_addr(cb, rdma_mapping),
			 cb->size, DMA_BIDIRECTIONAL);
#endif
	contigfree(cb->rdma_buf, cb->size, M_DEVBUF);
	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
#if 0
		dma_unmap_single(cb->pd->device->dma_device,
			 pci_unmap_addr(cb, start_mapping),
			 cb->size, DMA_BIDIRECTIONAL);
#endif
		contigfree(cb->start_buf, cb->size, M_DEVBUF);
	}
	if (cb->use_dmamr)
		ib_dereg_mr(cb->dma_mr);
	else {
		ib_dereg_mr(cb->send_mr);
		ib_dereg_mr(cb->recv_mr);
		ib_dereg_mr(cb->rdma_mr);
		if (!cb->server)
			ib_dereg_mr(cb->start_mr);
	}
}
Esempio n. 2
0
static int
__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
{
	struct rpcrdma_frmr *f = &r->frmr;
	int rc;

	rc = ib_dereg_mr(f->fr_mr);
	if (rc) {
		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
			rc, r);
		return rc;
	}

	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG,
			       ia->ri_max_frmr_depth);
	if (IS_ERR(f->fr_mr)) {
		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
			PTR_ERR(f->fr_mr), r);
		return PTR_ERR(f->fr_mr);
	}

	dprintk("RPC:       %s: recovered FRMR %p\n", __func__, f);
	f->fr_state = FRMR_IS_INVALID;
	return 0;
}
Esempio n. 3
0
/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	ib_free_cq(ep->rep_attr.recv_cq);
	ib_free_cq(ep->rep_attr.send_cq);

	if (ia->ri_dma_mr) {
		rc = ib_dereg_mr(ia->ri_dma_mr);
		dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
}
Esempio n. 4
0
static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
{
	struct ib_mr *mr;
	struct scatterlist *sg;
	struct svc_rdma_fastreg_mr *frmr;
	u32 num_sg;

	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
	if (!frmr)
		goto err;

	num_sg = min_t(u32, RPCSVC_MAXPAGES, xprt->sc_frmr_pg_list_len);
	mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, num_sg);
	if (IS_ERR(mr))
		goto err_free_frmr;

	sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
	if (!sg)
		goto err_free_mr;

	sg_init_table(sg, RPCSVC_MAXPAGES);

	frmr->mr = mr;
	frmr->sg = sg;
	INIT_LIST_HEAD(&frmr->frmr_list);
	return frmr;

 err_free_mr:
	ib_dereg_mr(mr);
 err_free_frmr:
	kfree(frmr);
 err:
	return ERR_PTR(-ENOMEM);
}
Esempio n. 5
0
static int
__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
	struct rpcrdma_frwr *frwr = &mr->frwr;
	int rc;

	rc = ib_dereg_mr(frwr->fr_mr);
	if (rc) {
		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
			rc, mr);
		return rc;
	}

	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
				  ia->ri_max_frwr_depth);
	if (IS_ERR(frwr->fr_mr)) {
		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
			PTR_ERR(frwr->fr_mr), mr);
		return PTR_ERR(frwr->fr_mr);
	}

	dprintk("RPC:       %s: recovered FRWR %p\n", __func__, frwr);
	frwr->fr_state = FRWR_IS_INVALID;
	return 0;
}
Esempio n. 6
0
static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
{
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *pl;
	struct svc_rdma_fastreg_mr *frmr;

	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
	if (!frmr)
		goto err;

	mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
	if (IS_ERR(mr))
		goto err_free_frmr;

	pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
					 RPCSVC_MAXPAGES);
	if (IS_ERR(pl))
		goto err_free_mr;

	frmr->mr = mr;
	frmr->page_list = pl;
	INIT_LIST_HEAD(&frmr->frmr_list);
	return frmr;

 err_free_mr:
	ib_dereg_mr(mr);
 err_free_frmr:
	kfree(frmr);
 err:
	return ERR_PTR(-ENOMEM);
}
Esempio n. 7
0
static int
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
{
	unsigned int depth = ia->ri_max_frmr_depth;
	struct rpcrdma_frmr *f = &r->frmr;
	int rc;

	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, depth);
	if (IS_ERR(f->fr_mr))
		goto out_mr_err;

	r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
	if (!r->mw_sg)
		goto out_list_err;

	sg_init_table(r->mw_sg, depth);
	init_completion(&f->fr_linv_done);
	return 0;

out_mr_err:
	rc = PTR_ERR(f->fr_mr);
	dprintk("RPC:       %s: ib_alloc_mr status %i\n",
		__func__, rc);
	return rc;

out_list_err:
	rc = -ENOMEM;
	dprintk("RPC:       %s: sg allocation failure\n",
		__func__);
	ib_dereg_mr(f->fr_mr);
	return rc;
}
Esempio n. 8
0
static void ib_sock_mem_fini_common(struct IB_SOCK *sock)
{
	if (sock->is_mem.ism_mr)
		ib_dereg_mr(sock->is_mem.ism_mr);

	if (sock->is_mem.ism_pd)
		ib_dealloc_pd(sock->is_mem.ism_pd);
}
Esempio n. 9
0
/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapator.
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
	device->pd = ib_alloc_pd(device->ib_device);
	if (IS_ERR(device->pd))
		goto pd_err;

	device->rx_cq = ib_create_cq(device->ib_device,
				  iser_cq_callback,
				  iser_cq_event_callback,
				  (void *)device,
				  ISER_MAX_RX_CQ_LEN, 0);
	if (IS_ERR(device->rx_cq))
		goto rx_cq_err;

	device->tx_cq = ib_create_cq(device->ib_device,
				  NULL, iser_cq_event_callback,
				  (void *)device,
				  ISER_MAX_TX_CQ_LEN, 0);

	if (IS_ERR(device->tx_cq))
		goto tx_cq_err;

	if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
		goto cq_arm_err;

	tasklet_init(&device->cq_tasklet,
		     iser_cq_tasklet_fn,
		     (unsigned long)device);

	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ);
	if (IS_ERR(device->mr))
		goto dma_mr_err;

	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
				iser_event_handler);
	if (ib_register_event_handler(&device->event_handler))
		goto handler_err;

	return 0;

handler_err:
	ib_dereg_mr(device->mr);
dma_mr_err:
	tasklet_kill(&device->cq_tasklet);
cq_arm_err:
	ib_destroy_cq(device->tx_cq);
tx_cq_err:
	ib_destroy_cq(device->rx_cq);
rx_cq_err:
	ib_dealloc_pd(device->pd);
pd_err:
	iser_err("failed to allocate an IB resource\n");
	return -1;
}
Esempio n. 10
0
void IBMemBlock::close()
{
    if ( _memoryRegion )
    {
       ib_api_status_t status = ib_dereg_mr( _memoryRegion );
       _memoryRegion = 0;
    }
    _localKey = 0;
    _remoteKey = 0;
}
Esempio n. 11
0
static int fi_ib_mr_close(struct fid *fid)
{
	int			ret;
	struct fi_ib_mem_desc	*md = (struct fi_ib_mem_desc *) fid;

	print_trace("in\n");

	ret = ib_dereg_mr(md->mr);
	if (ret)
		print_err("ib_dereg_mr returned %d\n", ret);

	return ret;
}
Esempio n. 12
0
static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_fastreg_mr *frmr;

	while (!list_empty(&xprt->sc_frmr_q)) {
		frmr = list_entry(xprt->sc_frmr_q.next,
				  struct svc_rdma_fastreg_mr, frmr_list);
		list_del_init(&frmr->frmr_list);
		ib_dereg_mr(frmr->mr);
		ib_free_fast_reg_page_list(frmr->page_list);
		kfree(frmr);
	}
}
Esempio n. 13
0
static void iser_free_device_ib_res(struct iser_device *device)
{
	BUG_ON(device->mr == NULL);

	tasklet_kill(&device->cq_tasklet);

	(void)ib_dereg_mr(device->mr);
	(void)ib_destroy_cq(device->cq);
	(void)ib_dealloc_pd(device->pd);

	device->mr = NULL;
	device->cq = NULL;
	device->pd = NULL;
}
Esempio n. 14
0
static void
frwr_op_release_mr(struct rpcrdma_mw *r)
{
	int rc;

	/* Ensure MW is not on any rl_registered list */
	if (!list_empty(&r->mw_list))
		list_del(&r->mw_list);

	rc = ib_dereg_mr(r->frmr.fr_mr);
	if (rc)
		pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
		       r, rc);
	kfree(r->mw_sg);
	kfree(r);
}
Esempio n. 15
0
static void isert_device_release(struct isert_device *isert_dev)
{
	int err, i;

	TRACE_ENTRY();

	lockdep_assert_held(&dev_list_mutex);

	isert_dev_list_remove(isert_dev); /* remove from global list */

	for (i = 0; i < isert_dev->num_cqs; ++i) {
		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22)
		/*
		 * cancel_work_sync() was introduced in 2.6.22. We can
		 * only wait until all scheduled work is done.
		 */
		flush_workqueue(cq_desc->cq_workqueue);
#else
		cancel_work_sync(&cq_desc->cq_comp_work);
#endif

		err = ib_destroy_cq(cq_desc->cq);
		if (unlikely(err))
			pr_err("Failed to destroy cq, err:%d\n", err);

		destroy_workqueue(cq_desc->cq_workqueue);
	}

	err = ib_dereg_mr(isert_dev->mr);
	if (unlikely(err))
		pr_err("Failed to destroy mr, err:%d\n", err);
	err = ib_dealloc_pd(isert_dev->pd);
	if (unlikely(err))
		pr_err("Failed to destroy pd, err:%d\n", err);

	vfree(isert_dev->cq_desc);
	isert_dev->cq_desc = NULL;

	kfree(isert_dev->cq_qps);
	isert_dev->cq_qps = NULL;

	kfree(isert_dev);

	TRACE_EXIT();
}
Esempio n. 16
0
static void verbs_remove_device (struct ib_device *dev)
{
	printk (KERN_INFO "IB remove device called. Name = %s\n", dev->name);

	if (ah)
		ib_destroy_ah (ah);
	if (qp)
		ib_destroy_qp (qp);
	if (send_cq)
		ib_destroy_cq (send_cq);
	if (recv_cq)
		ib_destroy_cq (recv_cq);
	if (mr)
		ib_dereg_mr (mr);
	if (pd)
		ib_dealloc_pd (pd);
}
Esempio n. 17
0
void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
{
	struct rds_iw_send_work *send;
	u32 i;

	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
		BUG_ON(!send->s_mr);
		ib_dereg_mr(send->s_mr);
		BUG_ON(!send->s_page_list);
		ib_free_fast_reg_page_list(send->s_page_list);
		if (send->s_wr.opcode == 0xdead)
			continue;
		if (send->s_rm)
			rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
		if (send->s_op)
			rds_iw_send_unmap_rdma(ic, send->s_op);
	}
}
Esempio n. 18
0
/**
 * ib_dealloc_pd - Deallocates a protection domain.
 * @pd: The protection domain to deallocate.
 *
 * It is an error to call this function while any resources in the pd still
 * exist.  The caller is responsible to synchronously destroy them and
 * guarantee no new allocations will happen.
 */
void ib_dealloc_pd(struct ib_pd *pd)
{
	int ret;

	if (pd->local_mr) {
		ret = ib_dereg_mr(pd->local_mr);
		WARN_ON(ret);
		pd->local_mr = NULL;
	}

	/* uverbs manipulates usecnt with proper locking, while the kabi
	   requires the caller to guarantee we can't race here. */
	WARN_ON(atomic_read(&pd->usecnt));

	/* Making delalloc_pd a void return is a WIP, no driver should return
	   an error here. */
	ret = pd->device->dealloc_pd(pd);
	WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
}
Esempio n. 19
0
File: verbs.c Progetto: 274914765/C
/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
    int rc;

    dprintk("RPC:       %s: entering\n", __func__);
    if (ia->ri_bind_mem != NULL) {
        rc = ib_dereg_mr(ia->ri_bind_mem);
        dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
            __func__, rc);
    }
    if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp)
        rdma_destroy_qp(ia->ri_id);
    if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
        rc = ib_dealloc_pd(ia->ri_pd);
        dprintk("RPC:       %s: ib_dealloc_pd returned %i\n",
            __func__, rc);
    }
    if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
        rdma_destroy_id(ia->ri_id);
}
Esempio n. 20
0
static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
	if (!rdma)
		return;

	if (rdma->dma_mr && !IS_ERR(rdma->dma_mr))
		ib_dereg_mr(rdma->dma_mr);

	if (rdma->qp && !IS_ERR(rdma->qp))
		ib_destroy_qp(rdma->qp);

	if (rdma->pd && !IS_ERR(rdma->pd))
		ib_dealloc_pd(rdma->pd);

	if (rdma->cq && !IS_ERR(rdma->cq))
		ib_destroy_cq(rdma->cq);

	if (rdma->cm_id && !IS_ERR(rdma->cm_id))
		rdma_destroy_id(rdma->cm_id);

	kfree(rdma);
}
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
			   const char __user *buf, int in_len,
			   int out_len)
{
	struct ib_uverbs_dereg_mr cmd;
	struct ib_mr             *mr;
	struct ib_umem_object    *memobj;
	int                       ret = -EINVAL;

	if (copy_from_user(&cmd, buf, sizeof cmd))
		return -EFAULT;

	mutex_lock(&ib_uverbs_idr_mutex);

	mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle);
	if (!mr || mr->uobject->context != file->ucontext)
		goto out;

	memobj = container_of(mr->uobject, struct ib_umem_object, uobject);

	ret = ib_dereg_mr(mr);
	if (ret)
		goto out;

	idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle);

	mutex_lock(&file->mutex);
	list_del(&memobj->uobject.list);
	mutex_unlock(&file->mutex);

	ib_umem_release(file->device->ib_dev, &memobj->umem);
	kfree(memobj);

out:
	mutex_unlock(&ib_uverbs_idr_mutex);

	return ret ? ret : in_len;
}
Esempio n. 22
0
void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
		       unsigned long *unpinned, unsigned int goal)
{
	struct rds_ib_mr *ibmr, *next;
	struct rds_ib_frmr *frmr;
	int ret = 0;
	unsigned int freed = *nfreed;

	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
	list_for_each_entry(ibmr, list, unmap_list) {
		if (ibmr->sg_dma_len)
			ret |= rds_ib_post_inv(ibmr);
	}
	if (ret)
		pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);

	/* Now we can destroy the DMA mapping and unpin any pages */
	list_for_each_entry_safe(ibmr, next, list, unmap_list) {
		*unpinned += ibmr->sg_len;
		frmr = &ibmr->u.frmr;
		__rds_ib_teardown_mr(ibmr);
		if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
			/* Don't de-allocate if the MR is not free yet */
			if (frmr->fr_state == FRMR_IS_INUSE)
				continue;

			if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
				rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
			else
				rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
			list_del(&ibmr->unmap_list);
			if (frmr->mr)
				ib_dereg_mr(frmr->mr);
			kfree(ibmr);
			freed++;
		}
	}
Esempio n. 23
0
static struct isert_device *isert_device_create(struct ib_device *ib_dev)
{
	struct isert_device *isert_dev;
	struct ib_device_attr *dev_attr;
	int cqe_num, err;
	struct ib_pd *pd;
	struct ib_mr *mr;
	struct ib_cq *cq;
	char wq_name[64];
	int i, j;

	TRACE_ENTRY();

	isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL);
	if (unlikely(isert_dev == NULL)) {
		pr_err("Failed to allocate iser dev\n");
		err = -ENOMEM;
		goto out;
	}

	dev_attr = &isert_dev->device_attr;
	err = ib_query_device(ib_dev, dev_attr);
	if (unlikely(err)) {
		pr_err("Failed to query device, err: %d\n", err);
		goto fail_query;
	}

	isert_dev->num_cqs = min_t(int, num_online_cpus(),
				   ib_dev->num_comp_vectors);

	isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs,
				    GFP_KERNEL);
	if (unlikely(isert_dev->cq_qps == NULL)) {
		pr_err("Failed to allocate iser cq_qps\n");
		err = -ENOMEM;
		goto fail_cq_qps;
	}

	isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
	if (unlikely(isert_dev->cq_desc == NULL)) {
		pr_err("Failed to allocate %ld bytes for iser cq_desc\n",
		       sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
		err = -ENOMEM;
		goto fail_alloc_cq_desc;
	}

	pd = ib_alloc_pd(ib_dev);
	if (unlikely(IS_ERR(pd))) {
		err = PTR_ERR(pd);
		pr_err("Failed to alloc iser dev pd, err:%d\n", err);
		goto fail_pd;
	}

	mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
	if (unlikely(IS_ERR(mr))) {
		err = PTR_ERR(mr);
		pr_err("Failed to get dma mr, err: %d\n", err);
		goto fail_mr;
	}

	cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES);
	cqe_num = cqe_num / isert_dev->num_cqs;

#ifdef CONFIG_SCST_EXTRACHECKS
	if (isert_dev->device_attr.max_cqe == 0)
		pr_err("Zero max_cqe encountered: you may have a compilation problem\n");
#endif

	for (i = 0; i < isert_dev->num_cqs; ++i) {
		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];

		cq_desc->dev = isert_dev;
		cq_desc->idx = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL);
#else
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb);
#endif

		snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name);
#else
#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_RESCUER, 1);
#else
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_MEM_RECLAIM, 1);
#endif
#endif
		if (unlikely(!cq_desc->cq_workqueue)) {
			pr_err("Failed to alloc iser cq work queue for dev:%s\n",
			       ib_dev->name);
			err = -ENOMEM;
			goto fail_cq;
		}

		cq = ib_create_cq(ib_dev,
				  isert_cq_comp_handler,
				  isert_async_evt_handler,
				  cq_desc, /* context */
				  cqe_num,
				  i); /* completion vector */
		if (unlikely(IS_ERR(cq))) {
			cq_desc->cq = NULL;
			err = PTR_ERR(cq);
			pr_err("Failed to create iser dev cq, err:%d\n", err);
			goto fail_cq;
		}

		cq_desc->cq = cq;
		err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
		if (unlikely(err)) {
			pr_err("Failed to request notify cq, err: %d\n", err);
			goto fail_cq;
		}
	}

	isert_dev->ib_dev = ib_dev;
	isert_dev->pd = pd;
	isert_dev->mr = mr;

	INIT_LIST_HEAD(&isert_dev->conn_list);

	lockdep_assert_held(&dev_list_mutex);

	isert_dev_list_add(isert_dev);

	pr_info("iser created device:%p\n", isert_dev);
	return isert_dev;

fail_cq:
	for (j = 0; j <= i; ++j) {
		if (isert_dev->cq_desc[j].cq)
			ib_destroy_cq(isert_dev->cq_desc[j].cq);
		if (isert_dev->cq_desc[j].cq_workqueue)
			destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue);
	}
	ib_dereg_mr(mr);
fail_mr:
	ib_dealloc_pd(pd);
fail_pd:
	vfree(isert_dev->cq_desc);
fail_alloc_cq_desc:
	kfree(isert_dev->cq_qps);
fail_cq_qps:
fail_query:
	kfree(isert_dev);
out:
	TRACE_EXIT_RES(err);
	return ERR_PTR(err);
}
Esempio n. 24
0
static int krping_setup_buffers(struct krping_cb *cb)
{
	int ret;
	struct ib_phys_buf buf;
	u64 iovbase;

	DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb);

	if (cb->use_dmamr) {
		cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE|
					   IB_ACCESS_REMOTE_READ|
				           IB_ACCESS_REMOTE_WRITE);
		if (IS_ERR(cb->dma_mr)) {
			log(LOG_ERR, "reg_dmamr failed\n");
			return PTR_ERR(cb->dma_mr);
		}
	} else {

		buf.addr = vtophys(&cb->recv_buf);
		buf.size = sizeof cb->recv_buf;
		iovbase = vtophys(&cb->recv_buf);
		cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     IB_ACCESS_LOCAL_WRITE, 
					     &iovbase);

		if (IS_ERR(cb->recv_mr)) {
			log(LOG_ERR, "recv_buf reg_mr failed\n");
			return PTR_ERR(cb->recv_mr);
		}

		buf.addr = vtophys(&cb->send_buf);
		buf.size = sizeof cb->send_buf;
		iovbase = vtophys(&cb->send_buf);
		cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     0, &iovbase);

		if (IS_ERR(cb->send_mr)) {
			log(LOG_ERR, "send_buf reg_mr failed\n");
			ib_dereg_mr(cb->recv_mr);
			return PTR_ERR(cb->send_mr);
		}
	}

	/* RNIC adapters have a limit upto which it can register physical memory
	 * If DMA-MR memory mode is set then normally driver registers maximum
	 * supported memory. After that if contigmalloc allocates memory beyond the
	 * specified RNIC limit then Krping may not work.
	 */
	if (cb->use_dmamr && cb->memlimit)
		cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, cb->memlimit,
					    PAGE_SIZE, 0);
	else 
		cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL,
					    PAGE_SIZE, 0);

	if (!cb->rdma_buf) {
		log(LOG_ERR, "rdma_buf malloc failed\n");
		ret = ENOMEM;
		goto err1;
	}
	if (!cb->use_dmamr) {

		buf.addr = vtophys(cb->rdma_buf);
		buf.size = cb->size;
		iovbase = vtophys(cb->rdma_buf);
		cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     IB_ACCESS_REMOTE_READ| 
					     IB_ACCESS_REMOTE_WRITE, 
					     &iovbase);

		if (IS_ERR(cb->rdma_mr)) {
			log(LOG_ERR, "rdma_buf reg_mr failed\n");
			ret = PTR_ERR(cb->rdma_mr);
			goto err2;
		}
	}

	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
		if (cb->use_dmamr && cb->memlimit)
			cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
						     0, cb->memlimit, PAGE_SIZE, 0);
		else
			cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
						     0, -1UL, PAGE_SIZE, 0);
		if (!cb->start_buf) {
			log(LOG_ERR, "start_buf malloc failed\n");
			ret = ENOMEM;
			goto err2;
		}
		if (!cb->use_dmamr) {
			unsigned flags = IB_ACCESS_REMOTE_READ;

			if (cb->wlat || cb->rlat || cb->bw) 
				flags |= IB_ACCESS_REMOTE_WRITE;
			buf.addr = vtophys(cb->start_buf);
			buf.size = cb->size;
			iovbase = vtophys(cb->start_buf);
			cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     flags,
					     &iovbase);

			if (IS_ERR(cb->start_mr)) {
				log(LOG_ERR, "start_buf reg_mr failed\n");
				ret = PTR_ERR(cb->start_mr);
				goto err3;
			}
		}
	}

	krping_setup_wr(cb);
	DEBUG_LOG(PFX "allocated & registered buffers...\n");
	return 0;
err3:
	contigfree(cb->start_buf, cb->size, M_DEVBUF);

	if (!cb->use_dmamr)
		ib_dereg_mr(cb->rdma_mr);
err2:
	contigfree(cb->rdma_buf, cb->size, M_DEVBUF);
err1:
	if (cb->use_dmamr)
		ib_dereg_mr(cb->dma_mr);
	else {
		ib_dereg_mr(cb->recv_mr);
		ib_dereg_mr(cb->send_mr);
	}
	return ret;
}
Esempio n. 25
0
/* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
static void isert_cq_comp_work_cb(void *ctx)
{
	struct isert_cq *cq_desc = ctx;
#else
static void isert_cq_comp_work_cb(struct work_struct *work)
{
	struct isert_cq *cq_desc =
		container_of(work, struct isert_cq, cq_comp_work);
#endif
	int ret;

	TRACE_ENTRY();

	ret = isert_poll_cq(cq_desc);
	if (unlikely(ret < 0)) { /* poll error */
		pr_err("ib_poll_cq failed\n");
		goto out;
	}

	ib_req_notify_cq(cq_desc->cq,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	/*
	 * not all HCAs support IB_CQ_REPORT_MISSED_EVENTS,
	 * so we need to make sure we don't miss any events between
	 * last call to ib_poll_cq() and ib_req_notify_cq()
	 */
	isert_poll_cq(cq_desc);

out:
	TRACE_EXIT();
	return;
}

static void isert_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct isert_cq *cq_desc = context;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
	queue_work(cq_desc->cq_workqueue, &cq_desc->cq_comp_work);
#else
	queue_work_on(smp_processor_id(), cq_desc->cq_workqueue,
		      &cq_desc->cq_comp_work);
#endif
}

static const char *ib_event_type_str(enum ib_event_type ev_type)
{
	switch (ev_type) {
	case IB_EVENT_COMM_EST:
		return "COMM_EST";
	case IB_EVENT_QP_FATAL:
		return "QP_FATAL";
	case IB_EVENT_QP_REQ_ERR:
		return "QP_REQ_ERR";
	case IB_EVENT_QP_ACCESS_ERR:
		return "QP_ACCESS_ERR";
	case IB_EVENT_SQ_DRAINED:
		return "SQ_DRAINED";
	case IB_EVENT_PATH_MIG:
		return "PATH_MIG";
	case IB_EVENT_PATH_MIG_ERR:
		return "PATH_MIG_ERR";
	case IB_EVENT_QP_LAST_WQE_REACHED:
		return "QP_LAST_WQE_REACHED";
	case IB_EVENT_CQ_ERR:
		return "CQ_ERR";
	case IB_EVENT_SRQ_ERR:
		return "SRQ_ERR";
	case IB_EVENT_SRQ_LIMIT_REACHED:
		return "SRQ_LIMIT_REACHED";
	case IB_EVENT_PORT_ACTIVE:
		return "PORT_ACTIVE";
	case IB_EVENT_PORT_ERR:
		return "PORT_ERR";
	case IB_EVENT_LID_CHANGE:
		return "LID_CHANGE";
	case IB_EVENT_PKEY_CHANGE:
		return "PKEY_CHANGE";
	case IB_EVENT_SM_CHANGE:
		return "SM_CHANGE";
	case IB_EVENT_CLIENT_REREGISTER:
		return "CLIENT_REREGISTER";
	case IB_EVENT_DEVICE_FATAL:
		return "DEVICE_FATAL";
	default:
		return "UNKNOWN";
	}
}

static void isert_async_evt_handler(struct ib_event *async_ev, void *context)
{
	struct isert_cq *cq = context;
	struct isert_device *isert_dev = cq->dev;
	struct ib_device *ib_dev = isert_dev->ib_dev;
	char *dev_name = ib_dev->name;
	enum ib_event_type ev_type = async_ev->event;
	struct isert_connection *isert_conn;

	TRACE_ENTRY();

	switch (ev_type) {
	case IB_EVENT_COMM_EST:
		isert_conn = async_ev->element.qp->qp_context;
		pr_info("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n",
			isert_conn, isert_conn->cm_id, dev_name,
			ib_event_type_str(IB_EVENT_COMM_EST));
		/* force "connection established" event */
		rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST);
		break;

	/* rest of QP-related events */
	case IB_EVENT_QP_FATAL:
	case IB_EVENT_QP_REQ_ERR:
	case IB_EVENT_QP_ACCESS_ERR:
	case IB_EVENT_SQ_DRAINED:
	case IB_EVENT_PATH_MIG:
	case IB_EVENT_PATH_MIG_ERR:
	case IB_EVENT_QP_LAST_WQE_REACHED:
		isert_conn = async_ev->element.qp->qp_context;
		pr_err("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n",
		       isert_conn, isert_conn->cm_id, dev_name,
		       ib_event_type_str(ev_type));
		break;

	/* CQ-related events */
	case IB_EVENT_CQ_ERR:
		pr_err("dev:%s CQ evt: %s\n", dev_name,
		       ib_event_type_str(ev_type));
		break;

	/* SRQ events */
	case IB_EVENT_SRQ_ERR:
	case IB_EVENT_SRQ_LIMIT_REACHED:
		pr_err("dev:%s SRQ evt: %s\n", dev_name,
		       ib_event_type_str(ev_type));
		break;

	/* Port events */
	case IB_EVENT_PORT_ACTIVE:
	case IB_EVENT_PORT_ERR:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_PKEY_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
		pr_err("dev:%s port:%d evt: %s\n",
		       dev_name, async_ev->element.port_num,
		       ib_event_type_str(ev_type));
		break;

	/* HCA events */
	case IB_EVENT_DEVICE_FATAL:
		pr_err("dev:%s HCA evt: %s\n", dev_name,
		       ib_event_type_str(ev_type));
		break;

	default:
		pr_err("dev:%s evt: %s\n", dev_name,
		       ib_event_type_str(ev_type));
		break;
	}

	TRACE_EXIT();
}

static struct isert_device *isert_device_create(struct ib_device *ib_dev)
{
	struct isert_device *isert_dev;
	struct ib_device_attr *dev_attr;
	int cqe_num, err;
	struct ib_pd *pd;
	struct ib_mr *mr;
	struct ib_cq *cq;
	char wq_name[64];
	int i, j;

	TRACE_ENTRY();

	isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL);
	if (unlikely(isert_dev == NULL)) {
		pr_err("Failed to allocate iser dev\n");
		err = -ENOMEM;
		goto out;
	}

	dev_attr = &isert_dev->device_attr;
	err = ib_query_device(ib_dev, dev_attr);
	if (unlikely(err)) {
		pr_err("Failed to query device, err: %d\n", err);
		goto fail_query;
	}

	isert_dev->num_cqs = min_t(int, num_online_cpus(),
				   ib_dev->num_comp_vectors);

	isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs,
				    GFP_KERNEL);
	if (unlikely(isert_dev->cq_qps == NULL)) {
		pr_err("Failed to allocate iser cq_qps\n");
		err = -ENOMEM;
		goto fail_cq_qps;
	}

	isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
	if (unlikely(isert_dev->cq_desc == NULL)) {
		pr_err("Failed to allocate %ld bytes for iser cq_desc\n",
		       sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
		err = -ENOMEM;
		goto fail_alloc_cq_desc;
	}

	pd = ib_alloc_pd(ib_dev);
	if (unlikely(IS_ERR(pd))) {
		err = PTR_ERR(pd);
		pr_err("Failed to alloc iser dev pd, err:%d\n", err);
		goto fail_pd;
	}

	mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
	if (unlikely(IS_ERR(mr))) {
		err = PTR_ERR(mr);
		pr_err("Failed to get dma mr, err: %d\n", err);
		goto fail_mr;
	}

	cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES);
	cqe_num = cqe_num / isert_dev->num_cqs;

#ifdef CONFIG_SCST_EXTRACHECKS
	if (isert_dev->device_attr.max_cqe == 0)
		pr_err("Zero max_cqe encountered: you may have a compilation problem\n");
#endif

	for (i = 0; i < isert_dev->num_cqs; ++i) {
		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];

		cq_desc->dev = isert_dev;
		cq_desc->idx = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL);
#else
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb);
#endif

		snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name);
#else
#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_RESCUER, 1);
#else
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_MEM_RECLAIM, 1);
#endif
#endif
		if (unlikely(!cq_desc->cq_workqueue)) {
			pr_err("Failed to alloc iser cq work queue for dev:%s\n",
			       ib_dev->name);
			err = -ENOMEM;
			goto fail_cq;
		}

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
		cq = ib_create_cq(ib_dev,
				  isert_cq_comp_handler,
				  isert_async_evt_handler,
				  cq_desc, /* context */
				  cqe_num,
				  i); /* completion vector */
#else
		{
		struct ib_cq_init_attr ia = {
			.cqe		 = cqe_num,
			.comp_vector	 = i,
		};
		cq = ib_create_cq(ib_dev,
				  isert_cq_comp_handler,
				  isert_async_evt_handler,
				  cq_desc, /* context */
				  &ia);
		}
#endif
		if (unlikely(IS_ERR(cq))) {
			cq_desc->cq = NULL;
			err = PTR_ERR(cq);
			pr_err("Failed to create iser dev cq, err:%d\n", err);
			goto fail_cq;
		}

		cq_desc->cq = cq;
		err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
		if (unlikely(err)) {
			pr_err("Failed to request notify cq, err: %d\n", err);
			goto fail_cq;
		}
	}

	isert_dev->ib_dev = ib_dev;
	isert_dev->pd = pd;
	isert_dev->mr = mr;

	INIT_LIST_HEAD(&isert_dev->conn_list);

	lockdep_assert_held(&dev_list_mutex);

	isert_dev_list_add(isert_dev);

	pr_info("iser created device:%p\n", isert_dev);
	return isert_dev;

fail_cq:
	for (j = 0; j <= i; ++j) {
		if (isert_dev->cq_desc[j].cq)
			ib_destroy_cq(isert_dev->cq_desc[j].cq);
		if (isert_dev->cq_desc[j].cq_workqueue)
			destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue);
	}
	ib_dereg_mr(mr);
fail_mr:
	ib_dealloc_pd(pd);
fail_pd:
	vfree(isert_dev->cq_desc);
fail_alloc_cq_desc:
	kfree(isert_dev->cq_qps);
fail_cq_qps:
fail_query:
	kfree(isert_dev);
out:
	TRACE_EXIT_RES(err);
	return ERR_PTR(err);
}

static void isert_device_release(struct isert_device *isert_dev)
{
	int err, i;

	TRACE_ENTRY();

	lockdep_assert_held(&dev_list_mutex);

	isert_dev_list_remove(isert_dev); /* remove from global list */

	for (i = 0; i < isert_dev->num_cqs; ++i) {
		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22)
		/*
		 * cancel_work_sync() was introduced in 2.6.22. We can
		 * only wait until all scheduled work is done.
		 */
		flush_workqueue(cq_desc->cq_workqueue);
#else
		cancel_work_sync(&cq_desc->cq_comp_work);
#endif

		err = ib_destroy_cq(cq_desc->cq);
		if (unlikely(err))
			pr_err("Failed to destroy cq, err:%d\n", err);

		destroy_workqueue(cq_desc->cq_workqueue);
	}

	err = ib_dereg_mr(isert_dev->mr);
	if (unlikely(err))
		pr_err("Failed to destroy mr, err:%d\n", err);
	err = ib_dealloc_pd(isert_dev->pd);
	if (unlikely(err))
		pr_err("Failed to destroy pd, err:%d\n", err);

	vfree(isert_dev->cq_desc);
	isert_dev->cq_desc = NULL;

	kfree(isert_dev->cq_qps);
	isert_dev->cq_qps = NULL;

	kfree(isert_dev);

	TRACE_EXIT();
}
Esempio n. 26
0
/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *devattr = &ia->ri_devattr;
	struct ib_cq *sendcq, *recvcq;
	struct ib_cq_init_attr cq_attr = {};
	unsigned int max_qp_wr;
	int rc, err;

	if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
		dprintk("RPC:       %s: insufficient sge's available\n",
			__func__);
		return -ENOMEM;
	}

	if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
		dprintk("RPC:       %s: insufficient wqe's available\n",
			__func__);
		return -ENOMEM;
	}
	max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > max_qp_wr)
		cdata->max_requests = max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
	rc = ia->ri_ops->ro_open(ia, ep, cdata);
	if (rc)
		return rc;
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;	/* always signal? */
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1;
	sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall,
			      rpcrdma_cq_async_error_upcall, NULL, &cq_attr);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC:       %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
	recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
			      rpcrdma_cq_async_error_upcall, NULL, &cq_attr);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr->max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
						devattr->max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	if (ia->ri_dma_mr)
		ib_dereg_mr(ia->ri_dma_mr);
	return rc;
}
Esempio n. 27
0
static int krping_setup_buffers(struct krping_cb *cb)
{
	int ret;
	struct ib_phys_buf buf;
	u64 iovbase;

	DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb);

	if (cb->use_dmamr) {
		cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE|
					   IB_ACCESS_REMOTE_READ|
				           IB_ACCESS_REMOTE_WRITE);
		if (IS_ERR(cb->dma_mr)) {
			log(LOG_ERR, "reg_dmamr failed\n");
			return PTR_ERR(cb->dma_mr);
		}
	} else {

		buf.addr = vtophys(&cb->recv_buf);
		buf.size = sizeof cb->recv_buf;
		iovbase = vtophys(&cb->recv_buf);
		cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     IB_ACCESS_LOCAL_WRITE, 
					     &iovbase);

		if (IS_ERR(cb->recv_mr)) {
			log(LOG_ERR, "recv_buf reg_mr failed\n");
			return PTR_ERR(cb->recv_mr);
		}

		buf.addr = vtophys(&cb->send_buf);
		buf.size = sizeof cb->send_buf;
		iovbase = vtophys(&cb->send_buf);
		cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     0, &iovbase);

		if (IS_ERR(cb->send_mr)) {
			log(LOG_ERR, "send_buf reg_mr failed\n");
			ib_dereg_mr(cb->recv_mr);
			return PTR_ERR(cb->send_mr);
		}
	}

	cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL,
		PAGE_SIZE, 0);

	if (!cb->rdma_buf) {
		log(LOG_ERR, "rdma_buf malloc failed\n");
		ret = ENOMEM;
		goto err1;
	}
	if (!cb->use_dmamr) {

		buf.addr = vtophys(cb->rdma_buf);
		buf.size = cb->size;
		iovbase = vtophys(cb->rdma_buf);
		cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     IB_ACCESS_REMOTE_READ| 
					     IB_ACCESS_REMOTE_WRITE, 
					     &iovbase);

		if (IS_ERR(cb->rdma_mr)) {
			log(LOG_ERR, "rdma_buf reg_mr failed\n");
			ret = PTR_ERR(cb->rdma_mr);
			goto err2;
		}
	}

	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
		cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
			0, -1UL, PAGE_SIZE, 0);
		if (!cb->start_buf) {
			log(LOG_ERR, "start_buf malloc failed\n");
			ret = ENOMEM;
			goto err2;
		}
		if (!cb->use_dmamr) {
			unsigned flags = IB_ACCESS_REMOTE_READ;

			if (cb->wlat || cb->rlat || cb->bw) 
				flags |= IB_ACCESS_REMOTE_WRITE;
			buf.addr = vtophys(cb->start_buf);
			buf.size = cb->size;
			iovbase = vtophys(cb->start_buf);
			cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     flags,
					     &iovbase);

			if (IS_ERR(cb->start_mr)) {
				log(LOG_ERR, "start_buf reg_mr failed\n");
				ret = PTR_ERR(cb->start_mr);
				goto err3;
			}
		}
	}

	krping_setup_wr(cb);
	DEBUG_LOG(PFX "allocated & registered buffers...\n");
	return 0;
err3:
	contigfree(cb->start_buf, cb->size, M_DEVBUF);

	if (!cb->use_dmamr)
		ib_dereg_mr(cb->rdma_mr);
err2:
	contigfree(cb->rdma_buf, cb->size, M_DEVBUF);
err1:
	if (cb->use_dmamr)
		ib_dereg_mr(cb->dma_mr);
	else {
		ib_dereg_mr(cb->recv_mr);
		ib_dereg_mr(cb->send_mr);
	}
	return ret;
}
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
			 const char __user *buf, int in_len,
			 int out_len)
{
	struct ib_uverbs_reg_mr      cmd;
	struct ib_uverbs_reg_mr_resp resp;
	struct ib_udata              udata;
	struct ib_umem_object       *obj;
	struct ib_pd                *pd;
	struct ib_mr                *mr;
	int                          ret;

	if (out_len < sizeof resp)
		return -ENOSPC;

	if (copy_from_user(&cmd, buf, sizeof cmd))
		return -EFAULT;

	INIT_UDATA(&udata, buf + sizeof cmd,
		   (unsigned long) cmd.response + sizeof resp,
		   in_len - sizeof cmd, out_len - sizeof resp);

	if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
		return -EINVAL;

	/*
	 * Local write permission is required if remote write or
	 * remote atomic permission is also requested.
	 */
	if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
	    !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
		return -EINVAL;

	obj = kmalloc(sizeof *obj, GFP_KERNEL);
	if (!obj)
		return -ENOMEM;

	obj->uobject.context = file->ucontext;

	/*
	 * We ask for writable memory if any access flags other than
	 * "remote read" are set.  "Local write" and "remote write"
	 * obviously require write access.  "Remote atomic" can do
	 * things like fetch and add, which will modify memory, and
	 * "MW bind" can change permissions by binding a window.
	 */
	ret = ib_umem_get(file->device->ib_dev, &obj->umem,
			  (void *) (unsigned long) cmd.start, cmd.length,
			  !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
	if (ret)
		goto err_free;

	obj->umem.virt_base = cmd.hca_va;

	mutex_lock(&ib_uverbs_idr_mutex);

	pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
	if (!pd || pd->uobject->context != file->ucontext) {
		ret = -EINVAL;
		goto err_up;
	}

	if (!pd->device->reg_user_mr) {
		ret = -ENOSYS;
		goto err_up;
	}

	mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
	if (IS_ERR(mr)) {
		ret = PTR_ERR(mr);
		goto err_up;
	}

	mr->device  = pd->device;
	mr->pd      = pd;
	mr->uobject = &obj->uobject;
	atomic_inc(&pd->usecnt);
	atomic_set(&mr->usecnt, 0);

	memset(&resp, 0, sizeof resp);
	resp.lkey = mr->lkey;
	resp.rkey = mr->rkey;

retry:
	if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto err_unreg;
	}

	ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id);

	if (ret == -EAGAIN)
		goto retry;
	if (ret)
		goto err_unreg;

	resp.mr_handle = obj->uobject.id;

	if (copy_to_user((void __user *) (unsigned long) cmd.response,
			 &resp, sizeof resp)) {
		ret = -EFAULT;
		goto err_idr;
	}

	mutex_lock(&file->mutex);
	list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
	mutex_unlock(&file->mutex);

	mutex_unlock(&ib_uverbs_idr_mutex);

	return in_len;

err_idr:
	idr_remove(&ib_uverbs_mr_idr, obj->uobject.id);

err_unreg:
	ib_dereg_mr(mr);
	atomic_dec(&pd->usecnt);

err_up:
	mutex_unlock(&ib_uverbs_idr_mutex);

	ib_umem_release(file->device->ib_dev, &obj->umem);

err_free:
	kfree(obj);
	return ret;
}
Esempio n. 29
0
/*
 * Unregister and destroy buffer memory. Need to deal with
 * partial initialization, so it's callable from failed create.
 * Must be called before destroying endpoint, as registrations
 * reference it.
 */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	int rc, i;
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct rpcrdma_mw *r;

	/* clean up in reverse order from create
	 *   1.  recv mr memory (mr free, then kfree)
	 *   1a. bind mw memory
	 *   2.  send mr memory (mr free, then kfree)
	 *   3.  padding (if any) [moved to rpcrdma_ep_destroy]
	 *   4.  arrays
	 */
	dprintk("RPC:       %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_recv_bufs[i]->rr_handle,
					&buf->rb_recv_bufs[i]->rr_iov);
			kfree(buf->rb_recv_bufs[i]);
		}
		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
			while (!list_empty(&buf->rb_mws)) {
				r = list_entry(buf->rb_mws.next,
					struct rpcrdma_mw, mw_list);
				list_del(&r->mw_list);
				switch (ia->ri_memreg_strategy) {
				case RPCRDMA_FRMR:
					rc = ib_dereg_mr(r->r.frmr.fr_mr);
					if (rc)
						dprintk("RPC:       %s:"
							" ib_dereg_mr"
							" failed %i\n",
							__func__, rc);
					ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
					break;
				case RPCRDMA_MTHCAFMR:
					rc = ib_dealloc_fmr(r->r.fmr);
					if (rc)
						dprintk("RPC:       %s:"
							" ib_dealloc_fmr"
							" failed %i\n",
							__func__, rc);
					break;
				case RPCRDMA_MEMWINDOWS_ASYNC:
				case RPCRDMA_MEMWINDOWS:
					rc = ib_dealloc_mw(r->r.mw);
					if (rc)
						dprintk("RPC:       %s:"
							" ib_dealloc_mw"
							" failed %i\n",
							__func__, rc);
					break;
				default:
					break;
				}
			}
			rpcrdma_deregister_internal(ia,
					buf->rb_send_bufs[i]->rl_handle,
					&buf->rb_send_bufs[i]->rl_iov);
			kfree(buf->rb_send_bufs[i]);
		}
	}

	kfree(buf->rb_pool);
}
Esempio n. 30
0
/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_cq *sendcq, *recvcq;
	unsigned int max_qp_wr;
	int rc;

	if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
		dprintk("RPC:       %s: insufficient sge's available\n",
			__func__);
		return -ENOMEM;
	}

	if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
		dprintk("RPC:       %s: insufficient wqe's available\n",
			__func__);
		return -ENOMEM;
	}
	max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > max_qp_wr)
		cdata->max_requests = max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_send_wr += 1;	/* drain cqe */
	rc = ia->ri_ops->ro_open(ia, ep, cdata);
	if (rc)
		return rc;
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_recv_wr += 1;	/* drain cqe */
	ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;	/* always signal? */
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_alloc_cq(ia->ri_device, NULL,
			     ep->rep_attr.cap.max_send_wr + 1,
			     0, IB_POLL_SOFTIRQ);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC:       %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	recvcq = ib_alloc_cq(ia->ri_device, NULL,
			     ep->rep_attr.cap.max_recv_wr + 1,
			     0, IB_POLL_SOFTIRQ);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */
	memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (ia->ri_device->attrs.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
						ia->ri_device->attrs.max_qp_rd_atom;

	/* Limit transport retries so client can detect server
	 * GID changes quickly. RPC layer handles re-establishing
	 * transport connection and retransmission.
	 */
	ep->rep_remote_cma.retry_count = 6;

	/* RPC-over-RDMA handles its own flow control. In addition,
	 * make all RNR NAKs visible so we know that RPC-over-RDMA
	 * flow control is working correctly (no NAKs should be seen).
	 */
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	ib_free_cq(sendcq);
out1:
	if (ia->ri_dma_mr)
		ib_dereg_mr(ia->ri_dma_mr);
	return rc;
}