Example #1
File: verbs.c Project: 513855417/linux
/**
 * ib_alloc_pd - Allocates an unused protection domain.
 * @device: The device on which to allocate the protection domain.
 *
 * A protection domain object provides an association between QPs, shared
 * receive queues, address handles, memory regions, and memory windows.
 *
 * Every PD has a local_dma_lkey which can be used as the lkey value for local
 * memory operations.
 */
struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
	struct ib_pd *pd;

	pd = device->alloc_pd(device, NULL, NULL);
	if (IS_ERR(pd))
		return pd;

	pd->device = device;
	pd->uobject = NULL;
	pd->local_mr = NULL;
	atomic_set(&pd->usecnt, 0);

	if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		pd->local_dma_lkey = device->local_dma_lkey;
	else {
		struct ib_mr *mr;

		mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(mr)) {
			ib_dealloc_pd(pd);
			return (struct ib_pd *)mr;
		}

		pd->local_mr = mr;
		pd->local_dma_lkey = pd->local_mr->lkey;
	}
	return pd;
}
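
For reference, a minimal caller-side sketch of this API, with hypothetical struct and function names (not taken from any of the projects listed here): allocate the PD, use its local_dma_lkey for local SGEs, and release it with ib_dealloc_pd().

/* Usage sketch only; my_ctx, my_ctx_init and my_ctx_fini are hypothetical. */
#include <rdma/ib_verbs.h>

struct my_ctx {
	struct ib_pd *pd;
	u32 lkey;
};

static int my_ctx_init(struct my_ctx *ctx, struct ib_device *device)
{
	ctx->pd = ib_alloc_pd(device);
	if (IS_ERR(ctx->pd))
		return PTR_ERR(ctx->pd);

	/* valid after ib_alloc_pd(): either the device lkey or the fallback DMA MR's lkey */
	ctx->lkey = ctx->pd->local_dma_lkey;
	return 0;
}

static void my_ctx_fini(struct my_ctx *ctx)
{
	ib_dealloc_pd(ctx->pd);
}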
Example #2
/* this may be done per device only (not on a per-connection basis) */
static int ib_sock_mem_init_common(struct IB_SOCK *sock)
{
	int 	ret;
	int	access_flags = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE;

	/* create a protection domain. */
	sock->is_mem.ism_pd = ib_alloc_pd(sock->is_id->device);
	if (IS_ERR(sock->is_mem.ism_pd)) {
		printk("can't create a protection domain\n");
		ret = PTR_ERR(sock->is_mem.ism_pd);
		sock->is_mem.ism_pd = NULL;
		return ret;
	}

	sock->is_mem.ism_mr = ib_get_dma_mr(sock->is_mem.ism_pd, access_flags);
	if (IS_ERR(sock->is_mem.ism_mr)) {
		ret = PTR_ERR(sock->is_mem.ism_mr);
		sock->is_mem.ism_mr = NULL;
		printk("Failed ib_get_dma_mr : %d\n", ret);
		return ret;
	}

	return 0;
}
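
The matching teardown is not shown above; a minimal sketch, assuming the same is_mem.ism_pd/ism_mr fields, deregisters the MR before deallocating the PD (the init path leaves both fields NULL on failure, so the checks are safe).

static void ib_sock_mem_fini_common(struct IB_SOCK *sock)
{
	/* hypothetical counterpart to ib_sock_mem_init_common() */
	if (sock->is_mem.ism_mr)
		ib_dereg_mr(sock->is_mem.ism_mr);
	if (sock->is_mem.ism_pd)
		ib_dealloc_pd(sock->is_mem.ism_pd);
}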
Example #3
File: iser_verbs.c Project: 710leo/LVS
/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
	device->pd = ib_alloc_pd(device->ib_device);
	if (IS_ERR(device->pd))
		goto pd_err;

	device->rx_cq = ib_create_cq(device->ib_device,
				  iser_cq_callback,
				  iser_cq_event_callback,
				  (void *)device,
				  ISER_MAX_RX_CQ_LEN, 0);
	if (IS_ERR(device->rx_cq))
		goto rx_cq_err;

	device->tx_cq = ib_create_cq(device->ib_device,
				  NULL, iser_cq_event_callback,
				  (void *)device,
				  ISER_MAX_TX_CQ_LEN, 0);

	if (IS_ERR(device->tx_cq))
		goto tx_cq_err;

	if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
		goto cq_arm_err;

	tasklet_init(&device->cq_tasklet,
		     iser_cq_tasklet_fn,
		     (unsigned long)device);

	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ);
	if (IS_ERR(device->mr))
		goto dma_mr_err;

	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
				iser_event_handler);
	if (ib_register_event_handler(&device->event_handler))
		goto handler_err;

	return 0;

handler_err:
	ib_dereg_mr(device->mr);
dma_mr_err:
	tasklet_kill(&device->cq_tasklet);
cq_arm_err:
	ib_destroy_cq(device->tx_cq);
tx_cq_err:
	ib_destroy_cq(device->rx_cq);
rx_cq_err:
	ib_dealloc_pd(device->pd);
pd_err:
	iser_err("failed to allocate an IB resource\n");
	return -1;
}
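
The error labels above already encode the release order; a matching cleanup helper (hypothetical, sketched purely from those labels plus the event handler registered last) would undo the setup in reverse:

static void iser_free_device_ib_res_sketch(struct iser_device *device)
{
	/* hypothetical teardown mirroring iser_create_device_ib_res() */
	ib_unregister_event_handler(&device->event_handler);
	ib_dereg_mr(device->mr);
	tasklet_kill(&device->cq_tasklet);
	ib_destroy_cq(device->tx_cq);
	ib_destroy_cq(device->rx_cq);
	ib_dealloc_pd(device->pd);
}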
Example #4
int fi_ib_mr_reg(struct fid *fid, const void *buf, size_t len,
		uint64_t access, uint64_t offset, uint64_t requested_key,
		uint64_t flags, struct fid_mr **mr, void *context)
{
	struct fid_domain	*domain = (struct fid_domain *)fid;
	struct fi_ib_mem_desc	*md;
	int			ret;

	print_trace("in\n");

	if (flags)
		return -FI_EBADFLAGS;

	md = kzalloc(sizeof(*md), GFP_KERNEL);
	if (!md) {
		print_err("kalloc failed!\n");
		return -FI_ENOMEM;
	}

	md->domain = (struct fi_ib_domain *) domain;

	md->mr_fid.fid.fclass = FI_CLASS_MR;
	md->mr_fid.fid.context = context;
	md->mr_fid.fid.ops = &fi_ib_mr_ops;

	md->mr = ib_get_dma_mr(md->domain->pd,
			       IB_ACCESS_LOCAL_WRITE
			       | IB_ACCESS_REMOTE_WRITE
			       | IB_ACCESS_REMOTE_READ);
	if (IS_ERR(md->mr)) {
		ret = PTR_ERR(md->mr);
		print_err("ib_get_dma_mr returned %d\n", ret);
		goto err;
	}

	md->mr_fid.mem_desc = (void *) (uintptr_t) md->mr->lkey;
	md->mr_fid.key = md->mr->rkey;

	*mr = &md->mr_fid;

	return 0;
err:
	kfree(md);

	return ret;
}
Example #5
static int rdma_setup(rdma_ctx_t ctx)
{
    // create receive buffer
    ctx->rdma_recv_buffer = kmalloc(RDMA_BUFFER_SIZE, GFP_KERNEL);
    CHECK_MSG_RET(ctx->rdma_recv_buffer != 0, "Error kmalloc", -1);

    // create memory region
    ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_REMOTE_READ | 
                                     IB_ACCESS_REMOTE_WRITE | 
                                     IB_ACCESS_LOCAL_WRITE);
    /* ib_get_dma_mr() returns ERR_PTR() on failure, not NULL */
    CHECK_MSG_RET(!IS_ERR(ctx->mr), "Error creating MR", -1);

    ctx->rkey = ctx->mr->rkey;

    // get dma_addr
    ctx->dma_addr = ib_dma_map_single(rdma_ib_device.dev, ctx->rdma_recv_buffer, 
            RDMA_BUFFER_SIZE, DMA_BIDIRECTIONAL);
    CHECK_MSG_RET(ib_dma_mapping_error(rdma_ib_device.dev, ctx->dma_addr) == 0,
            "Error ib_dma_map_single", -1);

    // modify QP until RTS
    modify_qp(ctx);
    return 0;
}
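
A teardown sketch for these resources (hypothetical function, assuming the same ctx fields and the rdma_ib_device.dev handle used above) unmaps the buffer, deregisters the MR, and frees the receive buffer:

static void rdma_teardown(rdma_ctx_t ctx)
{
    /* hypothetical counterpart to rdma_setup() */
    ib_dma_unmap_single(rdma_ib_device.dev, ctx->dma_addr,
                        RDMA_BUFFER_SIZE, DMA_BIDIRECTIONAL);
    ib_dereg_mr(ctx->mr);
    kfree(ctx->rdma_recv_buffer);
}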
Example #6
File: verbs.c Project: MaxChina/linux
/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC:       %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC:       %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC:       %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
#if RPCRDMA_PERSISTENT_REGISTRATION
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			rc = -ENOMEM;
			goto out2;
#endif
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
#if RPCRDMA_PERSISTENT_REGISTRATION
	register_setup:
#endif
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC:       %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}
Example #7
static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
{
	struct sockaddr_in cl = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int port, err = -EINVAL;

	for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
		cl.sin_port = htons((ushort)port);
		err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
		if (err != -EADDRINUSE)
			break;
	}
	return err;
}

/**
 * rdma_create_trans - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;
	struct ib_device_attr devattr;
	struct ib_cq_init_attr cq_attr = {};

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
				     IB_QPT_RC);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Bind to a privileged port if we need to */
	if (opts.privport) {
		err = p9_rdma_bind_privport(rdma);
		if (err < 0) {
			pr_err("%s (%d): problem binding to privport: %d\n",
			       __func__, task_pid_nr(current), -err);
			goto error;
		}
	}

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Query the device attributes */
	err = ib_query_device(rdma->cm_id->device, &devattr);
	if (err)
		goto error;

	/* Create the Completion Queue */
	cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1;
	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
				cq_event_handler, client,
				&cq_attr);
	if (IS_ERR(rdma->cq))
		goto error;
	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Cache the DMA lkey in the transport */
	rdma->dma_mr = NULL;
	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		rdma->lkey = rdma->cm_id->device->local_dma_lkey;
	else {
		rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(rdma->dma_mr))
			goto error;
		rdma->lkey = rdma->dma_mr->lkey;
	}

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	rdma_destroy_trans(rdma);
	return -ENOTCONN;
}
Example #8
/**
 * rdma_create_trans - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;
	struct ib_device_attr devattr;

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Query the device attributes */
	err = ib_query_device(rdma->cm_id->device, &devattr);
	if (err)
		goto error;

	/* Create the Completion Queue */
	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
				cq_event_handler, client,
				opts.sq_depth + opts.rq_depth + 1, 0);
	if (IS_ERR(rdma->cq))
		goto error;
	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Cache the DMA lkey in the transport */
	rdma->dma_mr = NULL;
	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		rdma->lkey = rdma->cm_id->device->local_dma_lkey;
	else {
		rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(rdma->dma_mr))
			goto error;
		rdma->lkey = rdma->dma_mr->lkey;
	}

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	rdma_destroy_trans(rdma);
	return -ENOTCONN;
}
Example #9
/* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
static void isert_cq_comp_work_cb(void *ctx)
{
	struct isert_cq *cq_desc = ctx;
#else
static void isert_cq_comp_work_cb(struct work_struct *work)
{
	struct isert_cq *cq_desc =
		container_of(work, struct isert_cq, cq_comp_work);
#endif
	int ret;

	TRACE_ENTRY();

	ret = isert_poll_cq(cq_desc);
	if (unlikely(ret < 0)) { /* poll error */
		pr_err("ib_poll_cq failed\n");
		goto out;
	}

	ib_req_notify_cq(cq_desc->cq,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	/*
	 * not all HCAs support IB_CQ_REPORT_MISSED_EVENTS,
	 * so we need to make sure we don't miss any events between
	 * last call to ib_poll_cq() and ib_req_notify_cq()
	 */
	isert_poll_cq(cq_desc);

out:
	TRACE_EXIT();
	return;
}

static void isert_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct isert_cq *cq_desc = context;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
	queue_work(cq_desc->cq_workqueue, &cq_desc->cq_comp_work);
#else
	queue_work_on(smp_processor_id(), cq_desc->cq_workqueue,
		      &cq_desc->cq_comp_work);
#endif
}

static const char *ib_event_type_str(enum ib_event_type ev_type)
{
	switch (ev_type) {
	case IB_EVENT_COMM_EST:
		return "COMM_EST";
	case IB_EVENT_QP_FATAL:
		return "QP_FATAL";
	case IB_EVENT_QP_REQ_ERR:
		return "QP_REQ_ERR";
	case IB_EVENT_QP_ACCESS_ERR:
		return "QP_ACCESS_ERR";
	case IB_EVENT_SQ_DRAINED:
		return "SQ_DRAINED";
	case IB_EVENT_PATH_MIG:
		return "PATH_MIG";
	case IB_EVENT_PATH_MIG_ERR:
		return "PATH_MIG_ERR";
	case IB_EVENT_QP_LAST_WQE_REACHED:
		return "QP_LAST_WQE_REACHED";
	case IB_EVENT_CQ_ERR:
		return "CQ_ERR";
	case IB_EVENT_SRQ_ERR:
		return "SRQ_ERR";
	case IB_EVENT_SRQ_LIMIT_REACHED:
		return "SRQ_LIMIT_REACHED";
	case IB_EVENT_PORT_ACTIVE:
		return "PORT_ACTIVE";
	case IB_EVENT_PORT_ERR:
		return "PORT_ERR";
	case IB_EVENT_LID_CHANGE:
		return "LID_CHANGE";
	case IB_EVENT_PKEY_CHANGE:
		return "PKEY_CHANGE";
	case IB_EVENT_SM_CHANGE:
		return "SM_CHANGE";
	case IB_EVENT_CLIENT_REREGISTER:
		return "CLIENT_REREGISTER";
	case IB_EVENT_DEVICE_FATAL:
		return "DEVICE_FATAL";
	default:
		return "UNKNOWN";
	}
}

static void isert_async_evt_handler(struct ib_event *async_ev, void *context)
{
	struct isert_cq *cq = context;
	struct isert_device *isert_dev = cq->dev;
	struct ib_device *ib_dev = isert_dev->ib_dev;
	char *dev_name = ib_dev->name;
	enum ib_event_type ev_type = async_ev->event;
	struct isert_connection *isert_conn;

	TRACE_ENTRY();

	switch (ev_type) {
	case IB_EVENT_COMM_EST:
		isert_conn = async_ev->element.qp->qp_context;
		pr_info("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n",
			isert_conn, isert_conn->cm_id, dev_name,
			ib_event_type_str(IB_EVENT_COMM_EST));
		/* force "connection established" event */
		rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST);
		break;

	/* rest of QP-related events */
	case IB_EVENT_QP_FATAL:
	case IB_EVENT_QP_REQ_ERR:
	case IB_EVENT_QP_ACCESS_ERR:
	case IB_EVENT_SQ_DRAINED:
	case IB_EVENT_PATH_MIG:
	case IB_EVENT_PATH_MIG_ERR:
	case IB_EVENT_QP_LAST_WQE_REACHED:
		isert_conn = async_ev->element.qp->qp_context;
		pr_err("conn:0x%p cm_id:0x%p dev:%s, QP evt: %s\n",
		       isert_conn, isert_conn->cm_id, dev_name,
		       ib_event_type_str(ev_type));
		break;

	/* CQ-related events */
	case IB_EVENT_CQ_ERR:
		pr_err("dev:%s CQ evt: %s\n", dev_name,
		       ib_event_type_str(ev_type));
		break;

	/* SRQ events */
	case IB_EVENT_SRQ_ERR:
	case IB_EVENT_SRQ_LIMIT_REACHED:
		pr_err("dev:%s SRQ evt: %s\n", dev_name,
		       ib_event_type_str(ev_type));
		break;

	/* Port events */
	case IB_EVENT_PORT_ACTIVE:
	case IB_EVENT_PORT_ERR:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_PKEY_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
		pr_err("dev:%s port:%d evt: %s\n",
		       dev_name, async_ev->element.port_num,
		       ib_event_type_str(ev_type));
		break;

	/* HCA events */
	case IB_EVENT_DEVICE_FATAL:
		pr_err("dev:%s HCA evt: %s\n", dev_name,
		       ib_event_type_str(ev_type));
		break;

	default:
		pr_err("dev:%s evt: %s\n", dev_name,
		       ib_event_type_str(ev_type));
		break;
	}

	TRACE_EXIT();
}

static struct isert_device *isert_device_create(struct ib_device *ib_dev)
{
	struct isert_device *isert_dev;
	struct ib_device_attr *dev_attr;
	int cqe_num, err;
	struct ib_pd *pd;
	struct ib_mr *mr;
	struct ib_cq *cq;
	char wq_name[64];
	int i, j;

	TRACE_ENTRY();

	isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL);
	if (unlikely(isert_dev == NULL)) {
		pr_err("Failed to allocate iser dev\n");
		err = -ENOMEM;
		goto out;
	}

	dev_attr = &isert_dev->device_attr;
	err = ib_query_device(ib_dev, dev_attr);
	if (unlikely(err)) {
		pr_err("Failed to query device, err: %d\n", err);
		goto fail_query;
	}

	isert_dev->num_cqs = min_t(int, num_online_cpus(),
				   ib_dev->num_comp_vectors);

	isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs,
				    GFP_KERNEL);
	if (unlikely(isert_dev->cq_qps == NULL)) {
		pr_err("Failed to allocate iser cq_qps\n");
		err = -ENOMEM;
		goto fail_cq_qps;
	}

	isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
	if (unlikely(isert_dev->cq_desc == NULL)) {
		pr_err("Failed to allocate %ld bytes for iser cq_desc\n",
		       sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
		err = -ENOMEM;
		goto fail_alloc_cq_desc;
	}

	pd = ib_alloc_pd(ib_dev);
	if (unlikely(IS_ERR(pd))) {
		err = PTR_ERR(pd);
		pr_err("Failed to alloc iser dev pd, err:%d\n", err);
		goto fail_pd;
	}

	mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
	if (unlikely(IS_ERR(mr))) {
		err = PTR_ERR(mr);
		pr_err("Failed to get dma mr, err: %d\n", err);
		goto fail_mr;
	}

	cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES);
	cqe_num = cqe_num / isert_dev->num_cqs;

#ifdef CONFIG_SCST_EXTRACHECKS
	if (isert_dev->device_attr.max_cqe == 0)
		pr_err("Zero max_cqe encountered: you may have a compilation problem\n");
#endif

	for (i = 0; i < isert_dev->num_cqs; ++i) {
		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];

		cq_desc->dev = isert_dev;
		cq_desc->idx = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL);
#else
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb);
#endif

		snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name);
#else
#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_RESCUER, 1);
#else
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_MEM_RECLAIM, 1);
#endif
#endif
		if (unlikely(!cq_desc->cq_workqueue)) {
			pr_err("Failed to alloc iser cq work queue for dev:%s\n",
			       ib_dev->name);
			err = -ENOMEM;
			goto fail_cq;
		}

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
		cq = ib_create_cq(ib_dev,
				  isert_cq_comp_handler,
				  isert_async_evt_handler,
				  cq_desc, /* context */
				  cqe_num,
				  i); /* completion vector */
#else
		{
		struct ib_cq_init_attr ia = {
			.cqe		 = cqe_num,
			.comp_vector	 = i,
		};
		cq = ib_create_cq(ib_dev,
				  isert_cq_comp_handler,
				  isert_async_evt_handler,
				  cq_desc, /* context */
				  &ia);
		}
#endif
		if (unlikely(IS_ERR(cq))) {
			cq_desc->cq = NULL;
			err = PTR_ERR(cq);
			pr_err("Failed to create iser dev cq, err:%d\n", err);
			goto fail_cq;
		}

		cq_desc->cq = cq;
		err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
		if (unlikely(err)) {
			pr_err("Failed to request notify cq, err: %d\n", err);
			goto fail_cq;
		}
	}

	isert_dev->ib_dev = ib_dev;
	isert_dev->pd = pd;
	isert_dev->mr = mr;

	INIT_LIST_HEAD(&isert_dev->conn_list);

	lockdep_assert_held(&dev_list_mutex);

	isert_dev_list_add(isert_dev);

	pr_info("iser created device:%p\n", isert_dev);
	return isert_dev;

fail_cq:
	for (j = 0; j <= i; ++j) {
		if (isert_dev->cq_desc[j].cq)
			ib_destroy_cq(isert_dev->cq_desc[j].cq);
		if (isert_dev->cq_desc[j].cq_workqueue)
			destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue);
	}
	ib_dereg_mr(mr);
fail_mr:
	ib_dealloc_pd(pd);
fail_pd:
	vfree(isert_dev->cq_desc);
fail_alloc_cq_desc:
	kfree(isert_dev->cq_qps);
fail_cq_qps:
fail_query:
	kfree(isert_dev);
out:
	TRACE_EXIT_RES(err);
	return ERR_PTR(err);
}

static void isert_device_release(struct isert_device *isert_dev)
{
	int err, i;

	TRACE_ENTRY();

	lockdep_assert_held(&dev_list_mutex);

	isert_dev_list_remove(isert_dev); /* remove from global list */

	for (i = 0; i < isert_dev->num_cqs; ++i) {
		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22)
		/*
		 * cancel_work_sync() was introduced in 2.6.22. We can
		 * only wait until all scheduled work is done.
		 */
		flush_workqueue(cq_desc->cq_workqueue);
#else
		cancel_work_sync(&cq_desc->cq_comp_work);
#endif

		err = ib_destroy_cq(cq_desc->cq);
		if (unlikely(err))
			pr_err("Failed to destroy cq, err:%d\n", err);

		destroy_workqueue(cq_desc->cq_workqueue);
	}

	err = ib_dereg_mr(isert_dev->mr);
	if (unlikely(err))
		pr_err("Failed to destroy mr, err:%d\n", err);
	err = ib_dealloc_pd(isert_dev->pd);
	if (unlikely(err))
		pr_err("Failed to destroy pd, err:%d\n", err);

	vfree(isert_dev->cq_desc);
	isert_dev->cq_desc = NULL;

	kfree(isert_dev->cq_qps);
	isert_dev->cq_qps = NULL;

	kfree(isert_dev);

	TRACE_EXIT();
}
Example #10
/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC:       %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	switch (memreg) {
	case RPCRDMA_MEMWINDOWS:
	case RPCRDMA_MEMWINDOWS_ASYNC:
		if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
			dprintk("RPC:       %s: MEMWINDOWS registration "
				"specified but not supported by adapter, "
				"using slower RPCRDMA_REGISTER\n",
				__func__);
			memreg = RPCRDMA_REGISTER;
		}
		break;
	case RPCRDMA_MTHCAFMR:
		if (!ia->ri_id->device->alloc_fmr) {
#if RPCRDMA_PERSISTENT_REGISTRATION
			dprintk("RPC:       %s: MTHCAFMR registration "
				"specified but not supported by adapter, "
				"using riskier RPCRDMA_ALLPHYSICAL\n",
				__func__);
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			dprintk("RPC:       %s: MTHCAFMR registration "
				"specified but not supported by adapter, "
				"using slower RPCRDMA_REGISTER\n",
				__func__);
			memreg = RPCRDMA_REGISTER;
#endif
		}
		break;
	case RPCRDMA_FRMR:
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
#if RPCRDMA_PERSISTENT_REGISTRATION
			dprintk("RPC:       %s: FRMR registration "
				"specified but not supported by adapter, "
				"using riskier RPCRDMA_ALLPHYSICAL\n",
				__func__);
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			dprintk("RPC:       %s: FRMR registration "
				"specified but not supported by adapter, "
				"using slower RPCRDMA_REGISTER\n",
				__func__);
			memreg = RPCRDMA_REGISTER;
#endif
		}
		break;
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_BOUNCEBUFFERS:
	case RPCRDMA_REGISTER:
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_MW_BIND;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n\t"
				"Will continue with degraded performance\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			memreg = RPCRDMA_REGISTER;
			ia->ri_bind_mem = NULL;
		}
		break;
	default:
		printk(KERN_ERR "%s: invalid memory registration mode %d\n",
				__func__, memreg);
		rc = -EINVAL;
		goto out2;
	}
	dprintk("RPC:       %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}
Example #11
File: verbs.c Project: 274914765/C
/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
    int rc;
    struct rpcrdma_ia *ia = &xprt->rx_ia;

    init_completion(&ia->ri_done);

    ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
    if (IS_ERR(ia->ri_id)) {
        rc = PTR_ERR(ia->ri_id);
        goto out1;
    }

    ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
    if (IS_ERR(ia->ri_pd)) {
        rc = PTR_ERR(ia->ri_pd);
        dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
            __func__, rc);
        goto out2;
    }

    /*
     * Optionally obtain an underlying physical identity mapping in
     * order to do a memory window-based bind. This base registration
     * is protected from remote access - that is enabled only by binding
     * for the specific bytes targeted during each RPC operation, and
     * revoked after the corresponding completion similar to a storage
     * adapter.
     */
    if (memreg > RPCRDMA_REGISTER) {
        int mem_priv = IB_ACCESS_LOCAL_WRITE;
        switch (memreg) {
#if RPCRDMA_PERSISTENT_REGISTRATION
        case RPCRDMA_ALLPHYSICAL:
            mem_priv |= IB_ACCESS_REMOTE_WRITE;
            mem_priv |= IB_ACCESS_REMOTE_READ;
            break;
#endif
        case RPCRDMA_MEMWINDOWS_ASYNC:
        case RPCRDMA_MEMWINDOWS:
            mem_priv |= IB_ACCESS_MW_BIND;
            break;
        default:
            break;
        }
        ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
        if (IS_ERR(ia->ri_bind_mem)) {
            printk(KERN_ALERT "%s: ib_get_dma_mr for "
                "phys register failed with %lX\n\t"
                "Will continue with degraded performance\n",
                __func__, PTR_ERR(ia->ri_bind_mem));
            memreg = RPCRDMA_REGISTER;
            ia->ri_bind_mem = NULL;
        }
    }

    /* Else will do memory reg/dereg for each chunk */
    ia->ri_memreg_strategy = memreg;

    return 0;
out2:
    rdma_destroy_id(ia->ri_id);
out1:
    return rc;
}
Example #12
static int krping_setup_buffers(struct krping_cb *cb)
{
	int ret;
	struct ib_phys_buf buf;
	u64 iovbase;

	DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb);

	if (cb->use_dmamr) {
		cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE|
					   IB_ACCESS_REMOTE_READ|
				           IB_ACCESS_REMOTE_WRITE);
		if (IS_ERR(cb->dma_mr)) {
			log(LOG_ERR, "reg_dmamr failed\n");
			return PTR_ERR(cb->dma_mr);
		}
	} else {

		buf.addr = vtophys(&cb->recv_buf);
		buf.size = sizeof cb->recv_buf;
		iovbase = vtophys(&cb->recv_buf);
		cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     IB_ACCESS_LOCAL_WRITE, 
					     &iovbase);

		if (IS_ERR(cb->recv_mr)) {
			log(LOG_ERR, "recv_buf reg_mr failed\n");
			return PTR_ERR(cb->recv_mr);
		}

		buf.addr = vtophys(&cb->send_buf);
		buf.size = sizeof cb->send_buf;
		iovbase = vtophys(&cb->send_buf);
		cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     0, &iovbase);

		if (IS_ERR(cb->send_mr)) {
			log(LOG_ERR, "send_buf reg_mr failed\n");
			ib_dereg_mr(cb->recv_mr);
			return PTR_ERR(cb->send_mr);
		}
	}

	/* RNIC adapters have a limit up to which they can register physical
	 * memory. If DMA-MR memory mode is set, the driver normally registers
	 * the maximum supported memory. If contigmalloc then allocates memory
	 * beyond the specified RNIC limit, krping may not work.
	 */
	if (cb->use_dmamr && cb->memlimit)
		cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, cb->memlimit,
					    PAGE_SIZE, 0);
	else 
		cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL,
					    PAGE_SIZE, 0);

	if (!cb->rdma_buf) {
		log(LOG_ERR, "rdma_buf malloc failed\n");
		ret = ENOMEM;
		goto err1;
	}
	if (!cb->use_dmamr) {

		buf.addr = vtophys(cb->rdma_buf);
		buf.size = cb->size;
		iovbase = vtophys(cb->rdma_buf);
		cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     IB_ACCESS_REMOTE_READ| 
					     IB_ACCESS_REMOTE_WRITE, 
					     &iovbase);

		if (IS_ERR(cb->rdma_mr)) {
			log(LOG_ERR, "rdma_buf reg_mr failed\n");
			ret = PTR_ERR(cb->rdma_mr);
			goto err2;
		}
	}

	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
		if (cb->use_dmamr && cb->memlimit)
			cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
						     0, cb->memlimit, PAGE_SIZE, 0);
		else
			cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
						     0, -1UL, PAGE_SIZE, 0);
		if (!cb->start_buf) {
			log(LOG_ERR, "start_buf malloc failed\n");
			ret = ENOMEM;
			goto err2;
		}
		if (!cb->use_dmamr) {
			unsigned flags = IB_ACCESS_REMOTE_READ;

			if (cb->wlat || cb->rlat || cb->bw) 
				flags |= IB_ACCESS_REMOTE_WRITE;
			buf.addr = vtophys(cb->start_buf);
			buf.size = cb->size;
			iovbase = vtophys(cb->start_buf);
			cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     flags,
					     &iovbase);

			if (IS_ERR(cb->start_mr)) {
				log(LOG_ERR, "start_buf reg_mr failed\n");
				ret = PTR_ERR(cb->start_mr);
				goto err3;
			}
		}
	}

	krping_setup_wr(cb);
	DEBUG_LOG(PFX "allocated & registered buffers...\n");
	return 0;
err3:
	contigfree(cb->start_buf, cb->size, M_DEVBUF);

	if (!cb->use_dmamr)
		ib_dereg_mr(cb->rdma_mr);
err2:
	contigfree(cb->rdma_buf, cb->size, M_DEVBUF);
err1:
	if (cb->use_dmamr)
		ib_dereg_mr(cb->dma_mr);
	else {
		ib_dereg_mr(cb->recv_mr);
		ib_dereg_mr(cb->send_mr);
	}
	return ret;
}
Example #13
File: iser_rdma.c Project: qtsky89/scst
static struct isert_device *isert_device_create(struct ib_device *ib_dev)
{
	struct isert_device *isert_dev;
	struct ib_device_attr *dev_attr;
	int cqe_num, err;
	struct ib_pd *pd;
	struct ib_mr *mr;
	struct ib_cq *cq;
	char wq_name[64];
	int i, j;

	TRACE_ENTRY();

	isert_dev = kzalloc(sizeof(*isert_dev), GFP_KERNEL);
	if (unlikely(isert_dev == NULL)) {
		pr_err("Failed to allocate iser dev\n");
		err = -ENOMEM;
		goto out;
	}

	dev_attr = &isert_dev->device_attr;
	err = ib_query_device(ib_dev, dev_attr);
	if (unlikely(err)) {
		pr_err("Failed to query device, err: %d\n", err);
		goto fail_query;
	}

	isert_dev->num_cqs = min_t(int, num_online_cpus(),
				   ib_dev->num_comp_vectors);

	isert_dev->cq_qps = kzalloc(sizeof(*isert_dev->cq_qps) * isert_dev->num_cqs,
				    GFP_KERNEL);
	if (unlikely(isert_dev->cq_qps == NULL)) {
		pr_err("Failed to allocate iser cq_qps\n");
		err = -ENOMEM;
		goto fail_cq_qps;
	}

	isert_dev->cq_desc = vmalloc(sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
	if (unlikely(isert_dev->cq_desc == NULL)) {
		pr_err("Failed to allocate %ld bytes for iser cq_desc\n",
		       sizeof(*isert_dev->cq_desc) * isert_dev->num_cqs);
		err = -ENOMEM;
		goto fail_alloc_cq_desc;
	}

	pd = ib_alloc_pd(ib_dev);
	if (unlikely(IS_ERR(pd))) {
		err = PTR_ERR(pd);
		pr_err("Failed to alloc iser dev pd, err:%d\n", err);
		goto fail_pd;
	}

	mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
	if (unlikely(IS_ERR(mr))) {
		err = PTR_ERR(mr);
		pr_err("Failed to get dma mr, err: %d\n", err);
		goto fail_mr;
	}

	cqe_num = min(isert_dev->device_attr.max_cqe, ISER_CQ_ENTRIES);
	cqe_num = cqe_num / isert_dev->num_cqs;

#ifdef CONFIG_SCST_EXTRACHECKS
	if (isert_dev->device_attr.max_cqe == 0)
		pr_err("Zero max_cqe encountered: you may have a compilation problem\n");
#endif

	for (i = 0; i < isert_dev->num_cqs; ++i) {
		struct isert_cq *cq_desc = &isert_dev->cq_desc[i];

		cq_desc->dev = isert_dev;
		cq_desc->idx = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb, NULL);
#else
		INIT_WORK(&cq_desc->cq_comp_work, isert_cq_comp_work_cb);
#endif

		snprintf(wq_name, sizeof(wq_name), "isert_cq_%p", cq_desc);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = create_singlethread_workqueue(wq_name);
#else
#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 36)
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_RESCUER, 1);
#else
		cq_desc->cq_workqueue = alloc_workqueue(wq_name,
							WQ_CPU_INTENSIVE|
							WQ_MEM_RECLAIM, 1);
#endif
#endif
		if (unlikely(!cq_desc->cq_workqueue)) {
			pr_err("Failed to alloc iser cq work queue for dev:%s\n",
			       ib_dev->name);
			err = -ENOMEM;
			goto fail_cq;
		}

		cq = ib_create_cq(ib_dev,
				  isert_cq_comp_handler,
				  isert_async_evt_handler,
				  cq_desc, /* context */
				  cqe_num,
				  i); /* completion vector */
		if (unlikely(IS_ERR(cq))) {
			cq_desc->cq = NULL;
			err = PTR_ERR(cq);
			pr_err("Failed to create iser dev cq, err:%d\n", err);
			goto fail_cq;
		}

		cq_desc->cq = cq;
		err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
		if (unlikely(err)) {
			pr_err("Failed to request notify cq, err: %d\n", err);
			goto fail_cq;
		}
	}

	isert_dev->ib_dev = ib_dev;
	isert_dev->pd = pd;
	isert_dev->mr = mr;

	INIT_LIST_HEAD(&isert_dev->conn_list);

	lockdep_assert_held(&dev_list_mutex);

	isert_dev_list_add(isert_dev);

	pr_info("iser created device:%p\n", isert_dev);
	return isert_dev;

fail_cq:
	for (j = 0; j <= i; ++j) {
		if (isert_dev->cq_desc[j].cq)
			ib_destroy_cq(isert_dev->cq_desc[j].cq);
		if (isert_dev->cq_desc[j].cq_workqueue)
			destroy_workqueue(isert_dev->cq_desc[j].cq_workqueue);
	}
	ib_dereg_mr(mr);
fail_mr:
	ib_dealloc_pd(pd);
fail_pd:
	vfree(isert_dev->cq_desc);
fail_alloc_cq_desc:
	kfree(isert_dev->cq_qps);
fail_cq_qps:
fail_query:
	kfree(isert_dev);
out:
	TRACE_EXIT_RES(err);
	return ERR_PTR(err);
}
Example #14
File: verbs.c Project: Shmuma/sample_mods
static void verbs_add_device (struct ib_device *dev)
{
	int ret;
	struct ib_qp_init_attr attrs;

	if (ib_dev)
		return;

	/* dirty hack so that ib_dma_map_single does not segfault */
	dev->dma_ops = NULL;
	ib_dev = dev;

	printk (KERN_INFO "IB add device called. Name = %s\n", dev->name);

	ret = ib_query_device (dev, &dev_attr);
	if (ret) {
		printk (KERN_INFO "ib_quer_device failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "IB device caps: max_qp %d, max_mcast_grp: %d, max_pkeys: %d\n",
		dev_attr.max_qp, dev_attr.max_mcast_grp, (int)dev_attr.max_pkeys);

	/* We'll work with the first port. It's a sample module, anyway. Who was the
	 * moron who decided to count ports from one? */
	ret = ib_query_port (dev, 1, &port_attr);
	if (ret) {
		printk (KERN_INFO "ib_query_port failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "Port info: lid: %u, sm_lid: %u, max_msg_size: %u\n",
		(unsigned)port_attr.lid, (unsigned)port_attr.sm_lid, port_attr.max_msg_sz);

	pd = ib_alloc_pd (dev);
	if (IS_ERR (pd)) {
		ret = PTR_ERR (pd);
		printk (KERN_INFO "pd allocation failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "PD allocated\n");

	mr = ib_get_dma_mr (pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR (mr)) {
		ret = PTR_ERR (mr);
		printk (KERN_INFO "get_dma_mr failed: %d\n", ret);
		return;
	}

	send_cq = ib_create_cq (dev, NULL, NULL, NULL, 1, 1);
	if (IS_ERR (send_cq)) {
		ret = PTR_ERR (send_cq);
		printk (KERN_INFO "ib_create_cq failed: %d\n", ret);
		return;
	}

	recv_cq = ib_create_cq (dev, verbs_comp_handler_recv, NULL, NULL, 1, 1);
	if (IS_ERR (recv_cq)) {
		ret = PTR_ERR (recv_cq);
		printk (KERN_INFO "ib_create_cq failed: %d\n", ret);
		return;
	}

	ib_req_notify_cq (recv_cq, IB_CQ_NEXT_COMP);
	printk (KERN_INFO "CQs allocated\n");

	ib_query_pkey (dev, 1, 0, &pkey);

	/* allocate memory */
	send_buf = kmalloc (buf_size + 40, GFP_KERNEL);
	recv_buf = kmalloc (buf_size + 40, GFP_KERNEL);

	if (!send_buf || !recv_buf) {
		printk (KERN_INFO "Memory allocation error\n");
		return;
	}

	printk (KERN_INFO "Trying to register regions\n");
	if (ib_dev->dma_ops)
		printk (KERN_INFO "DMA ops are defined\n");

	memset (send_buf, 0, buf_size+40);
	memset (recv_buf, 0, buf_size+40);

	send_key = ib_dma_map_single (ib_dev, send_buf, buf_size, DMA_FROM_DEVICE);
	printk (KERN_INFO "send_key obtained %llx\n", send_key);
	recv_key = ib_dma_map_single (ib_dev, recv_buf, buf_size, DMA_TO_DEVICE);
	printk (KERN_INFO "recv_key obtained %llx\n", recv_key);

	if (ib_dma_mapping_error (ib_dev, send_key)) {
		printk (KERN_INFO "Error mapping send buffer\n");
		return;
	}

	if (ib_dma_mapping_error (ib_dev, recv_key)) {
		printk (KERN_INFO "Error mapping recv buffer\n");
		return;
	}

	memset (&attrs, 0, sizeof (attrs));
	attrs.qp_type = IB_QPT_UD;
	attrs.sq_sig_type = IB_SIGNAL_ALL_WR;
	attrs.event_handler = verbs_qp_event;
	attrs.cap.max_send_wr = CQ_SIZE;
	attrs.cap.max_recv_wr = CQ_SIZE;
	attrs.cap.max_send_sge = 1;
	attrs.cap.max_recv_sge = 1;
	attrs.send_cq = send_cq;
	attrs.recv_cq = recv_cq;

	qp = ib_create_qp (pd, &attrs);
	if (IS_ERR (qp)) {
		ret = PTR_ERR (qp);
		printk (KERN_INFO "qp allocation failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "Create QP with num %x\n", qp->qp_num);

	if (init_qp (qp)) {
		printk (KERN_INFO "Failed to initialize QP\n");
		return;
	}

	ret = ib_query_gid (ib_dev, 1, 0, &local_info.gid);
	if (ret) {
		printk (KERN_INFO "query_gid failed %d\n", ret);
		return;
	}

	local_info.qp_num = qp->qp_num;
	local_info.lid = port_attr.lid;

	/* now we are ready to send our QP number and other stuff to other party */
	if (!server_addr) {
		schedule_work (&sock_accept);
		flush_scheduled_work ();
	}
	else
		exchange_info (server_addr);

	if (!have_remote_info) {
		printk (KERN_INFO "Have no remote info, give up\n");
		return;
	}

	ret = path_rec_lookup_start ();
	if (ret) {
		printk (KERN_INFO "path_rec lookup start failed: %d\n", ret);
		return;
	}

	/* post receive request */
	verbs_post_recv_req ();

	mod_timer (&verbs_timer, NEXTJIFF(1));
}
Example #15
static int krping_setup_buffers(struct krping_cb *cb)
{
	int ret;
	struct ib_phys_buf buf;
	u64 iovbase;

	DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb);

	if (cb->use_dmamr) {
		cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE|
					   IB_ACCESS_REMOTE_READ|
				           IB_ACCESS_REMOTE_WRITE);
		if (IS_ERR(cb->dma_mr)) {
			log(LOG_ERR, "reg_dmamr failed\n");
			return PTR_ERR(cb->dma_mr);
		}
	} else {

		buf.addr = vtophys(&cb->recv_buf);
		buf.size = sizeof cb->recv_buf;
		iovbase = vtophys(&cb->recv_buf);
		cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     IB_ACCESS_LOCAL_WRITE, 
					     &iovbase);

		if (IS_ERR(cb->recv_mr)) {
			log(LOG_ERR, "recv_buf reg_mr failed\n");
			return PTR_ERR(cb->recv_mr);
		}

		buf.addr = vtophys(&cb->send_buf);
		buf.size = sizeof cb->send_buf;
		iovbase = vtophys(&cb->send_buf);
		cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     0, &iovbase);

		if (IS_ERR(cb->send_mr)) {
			log(LOG_ERR, "send_buf reg_mr failed\n");
			ib_dereg_mr(cb->recv_mr);
			return PTR_ERR(cb->send_mr);
		}
	}

	cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL,
		PAGE_SIZE, 0);

	if (!cb->rdma_buf) {
		log(LOG_ERR, "rdma_buf malloc failed\n");
		ret = ENOMEM;
		goto err1;
	}
	if (!cb->use_dmamr) {

		buf.addr = vtophys(cb->rdma_buf);
		buf.size = cb->size;
		iovbase = vtophys(cb->rdma_buf);
		cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     IB_ACCESS_REMOTE_READ| 
					     IB_ACCESS_REMOTE_WRITE, 
					     &iovbase);

		if (IS_ERR(cb->rdma_mr)) {
			log(LOG_ERR, "rdma_buf reg_mr failed\n");
			ret = PTR_ERR(cb->rdma_mr);
			goto err2;
		}
	}

	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
		cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
			0, -1UL, PAGE_SIZE, 0);
		if (!cb->start_buf) {
			log(LOG_ERR, "start_buf malloc failed\n");
			ret = ENOMEM;
			goto err2;
		}
		if (!cb->use_dmamr) {
			unsigned flags = IB_ACCESS_REMOTE_READ;

			if (cb->wlat || cb->rlat || cb->bw) 
				flags |= IB_ACCESS_REMOTE_WRITE;
			buf.addr = vtophys(cb->start_buf);
			buf.size = cb->size;
			iovbase = vtophys(cb->start_buf);
			cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 
					     flags,
					     &iovbase);

			if (IS_ERR(cb->start_mr)) {
				log(LOG_ERR, "start_buf reg_mr failed\n");
				ret = PTR_ERR(cb->start_mr);
				goto err3;
			}
		}
	}

	krping_setup_wr(cb);
	DEBUG_LOG(PFX "allocated & registered buffers...\n");
	return 0;
err3:
	contigfree(cb->start_buf, cb->size, M_DEVBUF);

	if (!cb->use_dmamr)
		ib_dereg_mr(cb->rdma_mr);
err2:
	contigfree(cb->rdma_buf, cb->size, M_DEVBUF);
err1:
	if (cb->use_dmamr)
		ib_dereg_mr(cb->dma_mr);
	else {
		ib_dereg_mr(cb->recv_mr);
		ib_dereg_mr(cb->send_mr);
	}
	return ret;
}
Example #16
File: verbs.c Project: hw-claudio/linux
/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct ib_device_attr *devattr = &ia->ri_devattr;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_query_device(ia->ri_id->device, devattr);
	if (rc) {
		dprintk("RPC:       %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out3;
	}

	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if (((devattr->device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
		      (devattr->max_fast_reg_page_list_len == 0)) {
			dprintk("RPC:       %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC:       %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		ia->ri_ops = &rpcrdma_frwr_memreg_ops;
		break;
	case RPCRDMA_ALLPHYSICAL:
		ia->ri_ops = &rpcrdma_physical_memreg_ops;
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		ia->ri_ops = &rpcrdma_fmr_memreg_ops;
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out3;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out3;
	}
	dprintk("RPC:       %s: memory registration strategy is '%s'\n",
		__func__, ia->ri_ops->ro_displayname);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}