Example 1
/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	rpcrdma_free_regbuf(ia, ep->rep_padbuf);

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}
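The shape of this teardown — guard on a QP that may never have been created, NULL the pointer once it is destroyed, and only log (never propagate) CQ destroy failures since the function returns void — can be illustrated outside the kernel. Below is a minimal userspace sketch; the types and helpers (fake_ep, fake_qp, fake_destroy_cq) are invented stand-ins, not the verbs API.

/* Userspace sketch of the teardown ordering used by rpcrdma_ep_destroy.
 * All names here are made up for illustration. */
#include <stdio.h>
#include <stdlib.h>

struct fake_qp { int connected; };
struct fake_ep { struct fake_qp *qp; void *send_cq; void *recv_cq; };

/* Stand-in for ib_destroy_cq: returns 0 on success, nonzero on error. */
static int fake_destroy_cq(void **cq)
{
	if (*cq == NULL)
		return -1;
	free(*cq);
	*cq = NULL;
	return 0;
}

/* Same order as the kernel function: QP first, then both CQs.
 * CQ destroy errors are only logged; the caller cannot recover here. */
static void fake_ep_destroy(struct fake_ep *ep)
{
	int rc;

	if (ep->qp) {			/* guard: QP may never have been created */
		free(ep->qp);
		ep->qp = NULL;		/* make later teardown attempts safe */
	}

	rc = fake_destroy_cq(&ep->recv_cq);
	if (rc)
		fprintf(stderr, "destroy recv_cq failed: %d\n", rc);

	rc = fake_destroy_cq(&ep->send_cq);
	if (rc)
		fprintf(stderr, "destroy send_cq failed: %d\n", rc);
}

int main(void)
{
	struct fake_ep ep = {
		.qp      = malloc(sizeof(struct fake_qp)),
		.send_cq = malloc(16),
		.recv_cq = malloc(16),
	};

	fake_ep_destroy(&ep);
	fake_ep_destroy(&ep);	/* second call only logs errors; nothing is double-freed */
	return 0;
}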
Example 2
/* The rq_rcv_buf is used only if a Reply chunk is necessary.
 * The decision to use a Reply chunk is made later in
 * rpcrdma_marshal_req. This buffer is registered at that time.
 *
 * Otherwise, the associated RPC Reply arrives in a separate
 * Receive buffer, arbitrarily chosen by the HCA. The buffer
 * allocated here for the RPC Reply is not utilized in that
 * case. See rpcrdma_inline_fixup.
 *
 * A regbuf is used here to remember the buffer size.
 */
static bool
rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		    size_t size, gfp_t flags)
{
	struct rpcrdma_regbuf *rb;

	if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size)
		return true;

	rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
	if (IS_ERR(rb))
		return false;

	rpcrdma_free_regbuf(req->rl_recvbuf);
	r_xprt->rx_stats.hardway_register_count += size;
	req->rl_recvbuf = rb;
	return true;
}
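The allocation policy here — keep the existing buffer when it is already large enough, and swap in a replacement only after the new allocation has succeeded, so a failure leaves the request unchanged — is generic. A minimal userspace sketch of that pattern follows; sized_buf and grow_buf are invented names, not xprtrdma types.

/* "Grow only on demand, swap only after a successful allocation." */
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct sized_buf {
	size_t len;
	char   data[];
};

/* Ensure *slot points at a buffer of at least `size` bytes.
 * On failure the existing buffer (if any) is left untouched. */
static bool grow_buf(struct sized_buf **slot, size_t size)
{
	struct sized_buf *nb;

	if (*slot && (*slot)->len >= size)
		return true;			/* current buffer is big enough */

	nb = malloc(sizeof(*nb) + size);	/* allocate before freeing */
	if (!nb)
		return false;			/* old buffer is still valid */
	nb->len = size;

	free(*slot);				/* only now release the old one */
	*slot = nb;
	return true;
}

int main(void)
{
	struct sized_buf *rcvbuf = NULL;

	if (!grow_buf(&rcvbuf, 1024))		/* first call allocates */
		return 1;
	if (!grow_buf(&rcvbuf, 512))		/* reuses the 1024-byte buffer */
		return 1;
	memset(rcvbuf->data, 0, rcvbuf->len);
	free(rcvbuf);
	return 0;
}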
Example 3
/*
 * The RDMA allocate/free functions need the task structure as a place
 * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
 * sequence.
 *
 * The RPC layer allocates both send and receive buffers in the same call
 * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer).
 * We may register rq_rcv_buf when using reply chunks.
 */
static void *
xprt_rdma_allocate(struct rpc_task *task, size_t size)
{
	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct rpcrdma_regbuf *rb;
	struct rpcrdma_req *req;
	size_t min_size;
	gfp_t flags;

	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
	if (req == NULL)
		return NULL;

	flags = GFP_NOIO | __GFP_NOWARN;
	if (RPC_IS_SWAPPER(task))
		flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;

	if (req->rl_rdmabuf == NULL)
		goto out_rdmabuf;
	if (req->rl_sendbuf == NULL)
		goto out_sendbuf;
	if (size > req->rl_sendbuf->rg_size)
		goto out_sendbuf;

out:
	dprintk("RPC:       %s: size %zd, request 0x%p\n", __func__, size, req);
	req->rl_connect_cookie = 0;	/* our reserved value */
	return req->rl_sendbuf->rg_base;

out_rdmabuf:
	min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
	rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags);
	if (IS_ERR(rb))
		goto out_fail;
	req->rl_rdmabuf = rb;

out_sendbuf:
	/* XDR encoding and RPC/RDMA marshaling of this request has not
	 * yet occurred. Thus a lower bound is needed to prevent buffer
	 * overrun during marshaling.
	 *
	 * RPC/RDMA marshaling may choose to send payload bearing ops
	 * inline, if the result is smaller than the inline threshold.
	 * The value of the "size" argument accounts for header
	 * requirements but not for the payload in these cases.
	 *
	 * Likewise, allocate enough space to receive a reply up to the
	 * size of the inline threshold.
	 *
	 * It's unlikely that both the send header and the received
	 * reply will be large, but slush is provided here to allow
	 * flexibility when marshaling.
	 */
	min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp);
	min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
	if (size < min_size)
		size = min_size;

	rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags);
	if (IS_ERR(rb))
		goto out_fail;
	rb->rg_owner = req;

	r_xprt->rx_stats.hardway_register_count += size;
	rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf);
	req->rl_sendbuf = rb;
	goto out;

out_fail:
	rpcrdma_buffer_put(req);
	r_xprt->rx_stats.failed_marshal_count++;
	return NULL;
}
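The control flow above is the kernel's goto-fallthrough idiom: the fast path drops straight to the out label, while a missing header buffer or an undersized send buffer branches to labels that allocate lazily and then rejoin it. A stripped-down userspace sketch of the same flow is shown below; fake_req, ensure_bufs, and the sizes are illustrative, not the transport code.

/* Fast path falls through to "out"; slow paths allocate and jump back. */
#include <stdio.h>
#include <stdlib.h>

struct fake_req {
	void   *hdrbuf;		/* stands in for rl_rdmabuf */
	void   *sendbuf;	/* stands in for rl_sendbuf */
	size_t  sendbuf_len;
};

static void *ensure_bufs(struct fake_req *req, size_t size)
{
	void *nb;

	if (req->hdrbuf == NULL)
		goto out_hdrbuf;
	if (req->sendbuf == NULL || size > req->sendbuf_len)
		goto out_sendbuf;

out:
	return req->sendbuf;		/* fast path: everything already fits */

out_hdrbuf:
	req->hdrbuf = malloc(128);	/* small fixed-size header buffer */
	if (req->hdrbuf == NULL)
		goto out_fail;
	/* fall through: the send buffer may still be missing or too small */

out_sendbuf:
	nb = malloc(size);		/* allocate before replacing the old one */
	if (nb == NULL)
		goto out_fail;
	free(req->sendbuf);
	req->sendbuf = nb;
	req->sendbuf_len = size;
	goto out;

out_fail:
	return NULL;
}

int main(void)
{
	struct fake_req req = { 0 };

	if (!ensure_bufs(&req, 4096))
		return 1;
	printf("send buffer ready: %zu bytes\n", req.sendbuf_len);
	free(req.hdrbuf);
	free(req.sendbuf);
	return 0;
}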
Example 4
/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *devattr = &ia->ri_devattr;
	struct ib_cq *sendcq, *recvcq;
	struct ib_cq_init_attr cq_attr = {};
	int rc, err;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr->max_qp_wr)
		cdata->max_requests = devattr->max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	rc = ia->ri_ops->ro_open(ia, ep, cdata);
	if (rc)
		return rc;
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	if (cdata->padding) {
		ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
						      GFP_KERNEL);
		if (IS_ERR(ep->rep_padbuf))
			return PTR_ERR(ep->rep_padbuf);
	} else
		ep->rep_padbuf = NULL;

	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1;
	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep, &cq_attr);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC:       %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep, &cq_attr);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr->max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
						devattr->max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	rpcrdma_free_regbuf(ia, ep->rep_padbuf);
	return rc;
}
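The error handling above is staged unwinding: each resource acquired along the way gets its own error label, and a failure jumps to the label that releases everything acquired before it, in reverse order. The sketch below shows the same shape with invented resources (open_res/close_res); it is not the kernel code.

/* Staged setup with out1/out2 unwind labels, in the style of rpcrdma_ep_create. */
#include <stdio.h>
#include <stdlib.h>

static void *open_res(const char *name)
{
	void *p = malloc(16);
	if (p)
		printf("acquired %s\n", name);
	return p;
}

static void close_res(void *p, const char *name)
{
	printf("released %s\n", name);
	free(p);
}

static int setup(void)
{
	void *a, *b, *c;
	int rc = -1;

	a = open_res("a");		/* first stage (like the pad buffer) */
	if (a == NULL)
		return rc;

	b = open_res("b");		/* second stage (like the send CQ) */
	if (b == NULL)
		goto out1;

	c = open_res("c");		/* third stage (like the recv CQ) */
	if (c == NULL)
		goto out2;

	/* Success. A real implementation would hand a, b, c to the caller;
	 * this demo simply releases them to avoid leaking. */
	close_res(c, "c");
	close_res(b, "b");
	close_res(a, "a");
	return 0;

out2:
	close_res(b, "b");		/* undo the second stage */
out1:
	close_res(a, "a");		/* undo the first stage */
	return rc;
}

int main(void)
{
	return setup();
}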