/* * rpcrdma_ep_destroy * * Disconnect and destroy endpoint. After this, the only * valid operations on the ep are to free it (if dynamically * allocated) or re-create it. */ void rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { int rc; dprintk("RPC: %s: entering, connected is %d\n", __func__, ep->rep_connected); cancel_delayed_work_sync(&ep->rep_connect_worker); if (ia->ri_id->qp) { rpcrdma_ep_disconnect(ep, ia); rdma_destroy_qp(ia->ri_id); ia->ri_id->qp = NULL; } rpcrdma_free_regbuf(ia, ep->rep_padbuf); rpcrdma_clean_cq(ep->rep_attr.recv_cq); rc = ib_destroy_cq(ep->rep_attr.recv_cq); if (rc) dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, rc); rpcrdma_clean_cq(ep->rep_attr.send_cq); rc = ib_destroy_cq(ep->rep_attr.send_cq); if (rc) dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, rc); }
/* Make sure @req owns a receive buffer of at least @size bytes.
 *
 * The rq_rcv_buf is needed only when a Reply chunk is used. That
 * decision is taken later, in rpcrdma_marshal_req, which is also
 * where the buffer gets registered.
 *
 * When no Reply chunk is used, the RPC Reply lands in a separate
 * Receive buffer picked arbitrarily by the HCA, and the buffer
 * set up here goes unused. See rpcrdma_inline_fixup.
 *
 * A regbuf is used so that the buffer's size is remembered.
 *
 * Returns true if a large-enough buffer is already attached or a
 * replacement was allocated; false if allocation failed, in which
 * case the existing buffer (if any) is left in place.
 */
static bool
rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		    size_t size, gfp_t flags)
{
	struct rpcrdma_regbuf *cur = req->rl_recvbuf;
	struct rpcrdma_regbuf *fresh;

	if (!cur || rdmab_length(cur) < size) {
		fresh = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
		if (IS_ERR(fresh))
			return false;

		/* Swap in the new buffer only after allocation succeeded. */
		rpcrdma_free_regbuf(cur);
		r_xprt->rx_stats.hardway_register_count += size;
		req->rl_recvbuf = fresh;
	}

	return true;
}
/*
 * The RDMA allocate/free functions use the task structure as the
 * place to stash the struct rpcrdma_req that the actual send/recv
 * sequence needs.
 *
 * The RPC layer allocates the send and receive buffers with a single
 * call (rq_send_buf and rq_rcv_buf both live in one contiguous
 * buffer). rq_rcv_buf may be registered when reply chunks are used.
 *
 * Returns the base address of the request's send buffer, or NULL if
 * no rpcrdma_req was available or a buffer allocation failed. On an
 * allocation failure the req is handed back via rpcrdma_buffer_put
 * and failed_marshal_count is incremented.
 */
static void *
xprt_rdma_allocate(struct rpc_task *task, size_t size)
{
	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct rpcrdma_regbuf *newbuf;
	struct rpcrdma_req *req;
	size_t lower_bound;
	gfp_t gfp;

	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
	if (!req)
		return NULL;

	if (RPC_IS_SWAPPER(task))
		gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
	else
		gfp = GFP_NOIO | __GFP_NOWARN;

	if (!req->rl_rdmabuf) {
		lower_bound = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
		newbuf = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, lower_bound, gfp);
		if (IS_ERR(newbuf))
			goto out_fail;
		req->rl_rdmabuf = newbuf;
		/* A req that lacked an rdmabuf always gets a new sendbuf. */
	} else if (req->rl_sendbuf && size <= req->rl_sendbuf->rg_size) {
		/* The existing send buffer is already big enough. */
		goto out_ready;
	}

	/* XDR encoding and RPC/RDMA marshaling of this request has not
	 * happened yet, so a lower bound is enforced here to prevent a
	 * buffer overrun during marshaling.
	 *
	 * RPC/RDMA marshaling may choose to send payload-bearing ops
	 * inline when the result is smaller than the inline threshold.
	 * The "size" argument accounts for header requirements but not
	 * for the payload in those cases.
	 *
	 * Likewise, leave enough room to receive a reply up to the size
	 * of the inline threshold.
	 *
	 * It is unlikely that both the send header and the received
	 * reply will be large, but the slush gives the marshaling code
	 * some flexibility.
	 */
	lower_bound = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp);
	lower_bound += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
	if (size < lower_bound)
		size = lower_bound;

	newbuf = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, gfp);
	if (IS_ERR(newbuf))
		goto out_fail;
	newbuf->rg_owner = req;

	r_xprt->rx_stats.hardway_register_count += size;
	rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf);
	req->rl_sendbuf = newbuf;

out_ready:
	dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
	req->rl_connect_cookie = 0;	/* our reserved value */
	return req->rl_sendbuf->rg_base;

out_fail:
	rpcrdma_buffer_put(req);
	r_xprt->rx_stats.failed_marshal_count++;
	return NULL;
}
/* * Create unconnected endpoint. */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_cq *sendcq, *recvcq; struct ib_cq_init_attr cq_attr = {}; int rc, err; /* check provider's send/recv wr limits */ if (cdata->max_requests > devattr->max_qp_wr) cdata->max_requests = devattr->max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; rc = ia->ri_ops->ro_open(ia, ep, cdata); if (rc) return rc; ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; ep->rep_attr.qp_type = IB_QPT_RC; ep->rep_attr.port_num = ~0; if (cdata->padding) { ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding, GFP_KERNEL); if (IS_ERR(ep->rep_padbuf)) return PTR_ERR(ep->rep_padbuf); } else ep->rep_padbuf = NULL; dprintk("RPC: %s: requested max: dtos: send %d recv %d; " "iovs: send %d recv %d\n", __func__, ep->rep_attr.cap.max_send_wr, ep->rep_attr.cap.max_recv_wr, ep->rep_attr.cap.max_send_sge, ep->rep_attr.cap.max_recv_sge); /* set trigger for requesting send completion */ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS) ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS; else if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; INIT_CQCOUNT(ep); init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, rpcrdma_cq_async_error_upcall, ep, &cq_attr); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", __func__, rc); goto out1; } rc = 
ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); goto out2; } cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, rpcrdma_cq_async_error_upcall, ep, &cq_attr); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", __func__, rc); goto out2; } rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); ib_destroy_cq(recvcq); goto out2; } ep->rep_attr.send_cq = sendcq; ep->rep_attr.recv_cq = recvcq; /* Initialize cma parameters */ /* RPC/RDMA does not use private data */ ep->rep_remote_cma.private_data = NULL; ep->rep_remote_cma.private_data_len = 0; /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ep->rep_remote_cma.responder_resources = 32; else ep->rep_remote_cma.responder_resources = devattr->max_qp_rd_atom; ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.flow_control = 0; ep->rep_remote_cma.rnr_retry_count = 0; return 0; out2: err = ib_destroy_cq(sendcq); if (err) dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, err); out1: rpcrdma_free_regbuf(ia, ep->rep_padbuf); return rc; }