/**
 * xprt_rdma_destroy - Full tear down of transport
 * @xprt: doomed transport context
 *
 * Caller guarantees there will be no more calls to us with
 * this @xprt.
 */
static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	trace_xprtrdma_destroy(r_xprt);

	cancel_delayed_work_sync(&r_xprt->rx_connect_worker);

	rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
	rpcrdma_ia_close(&r_xprt->rx_ia);

	xprt_rdma_free_addresses(xprt);
	xprt_free(xprt);

	module_put(THIS_MODULE);
}
/*
 * xprt_rdma_destroy
 *
 * Destroy the xprt.
 * Free all memory associated with the object, including its own.
 * NOTE: none of the *destroy methods free memory for their top-level
 * objects, even though they may have allocated it (they do free
 * private memory). It's up to the caller to handle it. In this
 * case (RDMA transport), all structure memory is inlined with the
 * struct rpcrdma_xprt.
 */
static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	dprintk("RPC: %s: called\n", __func__);

	cancel_delayed_work_sync(&r_xprt->rx_connect_worker);

	xprt_clear_connected(xprt);

	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
	rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
	rpcrdma_ia_close(&r_xprt->rx_ia);

	xprt_rdma_free_addresses(xprt);
	xprt_free(xprt);

	dprintk("RPC: %s: returning\n", __func__);

	module_put(THIS_MODULE);
}
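The NOTE above works because the RDMA-private state is embedded in (allocated together with) the generic rpc_xprt, so a single xprt_free() of the top-level object releases everything; rpcx_to_rdmax() recovers the outer structure from the generic pointer. A minimal user-space sketch of that embedding pattern follows. The struct names and the to_rdma_xprt() helper are illustrative assumptions, not the kernel's actual definitions (the kernel uses container_of() for this).

/* Sketch only: a private context embedded in a generic one, so that
 * one free of the outer object releases both. All names here are
 * hypothetical stand-ins for rpc_xprt/rpcrdma_xprt/rpcx_to_rdmax(). */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

struct generic_xprt {			/* stands in for struct rpc_xprt */
	int connected;
};

struct rdma_xprt {			/* stands in for struct rpcrdma_xprt */
	struct generic_xprt gx;		/* embedded, not pointed to */
	int private_state;
};

/* Recover the outer structure from a pointer to the embedded member. */
static struct rdma_xprt *to_rdma_xprt(struct generic_xprt *gx)
{
	return (struct rdma_xprt *)((char *)gx - offsetof(struct rdma_xprt, gx));
}

int main(void)
{
	struct rdma_xprt *r = calloc(1, sizeof(*r));
	struct generic_xprt *gx = &r->gx;	/* what generic callers see */

	to_rdma_xprt(gx)->private_state = 42;
	printf("private_state = %d\n", r->private_state);

	free(r);	/* one free releases the generic and private parts */
	return 0;
}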
/**
 * xprt_setup_rdma - Set up transport to use RDMA
 *
 * @args: rpc transport arguments
 */
static struct rpc_xprt *
xprt_setup_rdma(struct xprt_create *args)
{
	struct rpcrdma_create_data_internal cdata;
	struct rpc_xprt *xprt;
	struct rpcrdma_xprt *new_xprt;
	struct rpcrdma_ep *new_ep;
	struct sockaddr *sap;
	int rc;

	if (args->addrlen > sizeof(xprt->addr)) {
		dprintk("RPC: %s: address too large\n", __func__);
		return ERR_PTR(-EBADF);
	}

	xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
	if (xprt == NULL) {
		dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
			__func__);
		return ERR_PTR(-ENOMEM);
	}

	/* 60 second timeout, no retries */
	xprt->timeout = &xprt_rdma_default_timeout;
	xprt->bind_timeout = RPCRDMA_BIND_TO;
	xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;

	xprt->resvport = 0;		/* privileged port not needed */
	xprt->tsh_size = 0;		/* RPC-RDMA handles framing */
	xprt->ops = &xprt_rdma_procs;

	/*
	 * Set up RDMA-specific connect data.
	 */
	sap = args->dstaddr;

	/* Ensure xprt->addr holds valid server TCP (not RDMA)
	 * address, for any side protocols which peek at it */
	xprt->prot = IPPROTO_TCP;
	xprt->addrlen = args->addrlen;
	memcpy(&xprt->addr, sap, xprt->addrlen);

	if (rpc_get_port(sap))
		xprt_set_bound(xprt);
	xprt_rdma_format_addresses(xprt, sap);

	cdata.max_requests = xprt_rdma_slot_table_entries;

	cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
	cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */

	cdata.inline_wsize = xprt_rdma_max_inline_write;
	if (cdata.inline_wsize > cdata.wsize)
		cdata.inline_wsize = cdata.wsize;

	cdata.inline_rsize = xprt_rdma_max_inline_read;
	if (cdata.inline_rsize > cdata.rsize)
		cdata.inline_rsize = cdata.rsize;

	/*
	 * Create new transport instance, which includes initialized
	 *  o ia
	 *  o endpoint
	 *  o buffers
	 */

	new_xprt = rpcx_to_rdmax(xprt);

	rc = rpcrdma_ia_open(new_xprt);
	if (rc)
		goto out1;

	/*
	 * initialize and create ep
	 */
	new_xprt->rx_data = cdata;
	new_ep = &new_xprt->rx_ep;

	rc = rpcrdma_ep_create(&new_xprt->rx_ep,
			       &new_xprt->rx_ia, &new_xprt->rx_data);
	if (rc)
		goto out2;

	rc = rpcrdma_buffer_create(new_xprt);
	if (rc)
		goto out3;

	INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
			  xprt_rdma_connect_worker);

	xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
	if (xprt->max_payload == 0)
		goto out4;
	xprt->max_payload <<= PAGE_SHIFT;
	dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
		__func__, xprt->max_payload);

	if (!try_module_get(THIS_MODULE))
		goto out4;

	dprintk("RPC: %s: %s:%s\n", __func__,
		xprt->address_strings[RPC_DISPLAY_ADDR],
		xprt->address_strings[RPC_DISPLAY_PORT]);
	trace_xprtrdma_create(new_xprt);
	return xprt;

out4:
	rpcrdma_buffer_destroy(&new_xprt->rx_buf);
	rc = -ENODEV;
out3:
	rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
	rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
	trace_xprtrdma_destroy(new_xprt);
	xprt_rdma_free_addresses(xprt);
	xprt_free(xprt);
	return ERR_PTR(rc);
}
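The sizing logic in xprt_setup_rdma() clamps the requested inline write/read sizes to the transport's RDMA read/write maximums, then derives the payload ceiling by converting the page count reported by the registration ops into bytes. A small user-space sketch of that arithmetic follows, using assumed values (a segment count of 8, 4 KiB pages, and an invented maxpages of 256) rather than the kernel's actual constants.

/* Sketch only: illustrates the clamping and max_payload computation
 * with assumed values; none of these constants are the kernel's. */
#include <stdio.h>
#include <stddef.h>

#define MAX_SEGS	8		/* assumed RPCRDMA_MAX_SEGS */
#define PAGE_SIZE_	4096UL		/* assumed page size */
#define PAGE_SHIFT_	12

int main(void)
{
	size_t wsize = MAX_SEGS * PAGE_SIZE_;	/* RDMA write max: 32768 */
	size_t inline_wsize = 65536;		/* requested inline size */

	if (inline_wsize > wsize)		/* clamp to the transport maximum */
		inline_wsize = wsize;

	size_t maxpages = 256;			/* assumed ro_maxpages() result */
	size_t max_payload = maxpages << PAGE_SHIFT_;	/* pages -> bytes */

	printf("inline_wsize=%zu max_payload=%zu\n", inline_wsize, max_payload);
	return 0;
}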
/*
 * Initialize buffer memory
 */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len;
	int i, rc;
	struct rpcrdma_mw *r;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);
	atomic_set(&buf->rb_credits, 1);

	/* Need to allocate:
	 *   1. arrays for send and recv pointers
	 *   2. arrays of struct rpcrdma_req to fill in pointers
	 *   3. array of struct rpcrdma_rep for replies
	 *   4. padding, if any
	 *   5. mw's, fmr's or frmr's, if any
	 * Send/recv buffers in req/rep need to be registered
	 */

	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	default:
		break;
	}

	/* allocate 1, 4 and 5 in one shot */
	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					       &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	/*
	 * Allocate the fmr's, or mw's for mw_bind chunk registration.
	 * We "cycle" the mw's in order to minimize rkey reuse,
	 * and also reduce unbind-to-bind collision.
	 */
	INIT_LIST_HEAD(&buf->rb_mws);
	r = (struct rpcrdma_mw *)p;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
							       RPCRDMA_MAX_SEGS);
			if (IS_ERR(r->r.frmr.fr_mr)) {
				rc = PTR_ERR(r->r.frmr.fr_mr);
				dprintk("RPC: %s: ib_alloc_fast_reg_mr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			r->r.frmr.fr_pgl =
				ib_alloc_fast_reg_page_list(ia->ri_id->device,
							    RPCRDMA_MAX_SEGS);
			if (IS_ERR(r->r.frmr.fr_pgl)) {
				rc = PTR_ERR(r->r.frmr.fr_pgl);
				dprintk("RPC: %s: "
					"ib_alloc_fast_reg_page_list "
					"failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			static struct ib_fmr_attr fa =
				{ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
				&fa);
			if (IS_ERR(r->r.fmr)) {
				rc = PTR_ERR(r->r.fmr);
				dprintk("RPC: %s: ib_alloc_fmr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	case RPCRDMA_MEMWINDOWS_ASYNC:
	case RPCRDMA_MEMWINDOWS:
		/* Allocate one extra request's worth, for full cycling */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.mw = ib_alloc_mw(ia->ri_pd);
			if (IS_ERR(r->r.mw)) {
				rc = PTR_ERR(r->r.mw);
				dprintk("RPC: %s: ib_alloc_mw"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
		/* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
		/* Typical ~2400b, so rounding up saves work later */
		if (len < 4096)
			len = 4096;
		req = kmalloc(len, GFP_KERNEL);
		if (req == NULL) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(req, 0, sizeof(struct rpcrdma_req));
		buf->rb_send_bufs[i] = req;
		buf->rb_send_bufs[i]->rl_buffer = buf;

		rc = rpcrdma_register_internal(ia, req->rl_base,
				len - offsetof(struct rpcrdma_req, rl_base),
				&buf->rb_send_bufs[i]->rl_handle,
				&buf->rb_send_bufs[i]->rl_iov);
		if (rc)
			goto out;

		buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);

		len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
		rep = kmalloc(len, GFP_KERNEL);
		if (rep == NULL) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(rep, 0, sizeof(struct rpcrdma_rep));
		buf->rb_recv_bufs[i] = rep;
		buf->rb_recv_bufs[i]->rr_buffer = buf;
		init_waitqueue_head(&rep->rr_unbind);

		rc = rpcrdma_register_internal(ia, rep->rr_base,
				len - offsetof(struct rpcrdma_rep, rr_base),
				&buf->rb_recv_bufs[i]->rr_handle,
				&buf->rb_recv_bufs[i]->rr_iov);
		if (rc)
			goto out;
	}
	dprintk("RPC: %s: max_requests %d\n",
		__func__, buf->rb_max_requests);
	/* done */
	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}
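rpcrdma_buffer_create() sizes a single kzalloc() to hold the send/recv pointer arrays, the optional pad buffer, and the MW array, then walks a char pointer through that allocation to carve out each region, storing the base in rb_pool so it can be freed in one shot later. A stripped-down user-space sketch of that carve-up pattern follows; the structure names, region sizes, and counts are invented for illustration and do not match the kernel's.

/* Sketch only: one allocation carved into a pointer-array region, a
 * padding region, and an array of descriptors, mirroring how
 * rpcrdma_buffer_create() partitions rb_pool. All names and sizes
 * here are illustrative assumptions. */
#include <stdio.h>
#include <stdlib.h>

struct req;				/* opaque for the sketch */
struct descriptor { int id; };		/* stands in for struct rpcrdma_mw */

int main(void)
{
	size_t max_requests = 4, padding = 64, ndesc = 8;
	size_t len = max_requests * sizeof(struct req *)	/* 1. pointer array */
		   + padding					/* 2. pad buffer   */
		   + ndesc * sizeof(struct descriptor);		/* 3. descriptors  */

	char *p = calloc(1, len);
	if (!p)
		return 1;
	char *pool = p;				/* keep the base for the final free */

	struct req **send_bufs = (struct req **)p;		/* region 1 */
	p = (char *)&send_bufs[max_requests];

	char *pad = p;						/* region 2 */
	p += padding;

	struct descriptor *desc = (struct descriptor *)p;	/* region 3 */
	for (size_t i = 0; i < ndesc; i++)
		desc[i].id = (int)i;

	printf("pool=%p pad=%p desc[0]=%d\n", (void *)pool, (void *)pad, desc[0].id);
	free(pool);				/* one free releases all regions */
	return 0;
}

As in the kernel function, keeping the original base pointer (rb_pool above) is what allows the teardown path to release every region with a single kfree().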