/** * xprt_rdma_close - close a transport connection * @xprt: transport context * * Called during transport shutdown, reconnect, or device removal. * Caller holds @xprt's send lock to prevent activity on this * transport while the connection is torn down. */ static void xprt_rdma_close(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpcrdma_ia *ia = &r_xprt->rx_ia; dprintk("RPC: %s: closing xprt %p\n", __func__, xprt); if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { xprt_clear_connected(xprt); rpcrdma_ia_remove(ia); return; } if (ep->rep_connected == -ENODEV) return; if (ep->rep_connected > 0) xprt->reestablish_timeout = 0; xprt_disconnect_done(xprt); rpcrdma_ep_disconnect(ep, ia); /* Prepare @xprt for the next connection by reinitializing * its credit grant to one (see RFC 8166, Section 3.3.3). */ r_xprt->rx_buf.rb_credits = 1; xprt->cwnd = RPC_CWNDSHIFT; }
/** * xprt_rdma_allocate - allocate transport resources for an RPC * @task: RPC task * * Return values: * 0: Success; rq_buffer points to RPC buffer to use * ENOMEM: Out of memory, call again later * EIO: A permanent error occurred, do not retry */ static int xprt_rdma_allocate(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); gfp_t flags; flags = RPCRDMA_DEF_GFP; if (RPC_IS_SWAPPER(task)) flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags)) goto out_fail; if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) goto out_fail; rqst->rq_buffer = req->rl_sendbuf->rg_base; rqst->rq_rbuffer = req->rl_recvbuf->rg_base; trace_xprtrdma_allocate(task, req); return 0; out_fail: trace_xprtrdma_allocate(task, NULL); return -ENOMEM; }
static int rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) { struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); __be32 *p; int rc; /* Space in the send buffer for an RPC/RDMA header is reserved * via xprt->tsh_size. */ p = rqst->rq_buffer; *p++ = rqst->rq_xid; *p++ = rpcrdma_version; *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); *p++ = rdma_msg; *p++ = xdr_zero; *p++ = xdr_zero; *p = xdr_zero; #ifdef SVCRDMA_BACKCHANNEL_DEBUG pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); #endif rc = svc_rdma_bc_sendto(rdma, rqst); if (rc) goto drop_connection; return rc; drop_connection: dprintk("svcrdma: failed to send bc call\n"); xprt_disconnect_done(xprt); return -ENOTCONN; }
static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); long idle_time = 0; if (xprt_connected(xprt)) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_printf(seq, "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu " "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n", 0, /* need a local port? */ xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time, idle_time, xprt->stat.sends, xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, xprt->stat.bklog_u, r_xprt->rx_stats.read_chunk_count, r_xprt->rx_stats.write_chunk_count, r_xprt->rx_stats.reply_chunk_count, r_xprt->rx_stats.total_rdma_request, r_xprt->rx_stats.total_rdma_reply, r_xprt->rx_stats.pullup_copy_count, r_xprt->rx_stats.fixup_copy_count, r_xprt->rx_stats.hardway_register_count, r_xprt->rx_stats.failed_marshal_count, r_xprt->rx_stats.bad_reply_count); }
/* It shouldn't matter if the number of backchannel session slots * doesn't match the number of RPC/RDMA credits. That just means * one or the other will have extra slots that aren't used. */ static struct rpc_xprt * xprt_setup_rdma_bc(struct xprt_create *args) { struct rpc_xprt *xprt; struct rpcrdma_xprt *new_xprt; if (args->addrlen > sizeof(xprt->addr)) { dprintk("RPC: %s: address too large\n", __func__); return ERR_PTR(-EBADF); } xprt = xprt_alloc(args->net, sizeof(*new_xprt), RPCRDMA_MAX_BC_REQUESTS, RPCRDMA_MAX_BC_REQUESTS); if (!xprt) { dprintk("RPC: %s: couldn't allocate rpc_xprt\n", __func__); return ERR_PTR(-ENOMEM); } xprt->timeout = &xprt_rdma_bc_timeout; xprt_set_bound(xprt); xprt_set_connected(xprt); xprt->bind_timeout = RPCRDMA_BIND_TO; xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; xprt->prot = XPRT_TRANSPORT_BC_RDMA; xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32); xprt->ops = &xprt_rdma_bc_procs; memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr); xprt->resvport = 0; xprt->max_payload = xprt_rdma_max_inline_read; new_xprt = rpcx_to_rdmax(xprt); new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs; xprt_get(xprt); args->bc_xprt->xpt_bc_xprt = xprt; xprt->bc_xprt = args->bc_xprt; if (!try_module_get(THIS_MODULE)) goto out_fail; /* Final put for backchannel xprt is in __svc_rdma_free */ xprt_get(xprt); return xprt; out_fail: xprt_rdma_free_addresses(xprt); args->bc_xprt->xpt_bc_xprt = NULL; args->bc_xprt->xpt_bc_xps = NULL; xprt_put(xprt); xprt_free(xprt); return ERR_PTR(-EINVAL); }
/** * xprt_rdma_inject_disconnect - inject a connection fault * @xprt: transport context * * If @xprt is connected, disconnect it to simulate spurious connection * loss. */ static void xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); trace_xprtrdma_inject_dsc(r_xprt); rdma_disconnect(r_xprt->rx_ia.ri_id); }
/* * Close a connection, during shutdown or timeout/reconnect */ static void xprt_rdma_close(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); dprintk("RPC: %s: closing\n", __func__); if (r_xprt->rx_ep.rep_connected > 0) xprt->reestablish_timeout = 0; xprt_disconnect_done(xprt); rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); }
/**
 * xprt_rdma_free - release resources allocated by xprt_rdma_allocate
 * @task: RPC task
 *
 * Caller guarantees rqst->rq_buffer is non-NULL.
 *
 * rpcrdma_release_rqst() is called only while RPCRDMA_REQ_F_PENDING
 * is still set in the request's flags. The completion tracepoint is
 * always emitted.
 */
static void
xprt_rdma_free(struct rpc_task *task)
{
	struct rpc_rqst *rqst = task->tk_rqstp;
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);

	if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
		rpcrdma_release_rqst(rpcx_to_rdmax(rqst->rq_xprt), req);

	trace_xprtrdma_rpc_done(task, req);
}
/** * xprt_rdma_send_request - marshal and send an RPC request * @rqst: RPC message in rq_snd_buf * * Caller holds the transport's write lock. * * Returns: * %0 if the RPC message has been sent * %-ENOTCONN if the caller should reconnect and call again * %-EAGAIN if the caller should call again * %-ENOBUFS if the caller should call again after a delay * %-EIO if a permanent error occurred and the request was not * sent. Do not try to send this message again. */ static int xprt_rdma_send_request(struct rpc_rqst *rqst) { struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); int rc = 0; #if defined(CONFIG_SUNRPC_BACKCHANNEL) if (unlikely(!rqst->rq_buffer)) return xprt_rdma_bc_send_reply(rqst); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ if (!xprt_connected(xprt)) goto drop_connection; if (!xprt_request_get_cong(xprt, rqst)) return -EBADSLT; rc = rpcrdma_marshal_req(r_xprt, rqst); if (rc < 0) goto failed_marshal; /* Must suppress retransmit to maintain credits */ if (rqst->rq_connect_cookie == xprt->connect_cookie) goto drop_connection; rqst->rq_xtime = ktime_get(); __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; rqst->rq_bytes_sent = 0; /* An RPC with no reply will throw off credit accounting, * so drop the connection to reset the credit grant. */ if (!rpc_reply_expected(rqst->rq_task)) goto drop_connection; return 0; failed_marshal: if (rc != -ENOTCONN) return rc; drop_connection: xprt_disconnect_done(xprt); return -ENOTCONN; /* implies disconnect */ }
static int xprt_rdma_send_request(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); int rc = 0; if (req->rl_niovs == 0) rc = rpcrdma_marshal_req(rqst); else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL) rc = rpcrdma_marshal_chunks(rqst, 0); if (rc < 0) goto failed_marshal; if (req->rl_reply == NULL) /* e.g. reconnection */ rpcrdma_recv_buffer_get(req); if (req->rl_reply) { req->rl_reply->rr_func = rpcrdma_reply_handler; /* this need only be done once, but... */ req->rl_reply->rr_xprt = xprt; } /* Must suppress retransmit to maintain credits */ if (req->rl_connect_cookie == xprt->connect_cookie) goto drop_connection; req->rl_connect_cookie = xprt->connect_cookie; if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; rqst->rq_bytes_sent = 0; return 0; failed_marshal: r_xprt->rx_stats.failed_marshal_count++; dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", __func__, rc); if (rc == -EIO) return -EIO; drop_connection: xprt_disconnect_done(xprt); return -ENOTCONN; /* implies disconnect */ }
/** * xprt_rdma_alloc_slot - allocate an rpc_rqst * @xprt: controlling RPC transport * @task: RPC task requesting a fresh rpc_rqst * * tk_status values: * %0 if task->tk_rqstp points to a fresh rpc_rqst * %-EAGAIN if no rpc_rqst is available; queued on backlog */ static void xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_req *req; req = rpcrdma_buffer_get(&r_xprt->rx_buf); if (!req) goto out_sleep; task->tk_rqstp = &req->rl_slot; task->tk_status = 0; return; out_sleep: rpc_sleep_on(&xprt->backlog, task, NULL); task->tk_status = -EAGAIN; }
/** * xprt_rdma_destroy - Full tear down of transport * @xprt: doomed transport context * * Caller guarantees there will be no more calls to us with * this @xprt. */ static void xprt_rdma_destroy(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); trace_xprtrdma_destroy(r_xprt); cancel_delayed_work_sync(&r_xprt->rx_connect_worker); rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); rpcrdma_buffer_destroy(&r_xprt->rx_buf); rpcrdma_ia_close(&r_xprt->rx_ia); xprt_rdma_free_addresses(xprt); xprt_free(xprt); module_put(THIS_MODULE); }
/** * xprt_rdma_connect - try to establish a transport connection * @xprt: transport state * @task: RPC scheduler context * */ static void xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); if (r_xprt->rx_ep.rep_connected != 0) { /* Reconnect */ schedule_delayed_work(&r_xprt->rx_connect_worker, xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; } else { schedule_delayed_work(&r_xprt->rx_connect_worker, 0); if (!RPC_IS_ASYNC(task)) flush_delayed_work(&r_xprt->rx_connect_worker); } }
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); long idle_time = 0; if (xprt_connected(xprt)) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_puts(seq, "\txprt:\trdma "); seq_printf(seq, "%u %lu %lu %lu %ld %lu %lu %lu %llu %llu ", 0, /* need a local port? */ xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time / HZ, idle_time, xprt->stat.sends, xprt->stat.recvs, xprt->stat.bad_xids, xprt->stat.req_u, xprt->stat.bklog_u); seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu ", r_xprt->rx_stats.read_chunk_count, r_xprt->rx_stats.write_chunk_count, r_xprt->rx_stats.reply_chunk_count, r_xprt->rx_stats.total_rdma_request, r_xprt->rx_stats.total_rdma_reply, r_xprt->rx_stats.pullup_copy_count, r_xprt->rx_stats.fixup_copy_count, r_xprt->rx_stats.hardway_register_count, r_xprt->rx_stats.failed_marshal_count, r_xprt->rx_stats.bad_reply_count, r_xprt->rx_stats.nomsg_call_count); seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n", r_xprt->rx_stats.mrs_recycled, r_xprt->rx_stats.mrs_orphaned, r_xprt->rx_stats.mrs_allocated, r_xprt->rx_stats.local_inv_needed, r_xprt->rx_stats.empty_sendctx_q, r_xprt->rx_stats.reply_waits_for_send); }
static int xprt_rdma_send_request(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); int rc = 0; rc = rpcrdma_marshal_req(rqst); if (rc < 0) goto failed_marshal; if (req->rl_reply == NULL) /* e.g. reconnection */ rpcrdma_recv_buffer_get(req); /* Must suppress retransmit to maintain credits */ if (req->rl_connect_cookie == xprt->connect_cookie) goto drop_connection; req->rl_connect_cookie = xprt->connect_cookie; if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; rqst->rq_bytes_sent = 0; return 0; failed_marshal: r_xprt->rx_stats.failed_marshal_count++; dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", __func__, rc); if (rc == -EIO) return -EIO; drop_connection: xprt_disconnect_done(xprt); return -ENOTCONN; /* implies disconnect */ }
static int rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) { struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct svc_rdma_send_ctxt *ctxt; __be32 *p; int rc; ctxt = svc_rdma_send_ctxt_get(rdma); if (!ctxt) goto drop_connection; p = ctxt->sc_xprt_buf; *p++ = rqst->rq_xid; *p++ = rpcrdma_version; *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); *p++ = rdma_msg; *p++ = xdr_zero; *p++ = xdr_zero; *p = xdr_zero; svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN); #ifdef SVCRDMA_BACKCHANNEL_DEBUG pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); #endif rc = svc_rdma_bc_sendto(rdma, rqst, ctxt); if (rc) { svc_rdma_send_ctxt_put(rdma, ctxt); goto drop_connection; } return 0; drop_connection: dprintk("svcrdma: failed to send bc call\n"); return -ENOTCONN; }
/* * xprt_rdma_destroy * * Destroy the xprt. * Free all memory associated with the object, including its own. * NOTE: none of the *destroy methods free memory for their top-level * objects, even though they may have allocated it (they do free * private memory). It's up to the caller to handle it. In this * case (RDMA transport), all structure memory is inlined with the * struct rpcrdma_xprt. */ static void xprt_rdma_destroy(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); dprintk("RPC: %s: called\n", __func__); cancel_delayed_work_sync(&r_xprt->rx_connect_worker); xprt_clear_connected(xprt); rpcrdma_buffer_destroy(&r_xprt->rx_buf); rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); rpcrdma_ia_close(&r_xprt->rx_ia); xprt_rdma_free_addresses(xprt); xprt_free(xprt); dprintk("RPC: %s: returning\n", __func__); module_put(THIS_MODULE); }
/*
 * Handle an RDMA connection manager event for the transport stored in
 * id->context.
 *
 * Address and route resolution events (success or error) complete
 * ia->ri_done; the error variants first store an errno in
 * ia->ri_async_rc. Connection-state events record the new state in
 * ep->rep_connected (1 when established, a negative errno otherwise),
 * reset the credit count to one, invoke ep->rep_func and wake all
 * waiters on ep->rep_connect_wait. Any other event is treated like a
 * resolution error with -EINVAL.
 *
 * Always returns 0.
 */
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		/* Resolution succeeded: ri_async_rc is left untouched */
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		/* NOTE(review): the return value of ib_query_qp() is not
		 * checked, so attr may be uninitialized when printed below.
		 */
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
		/* Falls through to the shared connection-state handling */
connected:
		/* NOTE(review): 11 is presumably the last valid index of
		 * the conn[] name table defined elsewhere; confirm.
		 */
		dprintk("RPC: %s: %s: %u.%u.%u.%u:%u"
			" (ep 0x%p event 0x%x)\n",
			__func__,
			(event->event <= 11) ? conn[event->event] :
						"unknown connection error",
			NIPQUAD(addr->sin_addr.s_addr),
			ntohs(addr->sin_port),
			ep, event->event);
		/* Every connection-state change resets the credits to one */
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
					__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		ia->ri_async_rc = -EINVAL;
		dprintk("RPC: %s: unexpected CM event %X\n",
			__func__, event->event);
		complete(&ia->ri_done);
		break;
	}

	return 0;
}
/** * xprt_setup_rdma - Set up transport to use RDMA * * @args: rpc transport arguments */ static struct rpc_xprt * xprt_setup_rdma(struct xprt_create *args) { struct rpcrdma_create_data_internal cdata; struct rpc_xprt *xprt; struct rpcrdma_xprt *new_xprt; struct rpcrdma_ep *new_ep; struct sockaddr_in *sin; int rc; if (args->addrlen > sizeof(xprt->addr)) { dprintk("RPC: %s: address too large\n", __func__); return ERR_PTR(-EBADF); } xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), xprt_rdma_slot_table_entries, xprt_rdma_slot_table_entries); if (xprt == NULL) { dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", __func__); return ERR_PTR(-ENOMEM); } /* 60 second timeout, no retries */ xprt->timeout = &xprt_rdma_default_timeout; xprt->bind_timeout = RPCRDMA_BIND_TO; xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; xprt->resvport = 0; /* privileged port not needed */ xprt->tsh_size = 0; /* RPC-RDMA handles framing */ xprt->ops = &xprt_rdma_procs; /* * Set up RDMA-specific connect data. 
*/ /* Put server RDMA address in local cdata */ memcpy(&cdata.addr, args->dstaddr, args->addrlen); /* Ensure xprt->addr holds valid server TCP (not RDMA) * address, for any side protocols which peek at it */ xprt->prot = IPPROTO_TCP; xprt->addrlen = args->addrlen; memcpy(&xprt->addr, &cdata.addr, xprt->addrlen); sin = (struct sockaddr_in *)&cdata.addr; if (ntohs(sin->sin_port) != 0) xprt_set_bound(xprt); dprintk("RPC: %s: %pI4:%u\n", __func__, &sin->sin_addr.s_addr, ntohs(sin->sin_port)); /* Set max requests */ cdata.max_requests = xprt->max_reqs; /* Set some length limits */ cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ cdata.inline_wsize = xprt_rdma_max_inline_write; if (cdata.inline_wsize > cdata.wsize) cdata.inline_wsize = cdata.wsize; cdata.inline_rsize = xprt_rdma_max_inline_read; if (cdata.inline_rsize > cdata.rsize) cdata.inline_rsize = cdata.rsize; cdata.padding = xprt_rdma_inline_write_padding; /* * Create new transport instance, which includes initialized * o ia * o endpoint * o buffers */ new_xprt = rpcx_to_rdmax(xprt); rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr, xprt_rdma_memreg_strategy); if (rc) goto out1; /* * initialize and create ep */ new_xprt->rx_data = cdata; new_ep = &new_xprt->rx_ep; new_ep->rep_remote_addr = cdata.addr; rc = rpcrdma_ep_create(&new_xprt->rx_ep, &new_xprt->rx_ia, &new_xprt->rx_data); if (rc) goto out2; /* * Allocate pre-registered send and receive buffers for headers and * any inline data. Also specify any padding which will be provided * from a preregistered zero buffer. */ rc = rpcrdma_buffer_create(new_xprt); if (rc) goto out3; /* * Register a callback for connection events. This is necessary because * connection loss notification is async. We also catch connection loss * when reaping receives. 
*/ INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, xprt_rdma_connect_worker); xprt_rdma_format_addresses(xprt); xprt->max_payload = rpcrdma_max_payload(new_xprt); dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", __func__, xprt->max_payload); if (!try_module_get(THIS_MODULE)) goto out4; return xprt; out4: xprt_rdma_free_addresses(xprt); rc = -EINVAL; out3: rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: xprt_free(xprt); return ERR_PTR(rc); }
/*
 * The RDMA allocate/free functions need the task structure as a place
 * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
 * sequence.
 *
 * The RPC layer allocates both send and receive buffers in the same call
 * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer).
 * We may register rq_rcv_buf when using reply chunks.
 *
 * Takes an rpcrdma_req from the transport's buffer pool and makes sure
 * it has an rl_rdmabuf and an rl_sendbuf of at least @size bytes,
 * allocating registered buffers as needed.
 *
 * Returns the base address of the send buffer, or NULL when no
 * request is available or a registered buffer cannot be allocated.
 * On allocation failure the request is returned to the pool.
 */
static void *
xprt_rdma_allocate(struct rpc_task *task, size_t size)
{
	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct rpcrdma_regbuf *rb;
	struct rpcrdma_req *req;
	size_t min_size;
	gfp_t flags;

	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
	if (req == NULL)
		return NULL;

	/* Swapper tasks get a non-waiting allocation mask */
	flags = GFP_NOIO | __GFP_NOWARN;
	if (RPC_IS_SWAPPER(task))
		flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;

	/* Fast path: both buffers exist and the send buffer is big enough */
	if (req->rl_rdmabuf == NULL)
		goto out_rdmabuf;
	if (req->rl_sendbuf == NULL)
		goto out_sendbuf;
	if (size > req->rl_sendbuf->rg_size)
		goto out_sendbuf;

out:
	dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
	req->rl_connect_cookie = 0;	/* our reserved value */
	return req->rl_sendbuf->rg_base;

out_rdmabuf:
	min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
	rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags);
	if (IS_ERR(rb))
		goto out_fail;
	req->rl_rdmabuf = rb;
	/* Falls through: a new send buffer is allocated on this path too */

out_sendbuf:
	/* XDR encoding and RPC/RDMA marshaling of this request has not
	 * yet occurred. Thus a lower bound is needed to prevent buffer
	 * overrun during marshaling.
	 *
	 * RPC/RDMA marshaling may choose to send payload bearing ops
	 * inline, if the result is smaller than the inline threshold.
	 * The value of the "size" argument accounts for header
	 * requirements but not for the payload in these cases.
	 *
	 * Likewise, allocate enough space to receive a reply up to the
	 * size of the inline threshold.
	 *
	 * It's unlikely that both the send header and the received
	 * reply will be large, but slush is provided here to allow
	 * flexibility when marshaling.
	 */
	min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp);
	min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
	if (size < min_size)
		size = min_size;

	rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags);
	if (IS_ERR(rb))
		goto out_fail;
	rb->rg_owner = req;

	/* The old send buffer is freed only after its replacement exists */
	r_xprt->rx_stats.hardway_register_count += size;
	rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf);
	req->rl_sendbuf = rb;
	goto out;

out_fail:
	rpcrdma_buffer_put(req);
	r_xprt->rx_stats.failed_marshal_count++;
	return NULL;
}
/*
 * Match a received backchannel reply to its pending request and
 * complete that request.
 *
 * @xprt:   transport the reply arrived on
 * @rmsgp:  RPC/RDMA header of the reply; supplies the XID and credits
 * @rcvbuf: received message; its head kvec is copied into the
 *          request's private receive buffer
 *
 * With the transport lock held, the request is looked up by XID, the
 * reply head is copied, the congestion window is updated from the
 * credit value in the header, and the request is completed. On
 * success rcvbuf->len is set to zero.
 *
 * Returns 0 on success. Returns -EAGAIN when the reply head is
 * shorter than 24 bytes, no request matches the XID, or the request's
 * receive head is too small for the reply.
 */
int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
			     struct xdr_buf *rcvbuf)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct kvec *dst, *src = &rcvbuf->head[0];
	struct rpc_rqst *req;
	unsigned long cwnd;
	u32 credits;
	size_t len;
	__be32 xid;
	__be32 *p;
	int ret;

	p = (__be32 *)src->iov_base;
	len = src->iov_len;
	xid = rmsgp->rm_xid;

#ifdef SVCRDMA_BACKCHANNEL_DEBUG
	pr_info("%s: xid=%08x, length=%zu\n",
		__func__, be32_to_cpu(xid), len);
	pr_info("%s: RPC/RDMA: %*ph\n",
		__func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
	pr_info("%s: RPC: %*ph\n",
		__func__, (int)len, p);
#endif

	ret = -EAGAIN;
	/* NOTE(review): 24 is presumably the minimum RPC reply size; confirm */
	if (src->iov_len < 24)
		goto out_shortreply;

	spin_lock_bh(&xprt->transport_lock);
	req = xprt_lookup_rqst(xprt, xid);
	if (!req)
		goto out_notfound;

	/* Start from a fresh copy of the request's receive buffer layout */
	dst = &req->rq_private_buf.head[0];
	memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
	if (dst->iov_len < len)
		goto out_unlock;
	memcpy(dst->iov_base, p, len);

	/* Clamp the granted credits to [1, rb_bc_max_requests] */
	credits = be32_to_cpu(rmsgp->rm_credit);
	if (credits == 0)
		credits = 1;	/* don't deadlock */
	else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
		credits = r_xprt->rx_buf.rb_bc_max_requests;

	/* Release congestion only when the window has grown */
	cwnd = xprt->cwnd;
	xprt->cwnd = credits << RPC_CWNDSHIFT;
	if (xprt->cwnd > cwnd)
		xprt_release_rqst_cong(req->rq_task);

	ret = 0;
	xprt_complete_rqst(req->rq_task, rcvbuf->len);
	rcvbuf->len = 0;

out_unlock:
	spin_unlock_bh(&xprt->transport_lock);
out:
	return ret;

out_shortreply:
	dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
		xprt, src->iov_len);
	goto out;

out_notfound:
	dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
		xprt, be32_to_cpu(xid));

	goto out_unlock;
}
/**
 * xprt_setup_rdma - Set up transport to use RDMA
 *
 * @args: rpc transport arguments
 *
 * Allocates an rpc_xprt, fills in its timeouts and address, then
 * opens the interface adapter, creates the endpoint and creates the
 * buffers, in that order. Each failure unwinds the steps already
 * completed.
 *
 * Returns the new transport, or an ERR_PTR: -EBADF if the address is
 * too large, -ENOMEM if allocation fails, the error from the failing
 * setup step, or -ENODEV if the maximum payload is zero or the module
 * reference cannot be taken.
 */
static struct rpc_xprt *
xprt_setup_rdma(struct xprt_create *args)
{
	struct rpcrdma_create_data_internal cdata;
	struct rpc_xprt *xprt;
	struct rpcrdma_xprt *new_xprt;
	struct rpcrdma_ep *new_ep;
	struct sockaddr *sap;
	int rc;

	if (args->addrlen > sizeof(xprt->addr)) {
		dprintk("RPC: %s: address too large\n", __func__);
		return ERR_PTR(-EBADF);
	}

	xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
	if (xprt == NULL) {
		dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
			__func__);
		return ERR_PTR(-ENOMEM);
	}

	/* 60 second timeout, no retries */
	xprt->timeout = &xprt_rdma_default_timeout;
	xprt->bind_timeout = RPCRDMA_BIND_TO;
	xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;

	xprt->resvport = 0;		/* privileged port not needed */
	xprt->tsh_size = 0;		/* RPC-RDMA handles framing */
	xprt->ops = &xprt_rdma_procs;

	/*
	 * Set up RDMA-specific connect data.
	 */
	sap = args->dstaddr;

	/* Ensure xprt->addr holds valid server TCP (not RDMA)
	 * address, for any side protocols which peek at it */
	xprt->prot = IPPROTO_TCP;
	xprt->addrlen = args->addrlen;
	memcpy(&xprt->addr, sap, xprt->addrlen);

	/* A non-zero port in the destination address marks the xprt bound */
	if (rpc_get_port(sap))
		xprt_set_bound(xprt);
	xprt_rdma_format_addresses(xprt, sap);

	cdata.max_requests = xprt_rdma_slot_table_entries;

	cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
	cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */

	/* Inline thresholds may not exceed the corresponding RDMA limit */
	cdata.inline_wsize = xprt_rdma_max_inline_write;
	if (cdata.inline_wsize > cdata.wsize)
		cdata.inline_wsize = cdata.wsize;

	cdata.inline_rsize = xprt_rdma_max_inline_read;
	if (cdata.inline_rsize > cdata.rsize)
		cdata.inline_rsize = cdata.rsize;

	/*
	 * Create new transport instance, which includes initialized
	 *  o ia
	 *  o endpoint
	 *  o buffers
	 */

	new_xprt = rpcx_to_rdmax(xprt);

	rc = rpcrdma_ia_open(new_xprt);
	if (rc)
		goto out1;

	/*
	 * initialize and create ep
	 */
	new_xprt->rx_data = cdata;
	new_ep = &new_xprt->rx_ep;

	rc = rpcrdma_ep_create(&new_xprt->rx_ep,
				&new_xprt->rx_ia, &new_xprt->rx_data);
	if (rc)
		goto out2;

	rc = rpcrdma_buffer_create(new_xprt);
	if (rc)
		goto out3;

	INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
			  xprt_rdma_connect_worker);

	/* ro_maxpages() yields a page count; zero is treated as failure */
	xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
	if (xprt->max_payload == 0)
		goto out4;
	xprt->max_payload <<= PAGE_SHIFT;
	dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
		__func__, xprt->max_payload);

	if (!try_module_get(THIS_MODULE))
		goto out4;

	dprintk("RPC: %s: %s:%s\n", __func__,
		xprt->address_strings[RPC_DISPLAY_ADDR],
		xprt->address_strings[RPC_DISPLAY_PORT]);
	trace_xprtrdma_create(new_xprt);
	return xprt;

	/* Unwind in reverse order of setup; each label falls through */
out4:
	rpcrdma_buffer_destroy(&new_xprt->rx_buf);
	rc = -ENODEV;
out3:
	rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
	rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
	trace_xprtrdma_destroy(new_xprt);
	xprt_rdma_free_addresses(xprt);
	xprt_free(xprt);
	return ERR_PTR(rc);
}
/*
 * Handle an RDMA connection manager event for the transport stored in
 * id->context.
 *
 * Address and route resolution events complete ia->ri_done after
 * storing the outcome in ia->ri_async_rc (0 on success, a negative
 * errno on error). Connection-state events record the new state in
 * ep->rep_connected (1 when established, a negative errno otherwise),
 * reset the credit count to one, invoke ep->rep_func and wake all
 * waiters on ep->rep_connect_wait. Any other event is only logged.
 *
 * With RPC_DEBUG defined, connection establishment and loss are also
 * reported via printk.
 *
 * Always returns 0.
 */
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
	/* NOTE(review): addr is also named in a dprintk() below, outside
	 * this guard; presumably dprintk() expands to nothing without
	 * RPC_DEBUG -- confirm.
	 */
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		/* NOTE(review): the return value of ib_query_qp() is not
		 * checked, so attr may be uninitialized when read below.
		 */
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
		/* Falls through to the shared connection-state handling */
connected:
		/* NOTE(review): 11 is presumably the last valid index of
		 * the conn[] name table defined elsewhere; confirm.
		 */
		dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
			__func__,
			(event->event <= 11) ? conn[event->event] :
						"unknown connection error",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ep, event->event);
		/* Every connection-state change resets the credits to one */
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
					__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		dprintk("RPC: %s: unexpected CM event %d\n",
			__func__, event->event);
		break;
	}

#ifdef RPC_DEBUG
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		/* "(low!)" is appended when ird is below 4 and below tird / 2 */
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}