/*
 * Send Queue Completion Handler - potentially called on interrupt context.
 *
 * Note that caller must hold a transport reference.
 */
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_op_ctxt *ctxt = NULL;
	struct ib_wc wc;
	struct ib_cq *cq = xprt->sc_sq_cq;
	int ret;

	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
		return;

	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
	atomic_inc(&rdma_stat_sq_poll);
	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
		if (wc.status != IB_WC_SUCCESS)
			/* Close the transport */
			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);

		/* Decrement used SQ WR count */
		atomic_dec(&xprt->sc_sq_count);
		wake_up(&xprt->sc_send_wait);

		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
		if (ctxt)
			process_context(xprt, ctxt);

		svc_xprt_put(&xprt->sc_xprt);
	}

	if (ctxt)
		atomic_inc(&rdma_stat_sq_prod);
}
/**
 * svc_rdma_wc_read - Invoked by RDMA provider for each polled Read WC
 * @cq:        completion queue
 * @wc:        completed WR
 *
 */
void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *xprt = cq->cq_context;
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_op_ctxt *ctxt;

	svc_rdma_send_wc_common(xprt, wc, "read");

	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_frmr(xprt, ctxt->frmr);

	if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
		struct svc_rdma_op_ctxt *read_hdr;

		read_hdr = ctxt->read_hdr;
		spin_lock(&xprt->sc_rq_dto_lock);
		list_add_tail(&read_hdr->dto_q,
			      &xprt->sc_read_complete_q);
		spin_unlock(&xprt->sc_rq_dto_lock);

		set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
		svc_xprt_enqueue(&xprt->sc_xprt);
	}

	svc_rdma_put_context(ctxt, 0);
	svc_xprt_put(&xprt->sc_xprt);
}
static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
					const char *opname)
{
	struct svcxprt_rdma *xprt = cq->cq_context;

	svc_rdma_send_wc_common(xprt, wc, opname);
	svc_xprt_put(&xprt->sc_xprt);
}
/*
 * rq_cq_reap - Process the RQ CQ.
 *
 * Take all completing WC off the CQ and enqueue the associated DTO
 * context on the dto_q for the transport.
 *
 * Note that caller must hold a transport reference.
 */
static void rq_cq_reap(struct svcxprt_rdma *xprt)
{
	int ret;
	struct ib_wc wc;
	struct svc_rdma_op_ctxt *ctxt = NULL;

	if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
		return;

	ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
	atomic_inc(&rdma_stat_rq_poll);

	while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) {
		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
		ctxt->wc_status = wc.status;
		ctxt->byte_len = wc.byte_len;
		svc_rdma_unmap_dma(ctxt);
		if (wc.status != IB_WC_SUCCESS) {
			/* Close the transport */
			dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
			svc_rdma_put_context(ctxt, 1);
			svc_xprt_put(&xprt->sc_xprt);
			continue;
		}
		spin_lock_bh(&xprt->sc_rq_dto_lock);
		list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
		spin_unlock_bh(&xprt->sc_rq_dto_lock);
		svc_xprt_put(&xprt->sc_xprt);
	}

	if (ctxt)
		atomic_inc(&rdma_stat_rq_prod);

	set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
	/*
	 * If data arrived before established event,
	 * don't enqueue. This defers RPC I/O until the
	 * RDMA connection is complete.
	 */
	if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
		svc_xprt_enqueue(&xprt->sc_xprt);
}
static int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
	struct ib_recv_wr recv_wr, *bad_recv_wr;
	struct svc_rdma_op_ctxt *ctxt;
	struct page *page;
	dma_addr_t pa;
	int sge_no;
	int buflen;
	int ret;

	ctxt = svc_rdma_get_context(xprt);
	buflen = 0;
	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->cqe.done = svc_rdma_wc_receive;
	for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
		if (sge_no >= xprt->sc_max_sge) {
			pr_err("svcrdma: Too many sges (%d)\n", sge_no);
			goto err_put_ctxt;
		}
		page = alloc_page(GFP_KERNEL);
		if (!page)
			goto err_put_ctxt;
		ctxt->pages[sge_no] = page;
		pa = ib_dma_map_page(xprt->sc_cm_id->device,
				     page, 0, PAGE_SIZE,
				     DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
			goto err_put_ctxt;
		svc_rdma_count_mappings(xprt, ctxt);
		ctxt->sge[sge_no].addr = pa;
		ctxt->sge[sge_no].length = PAGE_SIZE;
		ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
		ctxt->count = sge_no + 1;
		buflen += PAGE_SIZE;
	}
	recv_wr.next = NULL;
	recv_wr.sg_list = &ctxt->sge[0];
	recv_wr.num_sge = ctxt->count;
	recv_wr.wr_cqe = &ctxt->cqe;

	svc_xprt_get(&xprt->sc_xprt);
	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
	if (ret) {
		svc_rdma_unmap_dma(ctxt);
		svc_rdma_put_context(ctxt, 1);
		svc_xprt_put(&xprt->sc_xprt);
	}
	return ret;

 err_put_ctxt:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	return -ENOMEM;
}
static int rdma_cma_handler(struct rdma_cm_id *cma_id,
			    struct rdma_cm_event *event)
{
	struct svc_xprt *xprt = cma_id->context;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);

	switch (event->event) {
	case RDMA_CM_EVENT_ESTABLISHED:
		/* Accept complete */
		svc_xprt_get(xprt);
		dprintk("svcrdma: Connection completed on DTO xprt=%p, "
			"cm_id=%p\n", xprt, cma_id);
		clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
		svc_xprt_enqueue(xprt);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
		dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n",
			xprt, cma_id);
		if (xprt) {
			set_bit(XPT_CLOSE, &xprt->xpt_flags);
			svc_xprt_enqueue(xprt);
			svc_xprt_put(xprt);
		}
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
			"event = %s (%d)\n", cma_id, xprt,
			rdma_event_msg(event->event), event->event);
		if (xprt) {
			set_bit(XPT_CLOSE, &xprt->xpt_flags);
			svc_xprt_enqueue(xprt);
			svc_xprt_put(xprt);
		}
		break;
	default:
		dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
			"event = %s (%d)\n", cma_id,
			rdma_event_msg(event->event), event->event);
		break;
	}
	return 0;
}
static int create_lockd_listener(struct svc_serv *serv, const char *name,
				 const int family, const unsigned short port)
{
	struct svc_xprt *xprt;

	xprt = svc_find_xprt(serv, name, family, 0);
	if (xprt == NULL)
		return svc_create_xprt(serv, name, family, port,
				       SVC_SOCK_DEFAULTS);
	svc_xprt_put(xprt);
	return 0;
}
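/*
 * A minimal caller sketch (not part of the excerpt above): lockd
 * typically creates one UDP and one TCP listener per address family,
 * and create_lockd_listener() returns 0 without creating anything if a
 * matching transport already exists. The helper name and the
 * nlm_udpport/nlm_tcpport parameters are assumptions for illustration.
 */
static int create_lockd_family(struct svc_serv *serv, const int family)
{
	int err;

	err = create_lockd_listener(serv, "udp", family, nlm_udpport);
	if (err < 0)
		return err;

	return create_lockd_listener(serv, "tcp", family, nlm_tcpport);
}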
int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
	struct ib_recv_wr recv_wr, *bad_recv_wr;
	struct svc_rdma_op_ctxt *ctxt;
	struct page *page;
	dma_addr_t pa;
	int sge_no;
	int buflen;
	int ret;

	ctxt = svc_rdma_get_context(xprt);
	buflen = 0;
	ctxt->direction = DMA_FROM_DEVICE;
	for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
		BUG_ON(sge_no >= xprt->sc_max_sge);
		page = svc_rdma_get_page();
		ctxt->pages[sge_no] = page;
		pa = ib_dma_map_page(xprt->sc_cm_id->device,
				     page, 0, PAGE_SIZE,
				     DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
			goto err_put_ctxt;
		atomic_inc(&xprt->sc_dma_used);
		ctxt->sge[sge_no].addr = pa;
		ctxt->sge[sge_no].length = PAGE_SIZE;
		ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
		ctxt->count = sge_no + 1;
		buflen += PAGE_SIZE;
	}
	recv_wr.next = NULL;
	recv_wr.sg_list = &ctxt->sge[0];
	recv_wr.num_sge = ctxt->count;
	recv_wr.wr_id = (u64)(unsigned long)ctxt;

	svc_xprt_get(&xprt->sc_xprt);
	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
	if (ret) {
		svc_rdma_unmap_dma(ctxt);
		svc_rdma_put_context(ctxt, 1);
		svc_xprt_put(&xprt->sc_xprt);
	}
	return ret;

 err_put_ctxt:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	return -ENOMEM;
}
static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
				struct svc_rdma_recv_ctxt *ctxt)
{
	int ret;

	svc_xprt_get(&rdma->sc_xprt);
	ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
	trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
	if (ret)
		goto err_post;
	return 0;

err_post:
	svc_rdma_recv_ctxt_put(rdma, ctxt);
	svc_xprt_put(&rdma->sc_xprt);
	return ret;
}
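/*
 * A sketch of how a caller might replenish the Receive Queue with
 * __svc_rdma_post_recv() above. The loop bound (sc_max_requests) and
 * the svc_rdma_recv_ctxt_get() allocator are assumptions here, shown
 * only to illustrate that each successful post pins the transport
 * until the matching Receive completion drops the reference.
 */
static bool svc_rdma_post_recvs_sketch(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_recv_ctxt *ctxt;
	unsigned int i;

	for (i = 0; i < rdma->sc_max_requests; i++) {
		ctxt = svc_rdma_recv_ctxt_get(rdma);
		if (!ctxt)
			return false;
		if (__svc_rdma_post_recv(rdma, ctxt))
			return false;
	}
	return true;
}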
/*
 * Send Queue Completion Handler - potentially called on interrupt context.
 *
 * Note that caller must hold a transport reference.
 */
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_op_ctxt *ctxt = NULL;
	struct ib_wc wc_a[6];
	struct ib_wc *wc;
	struct ib_cq *cq = xprt->sc_sq_cq;
	int ret;

	memset(wc_a, 0, sizeof(wc_a));

	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
		return;

	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
	atomic_inc(&rdma_stat_sq_poll);
	while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
		int i;

		for (i = 0; i < ret; i++) {
			wc = &wc_a[i];
			if (wc->status != IB_WC_SUCCESS) {
				dprintk("svcrdma: sq wc err status %s (%d)\n",
					ib_wc_status_msg(wc->status),
					wc->status);

				/* Close the transport */
				set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
			}

			/* Decrement used SQ WR count */
			atomic_dec(&xprt->sc_sq_count);
			wake_up(&xprt->sc_send_wait);

			ctxt = (struct svc_rdma_op_ctxt *)
				(unsigned long)wc->wr_id;
			if (ctxt)
				process_context(xprt, ctxt);

			svc_xprt_put(&xprt->sc_xprt);
		}
	}

	if (ctxt)
		atomic_inc(&rdma_stat_sq_prod);
}
/**
 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: Completion Queue context
 * @wc: Work Completion object
 *
 * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
 * the Receive completion handler could be running.
 */
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *rdma = cq->cq_context;
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_recv_ctxt *ctxt;

	trace_svcrdma_wc_receive(wc);

	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
	ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);

	if (wc->status != IB_WC_SUCCESS)
		goto flushed;

	if (svc_rdma_post_recv(rdma))
		goto post_err;

	/* All wc fields are now known to be valid */
	ctxt->rc_byte_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
				   ctxt->rc_recv_sge.addr,
				   wc->byte_len, DMA_FROM_DEVICE);

	spin_lock(&rdma->sc_rq_dto_lock);
	list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
	spin_unlock(&rdma->sc_rq_dto_lock);
	set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
	if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
		svc_xprt_enqueue(&rdma->sc_xprt);
	goto out;

flushed:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
post_err:
	svc_rdma_recv_ctxt_put(rdma, ctxt);
	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
	svc_xprt_enqueue(&rdma->sc_xprt);
out:
	svc_xprt_put(&rdma->sc_xprt);
}
/*
 * A transport listener is removed by writing a "-", its transport
 * name, and its port number.
 */
static ssize_t __write_ports_delxprt(char *buf)
{
	struct svc_xprt *xprt;
	char transport[16];
	int port;

	if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2)
		return -EINVAL;

	if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL)
		return -EINVAL;

	xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port);
	if (xprt == NULL)
		return -ENOTCONN;

	svc_close_xprt(xprt);
	svc_xprt_put(xprt);
	return 0;
}
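/*
 * For reference, a user-space sketch of the string parsed above: "-"
 * followed by the transport name and port, e.g. "-tcp 2049". The
 * control-file path (/proc/fs/nfsd/portlist) and the helper name are
 * assumptions for illustration only.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int nfsd_del_listener(const char *transport, unsigned short port)
{
	char buf[32];
	int fd, len, ret;

	len = snprintf(buf, sizeof(buf), "-%s %u", transport, port);
	fd = open("/proc/fs/nfsd/portlist", O_WRONLY);
	if (fd < 0)
		return -1;
	ret = (write(fd, buf, len) == len) ? 0 : -1;
	close(fd);
	return ret;
}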
/*
 * Data Transfer Operation Tasklet
 *
 * Walks a list of transports with I/O pending, removing entries as
 * they are added to the server's I/O pending list. Two bits indicate
 * if SQ, RQ, or both have I/O pending. The dto_lock is an irqsave
 * spinlock that serializes access to the transport list with the RQ
 * and SQ interrupt handlers.
 */
static void dto_tasklet_func(unsigned long data)
{
	struct svcxprt_rdma *xprt;
	unsigned long flags;

	spin_lock_irqsave(&dto_lock, flags);
	while (!list_empty(&dto_xprt_q)) {
		xprt = list_entry(dto_xprt_q.next,
				  struct svcxprt_rdma, sc_dto_q);
		list_del_init(&xprt->sc_dto_q);
		spin_unlock_irqrestore(&dto_lock, flags);

		rq_cq_reap(xprt);
		sq_cq_reap(xprt);
		svc_xprt_put(&xprt->sc_xprt);

		spin_lock_irqsave(&dto_lock, flags);
	}
	spin_unlock_irqrestore(&dto_lock, flags);
}
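/*
 * A sketch (a hedged reconstruction, not quoted from the excerpt) of
 * the interrupt-side half of the tasklet scheme above: the RQ CQ
 * callback sets the pending bit, takes the transport reference that
 * dto_tasklet_func() later drops with svc_xprt_put(), and schedules
 * the tasklet. The SQ side would do the same with RDMAXPRT_SQ_PENDING.
 */
static DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL);

static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct svcxprt_rdma *xprt = cq_context;
	unsigned long flags;

	/* RQ has work; the transport may already be queued for SQ work */
	set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags);

	spin_lock_irqsave(&dto_lock, flags);
	if (list_empty(&xprt->sc_dto_q)) {
		/* Reference is released by dto_tasklet_func() */
		svc_xprt_get(&xprt->sc_xprt);
		list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
	}
	spin_unlock_irqrestore(&dto_lock, flags);

	tasklet_schedule(&dto_tasklet);
}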
static ssize_t __write_ports_addxprt(char *buf)
{
	char transport[16];
	struct svc_xprt *xprt;
	int port, err;
	struct net *net = &init_net;

	if (sscanf(buf, "%15s %4u", transport, &port) != 2)
		return -EINVAL;

	if (port < 1 || port > USHRT_MAX)
		return -EINVAL;

	err = nfsd_create_serv();
	if (err != 0)
		return err;

	err = svc_create_xprt(nfsd_serv, transport, net,
			      PF_INET, port, SVC_SOCK_ANONYMOUS);
	if (err < 0)
		goto out_err;

	err = svc_create_xprt(nfsd_serv, transport, net,
			      PF_INET6, port, SVC_SOCK_ANONYMOUS);
	if (err < 0 && err != -EAFNOSUPPORT)
		goto out_close;

	nfsd_serv->sv_nrthreads--;
	return 0;
out_close:
	xprt = svc_find_xprt(nfsd_serv, transport, net, PF_INET, port);
	if (xprt != NULL) {
		svc_close_xprt(xprt);
		svc_xprt_put(xprt);
	}
out_err:
	nfsd_destroy(net);
	return err;
}
/**
 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq:        completion queue
 * @wc:        completed WR
 *
 */
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *xprt = cq->cq_context;
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_op_ctxt *ctxt;

	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
	svc_rdma_unmap_dma(ctxt);
	if (wc->status != IB_WC_SUCCESS)
		goto flushed;

	/* All wc fields are now known to be valid */
	ctxt->byte_len = wc->byte_len;
	spin_lock(&xprt->sc_rq_dto_lock);
	list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
	spin_unlock(&xprt->sc_rq_dto_lock);

	svc_rdma_post_recv(xprt);

	set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
	if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
		goto out;
	goto out_enqueue;

flushed:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
	svc_rdma_put_context(ctxt, 1);

out_enqueue:
	svc_xprt_enqueue(&xprt->sc_xprt);
out:
	svc_xprt_put(&xprt->sc_xprt);
}
/**
 * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
 * @cq:        completion queue
 * @wc:        completed WR
 *
 */
void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *xprt = cq->cq_context;
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_op_ctxt *ctxt;

	atomic_inc(&xprt->sc_sq_avail);
	wake_up(&xprt->sc_send_wait);

	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		svc_xprt_enqueue(&xprt->sc_xprt);
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("svcrdma: Send: %s (%u/0x%x)\n",
			       ib_wc_status_msg(wc->status),
			       wc->status, wc->vendor_err);
	}

	svc_xprt_put(&xprt->sc_xprt);
}
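/*
 * A minimal sketch of the send side that pairs with svc_rdma_wc_send()
 * above: the Send WR carries the context's ib_cqe, whose .done method
 * is the completion handler, and the transport reference taken before
 * posting is the one svc_rdma_wc_send() drops. The helper name and the
 * sc_sq_avail accounting shown here are assumptions for illustration.
 */
static int svc_rdma_post_send_sketch(struct svcxprt_rdma *xprt,
				     struct svc_rdma_op_ctxt *ctxt,
				     int num_sge)
{
	struct ib_send_wr send_wr, *bad_wr;
	int ret;

	ctxt->cqe.done = svc_rdma_wc_send;

	memset(&send_wr, 0, sizeof(send_wr));
	send_wr.wr_cqe = &ctxt->cqe;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = num_sge;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	atomic_dec(&xprt->sc_sq_avail);
	svc_xprt_get(&xprt->sc_xprt);
	ret = ib_post_send(xprt->sc_qp, &send_wr, &bad_wr);
	if (ret) {
		/* Undo on failure; no completion will fire for this WR */
		atomic_inc(&xprt->sc_sq_avail);
		svc_xprt_put(&xprt->sc_xprt);
	}
	return ret;
}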
/*
 * Send Queue Completion Handler - potentially called on interrupt context.
 *
 * Note that caller must hold a transport reference.
 */
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_op_ctxt *ctxt = NULL;
	struct ib_wc wc;
	struct ib_cq *cq = xprt->sc_sq_cq;
	int ret;

	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
		return;

	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
	atomic_inc(&rdma_stat_sq_poll);
	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
		xprt = ctxt->xprt;

		svc_rdma_unmap_dma(ctxt);
		if (wc.status != IB_WC_SUCCESS)
			/* Close the transport */
			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);

		/* Decrement used SQ WR count */
		atomic_dec(&xprt->sc_sq_count);
		wake_up(&xprt->sc_send_wait);

		switch (ctxt->wr_op) {
		case IB_WR_SEND:
			svc_rdma_put_context(ctxt, 1);
			break;

		case IB_WR_RDMA_WRITE:
			svc_rdma_put_context(ctxt, 0);
			break;

		case IB_WR_RDMA_READ:
			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
				struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
				BUG_ON(!read_hdr);
				spin_lock_bh(&xprt->sc_rq_dto_lock);
				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
				list_add_tail(&read_hdr->dto_q,
					      &xprt->sc_read_complete_q);
				spin_unlock_bh(&xprt->sc_rq_dto_lock);
				svc_xprt_enqueue(&xprt->sc_xprt);
			}
			svc_rdma_put_context(ctxt, 0);
			break;

		default:
			printk(KERN_ERR "svcrdma: unexpected completion type, "
			       "opcode=%d, status=%d\n",
			       wc.opcode, wc.status);
			break;
		}
		svc_xprt_put(&xprt->sc_xprt);
	}

	if (ctxt)
		atomic_inc(&rdma_stat_sq_prod);
}