/* Send a backwards direction RPC call. * * Caller holds the connection's mutex and has already marshaled * the RPC/RDMA request. * * This is similar to svc_rdma_reply, but takes an rpc_rqst * instead, does not support chunks, and avoids blocking memory * allocation. * * XXX: There is still an opportunity to block in svc_rdma_send() * if there are no SQ entries to post the Send. This may occur if * the adapter has a small maximum SQ depth. */ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) { struct xdr_buf *sndbuf = &rqst->rq_snd_buf; struct svc_rdma_op_ctxt *ctxt; struct svc_rdma_req_map *vec; struct ib_send_wr send_wr; int ret; vec = svc_rdma_get_req_map(rdma); ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false); if (ret) goto out_err; ret = svc_rdma_repost_recv(rdma, GFP_NOIO); if (ret) goto out_err; ctxt = svc_rdma_get_context(rdma); ctxt->pages[0] = virt_to_page(rqst->rq_buffer); ctxt->count = 1; ctxt->direction = DMA_TO_DEVICE; ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; ctxt->sge[0].length = sndbuf->len; ctxt->sge[0].addr = ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0, sndbuf->len, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) { ret = -EIO; goto out_unmap; } svc_rdma_count_mappings(rdma, ctxt); memset(&send_wr, 0, sizeof(send_wr)); ctxt->cqe.done = svc_rdma_wc_send; send_wr.wr_cqe = &ctxt->cqe; send_wr.sg_list = ctxt->sge; send_wr.num_sge = 1; send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; ret = svc_rdma_send(rdma, &send_wr); if (ret) { ret = -EIO; goto out_unmap; } out_err: svc_rdma_put_req_map(rdma, vec); dprintk("svcrdma: %s returns %d\n", __func__, ret); return ret; out_unmap: svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); goto out_err; }
/*
 * Use RDMA_READ to read data from the advertised client buffer into the
 * XDR stream starting at rq_arg.head[0].iov_base.
 * Each chunk in the array contains the following fields:
 * discrim - '1', This isn't used for data placement
 * position - The xdr stream offset (the same for every chunk)
 * handle - RMR for client memory region
 * length - data transfer length
 * offset - 64 bit tagged offset in remote memory region
 *
 * On our side, we need to read into a pagelist. The first page immediately
 * follows the RPC header.
 *
 * This function returns:
 * 0 - No error and no read-list found.
 *
 * 1 - Successful read-list processing. The data is not yet in
 * the pagelist and therefore the RPC request must be deferred. The
 * I/O completion will enqueue the transport again and
 * svc_rdma_recvfrom will complete the request.
 *
 * <0 - Error processing/posting read-list.
 *
 * NOTE: The ctxt must not be touched after the last WR has been posted
 * because the I/O completion processing may occur on another
 * processor and free / modify the context. Do not touch it!
 */
static int rdma_read_xdr(struct svcxprt_rdma *xprt,
			 struct rpcrdma_msg *rmsgp,
			 struct svc_rqst *rqstp,
			 struct svc_rdma_op_ctxt *hdr_ctxt)
{
	struct ib_send_wr read_wr;
	struct ib_send_wr inv_wr;
	int err = 0;
	int ch_no;
	int ch_count;
	int byte_count;
	int sge_count;
	u64 sgl_offset;
	struct rpcrdma_read_chunk *ch;
	struct svc_rdma_op_ctxt *ctxt = NULL;
	struct svc_rdma_req_map *rpl_map;
	struct svc_rdma_req_map *chl_map;

	/* If no read list is present, return 0 */
	ch = svc_rdma_get_read_chunk(rmsgp);
	if (!ch)
		return 0;

	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
	if (ch_count > RPCSVC_MAXPAGES)
		return -EINVAL;

	/* Allocate temporary reply and chunk maps */
	rpl_map = svc_rdma_get_req_map();
	chl_map = svc_rdma_get_req_map();

	/* Map the read chunks; use fast registration (FRMR) when the
	 * device supports it, plain DMA mapping otherwise.
	 */
	if (!xprt->sc_frmr_pg_list_len)
		sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
					    rpl_map, chl_map, ch_count,
					    byte_count);
	else
		sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
						 rpl_map, chl_map, ch_count,
						 byte_count);
	if (sge_count < 0) {
		err = -EIO;
		goto out;
	}

	sgl_offset = 0;
	ch_no = 0;

	/* Post one or more RDMA_READ WRs per chunk; a chunk whose SGE
	 * count exceeds the device limit is read in multiple pieces
	 * (the "goto next_sge" path).
	 */
	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	     ch->rc_discrim != 0; ch++, ch_no++) {
next_sge:
		ctxt = svc_rdma_get_context(xprt);
		ctxt->direction = DMA_FROM_DEVICE;
		ctxt->frmr = hdr_ctxt->frmr;
		ctxt->read_hdr = NULL;
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

		/* Prepare READ WR */
		memset(&read_wr, 0, sizeof read_wr);
		read_wr.wr_id = (unsigned long)ctxt;
		read_wr.opcode = IB_WR_RDMA_READ;
		ctxt->wr_op = read_wr.opcode;
		read_wr.send_flags = IB_SEND_SIGNALED;
		read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
		/* rs_offset may be unaligned in the received header. */
		read_wr.wr.rdma.remote_addr =
			get_unaligned(&(ch->rc_target.rs_offset)) +
			sgl_offset;
		read_wr.sg_list = ctxt->sge;
		read_wr.num_sge =
			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
		err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
					&rpl_map->sge[chl_map->ch[ch_no].start],
					&sgl_offset,
					read_wr.num_sge);
		if (err) {
			svc_rdma_unmap_dma(ctxt);
			svc_rdma_put_context(ctxt, 0);
			goto out;
		}
		if (((ch+1)->rc_discrim == 0) &&
		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
			/*
			 * Mark the last RDMA_READ with a bit to
			 * indicate all RPC data has been fetched from
			 * the client and the RPC needs to be enqueued.
			 */
			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
			if (hdr_ctxt->frmr) {
				set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
				/*
				 * Invalidate the local MR used to map the data
				 * sink.
				 */
				if (xprt->sc_dev_caps &
				    SVCRDMA_DEVCAP_READ_W_INV) {
					/* Device can invalidate as part of
					 * the READ itself.
					 */
					read_wr.opcode =
						IB_WR_RDMA_READ_WITH_INV;
					ctxt->wr_op = read_wr.opcode;
					read_wr.ex.invalidate_rkey =
						ctxt->frmr->mr->lkey;
				} else {
					/* Prepare INVALIDATE WR */
					memset(&inv_wr, 0, sizeof inv_wr);
					inv_wr.opcode = IB_WR_LOCAL_INV;
					inv_wr.send_flags = IB_SEND_SIGNALED;
					inv_wr.ex.invalidate_rkey =
						hdr_ctxt->frmr->mr->lkey;
					/* Chain the invalidate after the
					 * final READ.
					 */
					read_wr.next = &inv_wr;
				}
			}
			ctxt->read_hdr = hdr_ctxt;
		}
		/* Post the read */
		err = svc_rdma_send(xprt, &read_wr);
		if (err) {
			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
			       err);
			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
			svc_rdma_unmap_dma(ctxt);
			svc_rdma_put_context(ctxt, 0);
			goto out;
		}
		atomic_inc(&rdma_stat_read);

		/* More SGEs remain for this chunk: advance the chunk map
		 * window and post another READ for the same chunk.
		 */
		if (read_wr.num_sge < chl_map->ch[ch_no].count) {
			chl_map->ch[ch_no].count -= read_wr.num_sge;
			chl_map->ch[ch_no].start += read_wr.num_sge;
			goto next_sge;
		}
		sgl_offset = 0;
		err = 1;
	}

 out:
	svc_rdma_put_req_map(rpl_map);
	svc_rdma_put_req_map(chl_map);

	/* Detach arg pages. svc_recv will replenish them */
	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
		rqstp->rq_pages[ch_no] = NULL;

	/*
	 * Detach res pages. svc_release must see a resused count of
	 * zero or it will attempt to put them.
	 */
	while (rqstp->rq_resused)
		rqstp->rq_respages[--rqstp->rq_resused] = NULL;

	return err;
}
int svc_rdma_sendto(struct svc_rqst *rqstp) { struct svc_xprt *xprt = rqstp->rq_xprt; struct svcxprt_rdma *rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); struct rpcrdma_msg *rdma_argp; struct rpcrdma_msg *rdma_resp; struct rpcrdma_write_array *reply_ary; enum rpcrdma_proc reply_type; int ret; int inline_bytes; struct page *res_page; struct svc_rdma_op_ctxt *ctxt; struct svc_rdma_req_map *vec; dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); /* Get the RDMA request header. */ rdma_argp = xdr_start(&rqstp->rq_arg); /* Build an req vec for the XDR */ ctxt = svc_rdma_get_context(rdma); ctxt->direction = DMA_TO_DEVICE; vec = svc_rdma_get_req_map(); ret = map_xdr(rdma, &rqstp->rq_res, vec); if (ret) goto err0; inline_bytes = rqstp->rq_res.len; /* Create the RDMA response header */ res_page = svc_rdma_get_page(); rdma_resp = page_address(res_page); reply_ary = svc_rdma_get_reply_array(rdma_argp); if (reply_ary) reply_type = RDMA_NOMSG; else reply_type = RDMA_MSG; svc_rdma_xdr_encode_reply_header(rdma, rdma_argp, rdma_resp, reply_type); /* Send any write-chunk data and build resp write-list */ ret = send_write_chunks(rdma, rdma_argp, rdma_resp, rqstp, vec); if (ret < 0) { printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", ret); goto err1; } inline_bytes -= ret; /* Send any reply-list data and update resp reply-list */ ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, rqstp, vec); if (ret < 0) { printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", ret); goto err1; } inline_bytes -= ret; ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec, inline_bytes); svc_rdma_put_req_map(vec); dprintk("svcrdma: send_reply returns %d\n", ret); return ret; err1: put_page(res_page); err0: svc_rdma_put_req_map(vec); svc_rdma_put_context(ctxt, 0); return ret; }
/*
 * Use RDMA_READ to read data from the advertised client buffer into the
 * XDR stream starting at rq_arg.head[0].iov_base.
 *
 * This function returns:
 * 0 - No error and no read-list found.
 * 1 - Successful read-list processing. The data is not yet in the
 * pagelist; the RPC request is deferred and completed by
 * svc_rdma_recvfrom when the I/O completes.
 * <0 - Error processing/posting read-list.
 *
 * NOTE: ctxt must not be touched after the last WR has been posted;
 * completion processing may free/modify it on another processor.
 */
static int rdma_read_xdr(struct svcxprt_rdma *xprt,
			 struct rpcrdma_msg *rmsgp,
			 struct svc_rqst *rqstp,
			 struct svc_rdma_op_ctxt *hdr_ctxt)
{
	struct ib_send_wr read_wr;
	struct ib_send_wr inv_wr;
	int err = 0;
	int ch_no;
	int ch_count;
	int byte_count;
	int sge_count;
	u64 sgl_offset;
	struct rpcrdma_read_chunk *ch;
	struct svc_rdma_op_ctxt *ctxt = NULL;
	struct svc_rdma_req_map *rpl_map;
	struct svc_rdma_req_map *chl_map;

	/* If no read list is present, return 0 */
	ch = svc_rdma_get_read_chunk(rmsgp);
	if (!ch)
		return 0;

	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
	if (ch_count > RPCSVC_MAXPAGES)
		return -EINVAL;

	/* Allocate temporary reply and chunk maps */
	rpl_map = svc_rdma_get_req_map();
	chl_map = svc_rdma_get_req_map();

	/* Map the read chunks; use fast registration (FRMR) when the
	 * device supports it, plain DMA mapping otherwise.
	 */
	if (!xprt->sc_frmr_pg_list_len)
		sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
					    rpl_map, chl_map, ch_count,
					    byte_count);
	else
		sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
						 rpl_map, chl_map, ch_count,
						 byte_count);
	if (sge_count < 0) {
		err = -EIO;
		goto out;
	}

	sgl_offset = 0;
	ch_no = 0;

	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	     ch->rc_discrim != 0; ch++, ch_no++) {
		u64 rs_offset;
next_sge:
		ctxt = svc_rdma_get_context(xprt);
		ctxt->direction = DMA_FROM_DEVICE;
		ctxt->frmr = hdr_ctxt->frmr;
		ctxt->read_hdr = NULL;
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

		/* Prepare READ WR */
		memset(&read_wr, 0, sizeof read_wr);
		read_wr.wr_id = (unsigned long)ctxt;
		read_wr.opcode = IB_WR_RDMA_READ;
		ctxt->wr_op = read_wr.opcode;
		read_wr.send_flags = IB_SEND_SIGNALED;
		/* Wire fields are big-endian: convert the rkey and decode
		 * the 64-bit remote offset before use.
		 */
		read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle);
		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
				 &rs_offset);
		read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset;
		read_wr.sg_list = ctxt->sge;
		read_wr.num_sge =
			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
		err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
					&rpl_map->sge[chl_map->ch[ch_no].start],
					&sgl_offset,
					read_wr.num_sge);
		if (err) {
			svc_rdma_unmap_dma(ctxt);
			svc_rdma_put_context(ctxt, 0);
			goto out;
		}
		if (((ch+1)->rc_discrim == 0) &&
		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
			/* Mark the last RDMA_READ to indicate all RPC data
			 * has been fetched and the RPC can be enqueued.
			 */
			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
			if (hdr_ctxt->frmr) {
				set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
				/* Invalidate the local MR used to map the
				 * data sink.
				 */
				if (xprt->sc_dev_caps &
				    SVCRDMA_DEVCAP_READ_W_INV) {
					/* Device can invalidate as part of
					 * the READ itself.
					 */
					read_wr.opcode =
						IB_WR_RDMA_READ_WITH_INV;
					ctxt->wr_op = read_wr.opcode;
					read_wr.ex.invalidate_rkey =
						ctxt->frmr->mr->lkey;
				} else {
					/* Prepare INVALIDATE WR, chained
					 * after the final READ.
					 */
					memset(&inv_wr, 0, sizeof inv_wr);
					inv_wr.opcode = IB_WR_LOCAL_INV;
					inv_wr.send_flags = IB_SEND_SIGNALED;
					inv_wr.ex.invalidate_rkey =
						hdr_ctxt->frmr->mr->lkey;
					read_wr.next = &inv_wr;
				}
			}
			ctxt->read_hdr = hdr_ctxt;
		}
		/* Post the read */
		err = svc_rdma_send(xprt, &read_wr);
		if (err) {
			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
			       err);
			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
			svc_rdma_unmap_dma(ctxt);
			svc_rdma_put_context(ctxt, 0);
			goto out;
		}
		atomic_inc(&rdma_stat_read);

		/* More SGEs remain for this chunk: advance the chunk map
		 * window and post another READ for the same chunk.
		 */
		if (read_wr.num_sge < chl_map->ch[ch_no].count) {
			chl_map->ch[ch_no].count -= read_wr.num_sge;
			chl_map->ch[ch_no].start += read_wr.num_sge;
			goto next_sge;
		}
		sgl_offset = 0;
		err = 1;
	}

 out:
	svc_rdma_put_req_map(rpl_map);
	svc_rdma_put_req_map(chl_map);

	/* Detach arg pages; svc_recv will replenish them. */
	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
		rqstp->rq_pages[ch_no] = NULL;

	/* Detach res pages. svc_release must see a resused count of
	 * zero or it will attempt to put them.
	 */
	while (rqstp->rq_resused)
		rqstp->rq_respages[--rqstp->rq_resused] = NULL;

	return err;
}