/*
 * Use RDMA_READ to read data from the advertised client buffer into the
 * XDR stream starting at rq_arg.head[0].iov_base.
 * Each chunk in the array contains the following fields:
 * discrim  - '1', This isn't used for data placement
 * position - The xdr stream offset (the same for every chunk)
 * handle   - RMR for client memory region
 * length   - data transfer length
 * offset   - 64 bit tagged offset in remote memory region
 *
 * On our side, we need to read into a pagelist. The first page immediately
 * follows the RPC header.
 *
 * This function returns:
 * 0 - No error and no read-list found.
 *
 * 1 - Successful read-list processing. The data is not yet in
 * the pagelist and therefore the RPC request must be deferred. The
 * I/O completion will enqueue the transport again and
 * svc_rdma_recvfrom will complete the request.
 *
 * <0 - Error processing/posting read-list.
 *
 * NOTE: The ctxt must not be touched after the last WR has been posted
 * because the I/O completion processing may occur on another
 * processor and free / modify the context. Do not touch it!
 */
static int rdma_read_xdr(struct svcxprt_rdma *xprt,
			 struct rpcrdma_msg *rmsgp,
			 struct svc_rqst *rqstp,
			 struct svc_rdma_op_ctxt *hdr_ctxt)
{
	struct ib_send_wr read_wr;
	struct ib_send_wr inv_wr;
	int err = 0;
	int ch_no;
	int ch_count;
	int byte_count;
	int sge_count;
	u64 sgl_offset;
	struct rpcrdma_read_chunk *ch;
	struct svc_rdma_op_ctxt *ctxt = NULL;
	struct svc_rdma_req_map *rpl_map;
	struct svc_rdma_req_map *chl_map;

	/* If no read list is present, return 0 */
	ch = svc_rdma_get_read_chunk(rmsgp);
	if (!ch)
		return 0;

	/* Count the chunks and the total bytes they advertise; bound the
	 * chunk count by what a single request's page vector can hold. */
	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
	if (ch_count > RPCSVC_MAXPAGES)
		return -EINVAL;

	/* Allocate temporary reply and chunk maps */
	rpl_map = svc_rdma_get_req_map();
	chl_map = svc_rdma_get_req_map();

	/* Map the data sink either with plain SGEs or, when the device
	 * supports fast-register page lists, via FRMR. */
	if (!xprt->sc_frmr_pg_list_len)
		sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
					    rpl_map, chl_map, ch_count,
					    byte_count);
	else
		sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
						 rpl_map, chl_map, ch_count,
						 byte_count);
	if (sge_count < 0) {
		err = -EIO;
		goto out;
	}

	sgl_offset = 0;
	ch_no = 0;

	/* Post one (or more, via next_sge) RDMA_READ per chunk entry. */
	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	     ch->rc_discrim != 0; ch++, ch_no++) {
next_sge:
		ctxt = svc_rdma_get_context(xprt);
		ctxt->direction = DMA_FROM_DEVICE;
		ctxt->frmr = hdr_ctxt->frmr;
		ctxt->read_hdr = NULL;
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

		/* Prepare READ WR */
		memset(&read_wr, 0, sizeof read_wr);
		read_wr.wr_id = (unsigned long)ctxt;
		read_wr.opcode = IB_WR_RDMA_READ;
		ctxt->wr_op = read_wr.opcode;
		read_wr.send_flags = IB_SEND_SIGNALED;
		/* NOTE(review): rs_handle is used without ntohl() and
		 * rs_offset is fetched with get_unaligned(); the later
		 * rdma_read_xdr variant in this file applies
		 * ntohl()/xdr_decode_hyper() here. Confirm whether the XDR
		 * decode path byte-swaps the chunk list in place before
		 * this runs — if not, this is a byte-order bug. */
		read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
		read_wr.wr.rdma.remote_addr =
			get_unaligned(&(ch->rc_target.rs_offset)) +
			sgl_offset;
		read_wr.sg_list = ctxt->sge;
		read_wr.num_sge =
			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
		err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
					&rpl_map->sge[chl_map->ch[ch_no].start],
					&sgl_offset,
					read_wr.num_sge);
		if (err) {
			svc_rdma_unmap_dma(ctxt);
			svc_rdma_put_context(ctxt, 0);
			goto out;
		}
		if (((ch+1)->rc_discrim == 0) &&
		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
			/*
			 * Mark the last RDMA_READ with a bit to
			 * indicate all RPC data has been fetched from
			 * the client and the RPC needs to be enqueued.
			 */
			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
			if (hdr_ctxt->frmr) {
				set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
				/*
				 * Invalidate the local MR used to map the data
				 * sink.
				 */
				if (xprt->sc_dev_caps &
				    SVCRDMA_DEVCAP_READ_W_INV) {
					/* Device can invalidate as part of
					 * the READ itself. */
					read_wr.opcode =
						IB_WR_RDMA_READ_WITH_INV;
					ctxt->wr_op = read_wr.opcode;
					read_wr.ex.invalidate_rkey =
						ctxt->frmr->mr->lkey;
				} else {
					/* Prepare INVALIDATE WR */
					memset(&inv_wr, 0, sizeof inv_wr);
					inv_wr.opcode = IB_WR_LOCAL_INV;
					inv_wr.send_flags = IB_SEND_SIGNALED;
					inv_wr.ex.invalidate_rkey =
						hdr_ctxt->frmr->mr->lkey;
					read_wr.next = &inv_wr;
				}
			}
			ctxt->read_hdr = hdr_ctxt;
		}
		/* Post the read */
		err = svc_rdma_send(xprt, &read_wr);
		if (err) {
			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
			       err);
			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
			svc_rdma_unmap_dma(ctxt);
			svc_rdma_put_context(ctxt, 0);
			goto out;
		}
		atomic_inc(&rdma_stat_read);

		/* If the chunk needed more SGEs than one WR could carry,
		 * advance the map window and post another READ for the
		 * same chunk. */
		if (read_wr.num_sge < chl_map->ch[ch_no].count) {
			chl_map->ch[ch_no].count -= read_wr.num_sge;
			chl_map->ch[ch_no].start += read_wr.num_sge;
			goto next_sge;
		}
		sgl_offset = 0;
		err = 1;
	}

out:
	svc_rdma_put_req_map(rpl_map);
	svc_rdma_put_req_map(chl_map);

	/* Detach arg pages. svc_recv will replenish them */
	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
		rqstp->rq_pages[ch_no] = NULL;

	/*
	 * Detach res pages. svc_release must see a resused count of
	 * zero or it will attempt to put them.
	 */
	while (rqstp->rq_resused)
		rqstp->rq_respages[--rqstp->rq_resused] = NULL;

	return err;
}
/* Pull the data advertised in the request's read list from the client
 * using the transport's sc_reader callback, one RDMA READ segment at a
 * time, accumulating the result into the pages held by @head.
 *
 * Returns:
 *	0  - no read list was present; nothing to do.
 *	1  - read list processed; the request completes when the posted
 *	     RDMA_READs complete (the head context keeps the pages).
 *	<0 - error while validating or posting the reads.
 */
static int rdma_read_chunks(struct svcxprt_rdma *xprt,
			    struct rpcrdma_msg *rmsgp,
			    struct svc_rqst *rqstp,
			    struct svc_rdma_op_ctxt *head)
{
	int page_no, ret;
	struct rpcrdma_read_chunk *ch;
	u32 handle, page_offset, byte_count;
	u32 position;
	u64 rs_offset;
	bool last;

	/* If no read list is present, return 0 */
	ch = svc_rdma_get_read_chunk(rmsgp);
	if (!ch)
		return 0;

	/* Bound the chunk count by a single request's page vector. */
	if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES)
		return -EINVAL;

	/* The request is completed when the RDMA_READs complete. The
	 * head context keeps all the pages that comprise the
	 * request.
	 */
	head->arg.head[0] = rqstp->rq_arg.head[0];
	head->arg.tail[0] = rqstp->rq_arg.tail[0];
	head->hdr_count = head->count;
	head->arg.page_base = 0;
	head->arg.page_len = 0;
	head->arg.len = rqstp->rq_arg.len;
	head->arg.buflen = rqstp->rq_arg.buflen;

	/* All chunks share one position; read it from the first entry. */
	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	position = be32_to_cpu(ch->rc_position);

	/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
	if (position == 0) {
		head->arg.pages = &head->pages[0];
		page_offset = head->byte_len;
	} else {
		head->arg.pages = &head->pages[head->count];
		page_offset = 0;
	}

	ret = 0;
	page_no = 0;
	for (; ch->rc_discrim != xdr_zero; ch++) {
		/* Reject chunks that disagree on the stream position. */
		if (be32_to_cpu(ch->rc_position) != position)
			goto err;

		/* NOTE(review): comma operator below — behaves like ';'
		 * here, but a semicolon was presumably intended. */
		handle = be32_to_cpu(ch->rc_target.rs_handle),
		byte_count = be32_to_cpu(ch->rc_target.rs_length);
		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
				 &rs_offset);

		/* sc_reader may transfer less than byte_count per call;
		 * loop until the whole chunk has been pulled. */
		while (byte_count > 0) {
			last = (ch + 1)->rc_discrim == xdr_zero;
			ret = xprt->sc_reader(xprt, rqstp, head,
					      &page_no, &page_offset,
					      handle, byte_count,
					      rs_offset, last);
			if (ret < 0)
				goto err;
			byte_count -= ret;
			rs_offset += ret;
			head->arg.buflen += ret;
		}
	}

	/* Read list may need XDR round-up (see RFC 5666, s. 3.7) */
	if (page_offset & 3) {
		u32 pad = 4 - (page_offset & 3);

		head->arg.page_len += pad;
		head->arg.len += pad;
		head->arg.buflen += pad;
		page_offset += pad;
	}

	ret = 1;
	/* NOTE(review): byte_count is always 0 here (the while loop above
	 * exits only when it reaches 0) — confirm that rdma_copy_tail
	 * expects that, or whether the pre-loop total was intended. */
	if (position && position < head->arg.head[0].iov_len)
		ret = rdma_copy_tail(rqstp, head, position,
				     byte_count, page_offset, page_no);
	head->arg.head[0].iov_len = position;
	head->position = position;

 err:
	/* Detach arg pages. svc_recv will replenish them */
	for (page_no = 0;
	     &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
		rqstp->rq_pages[page_no] = NULL;

	return ret;
}
/* Pull the data advertised in the request's read list from the client,
 * choosing an FRMR-based or local-mapping reader depending on device
 * capabilities, and accumulate it into the pages held by @head.
 *
 * Returns:
 *	0  - no read list was present; nothing to do.
 *	1  - read list processed; the request completes when the RDMA_READs
 *	     complete (the head context keeps the pages).
 *	<0 - error while validating or posting the reads.
 */
static int rdma_read_chunks(struct svcxprt_rdma *xprt,
			    struct rpcrdma_msg *rmsgp,
			    struct svc_rqst *rqstp,
			    struct svc_rdma_op_ctxt *head)
{
	int page_no, ch_count, ret;
	struct rpcrdma_read_chunk *ch;
	u32 page_offset, byte_count;
	u64 rs_offset;
	rdma_reader_fn reader;

	/* If no read list is present, return 0 */
	ch = svc_rdma_get_read_chunk(rmsgp);
	if (!ch)
		return 0;

	/* Bound the chunk count by a single request's page vector. */
	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
	if (ch_count > RPCSVC_MAXPAGES)
		return -EINVAL;

	/* The request is completed when the RDMA_READs complete. The
	 * head context keeps all the pages that comprise the
	 * request.
	 */
	head->arg.head[0] = rqstp->rq_arg.head[0];
	head->arg.tail[0] = rqstp->rq_arg.tail[0];
	head->arg.pages = &head->pages[head->count];
	head->hdr_count = head->count;
	head->arg.page_base = 0;
	head->arg.page_len = 0;
	head->arg.len = rqstp->rq_arg.len;
	head->arg.buflen = rqstp->rq_arg.buflen;

	/* Use FRMR if supported */
	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
		reader = rdma_read_chunk_frmr;
	else
		reader = rdma_read_chunk_lcl;

	page_no = 0; page_offset = 0;
	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	     ch->rc_discrim != 0; ch++) {

		/* Decode the big-endian chunk target fields. */
		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
				 &rs_offset);
		byte_count = ntohl(ch->rc_target.rs_length);

		/* The reader may transfer less than byte_count per call;
		 * loop until the whole chunk has been pulled. */
		while (byte_count > 0) {
			ret = reader(xprt, rqstp, head,
				     &page_no, &page_offset,
				     ntohl(ch->rc_target.rs_handle),
				     byte_count, rs_offset,
				     ((ch+1)->rc_discrim == 0) /* last */);
			if (ret < 0)
				goto err;
			byte_count -= ret;
			rs_offset += ret;
			head->arg.buflen += ret;
		}
	}
	ret = 1;
 err:
	/* Detach arg pages. svc_recv will replenish them */
	for (page_no = 0;
	     &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
		rqstp->rq_pages[page_no] = NULL;

	return ret;
}
/* Use RDMA_READ to read data from the advertised client buffer into the
 * XDR stream starting at rq_arg.head[0].iov_base.
 *
 * Returns:
 *	0  - no read list was present; nothing to do.
 *	1  - read list processed; the RPC must be deferred until the posted
 *	     RDMA_READs complete (I/O completion re-enqueues the transport).
 *	<0 - error while mapping or posting the reads.
 *
 * NOTE: the ctxt must not be touched after the last WR has been posted:
 * I/O completion may run on another processor and free/modify it.
 */
static int rdma_read_xdr(struct svcxprt_rdma *xprt,
			 struct rpcrdma_msg *rmsgp,
			 struct svc_rqst *rqstp,
			 struct svc_rdma_op_ctxt *hdr_ctxt)
{
	struct ib_send_wr read_wr;
	struct ib_send_wr inv_wr;
	int err = 0;
	int ch_no;
	int ch_count;
	int byte_count;
	int sge_count;
	u64 sgl_offset;
	struct rpcrdma_read_chunk *ch;
	struct svc_rdma_op_ctxt *ctxt = NULL;
	struct svc_rdma_req_map *rpl_map;
	struct svc_rdma_req_map *chl_map;

	/* If no read list is present, return 0 */
	ch = svc_rdma_get_read_chunk(rmsgp);
	if (!ch)
		return 0;

	/* Bound the chunk count by a single request's page vector. */
	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
	if (ch_count > RPCSVC_MAXPAGES)
		return -EINVAL;

	/* Allocate temporary reply and chunk maps */
	rpl_map = svc_rdma_get_req_map();
	chl_map = svc_rdma_get_req_map();

	/* Map the data sink either with plain SGEs or, when the device
	 * supports fast-register page lists, via FRMR. */
	if (!xprt->sc_frmr_pg_list_len)
		sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
					    rpl_map, chl_map, ch_count,
					    byte_count);
	else
		sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
						 rpl_map, chl_map, ch_count,
						 byte_count);
	if (sge_count < 0) {
		err = -EIO;
		goto out;
	}

	sgl_offset = 0;
	ch_no = 0;

	/* Post one (or more, via next_sge) RDMA_READ per chunk entry. */
	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	     ch->rc_discrim != 0; ch++, ch_no++) {
		u64 rs_offset;
next_sge:
		ctxt = svc_rdma_get_context(xprt);
		ctxt->direction = DMA_FROM_DEVICE;
		ctxt->frmr = hdr_ctxt->frmr;
		ctxt->read_hdr = NULL;
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

		/* Prepare READ WR; rs_handle/rs_offset arrive in network
		 * byte order and are converted here. */
		memset(&read_wr, 0, sizeof read_wr);
		read_wr.wr_id = (unsigned long)ctxt;
		read_wr.opcode = IB_WR_RDMA_READ;
		ctxt->wr_op = read_wr.opcode;
		read_wr.send_flags = IB_SEND_SIGNALED;
		read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle);
		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
				 &rs_offset);
		read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset;
		read_wr.sg_list = ctxt->sge;
		read_wr.num_sge =
			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
		err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
					&rpl_map->sge[chl_map->ch[ch_no].start],
					&sgl_offset,
					read_wr.num_sge);
		if (err) {
			svc_rdma_unmap_dma(ctxt);
			svc_rdma_put_context(ctxt, 0);
			goto out;
		}
		if (((ch+1)->rc_discrim == 0) &&
		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
			/* Mark the last RDMA_READ so completion knows all
			 * RPC data has been fetched and the RPC can be
			 * enqueued. */
			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
			if (hdr_ctxt->frmr) {
				set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
				/* Invalidate the local MR used to map the
				 * data sink. */
				if (xprt->sc_dev_caps &
				    SVCRDMA_DEVCAP_READ_W_INV) {
					/* Device can invalidate as part of
					 * the READ itself. */
					read_wr.opcode =
						IB_WR_RDMA_READ_WITH_INV;
					ctxt->wr_op = read_wr.opcode;
					read_wr.ex.invalidate_rkey =
						ctxt->frmr->mr->lkey;
				} else {
					/* Prepare INVALIDATE WR chained
					 * after the READ. */
					memset(&inv_wr, 0, sizeof inv_wr);
					inv_wr.opcode = IB_WR_LOCAL_INV;
					inv_wr.send_flags = IB_SEND_SIGNALED;
					inv_wr.ex.invalidate_rkey =
						hdr_ctxt->frmr->mr->lkey;
					read_wr.next = &inv_wr;
				}
			}
			ctxt->read_hdr = hdr_ctxt;
		}
		/* Post the read */
		err = svc_rdma_send(xprt, &read_wr);
		if (err) {
			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
			       err);
			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
			svc_rdma_unmap_dma(ctxt);
			svc_rdma_put_context(ctxt, 0);
			goto out;
		}
		atomic_inc(&rdma_stat_read);

		/* If the chunk needed more SGEs than one WR could carry,
		 * advance the map window and post another READ for the
		 * same chunk. */
		if (read_wr.num_sge < chl_map->ch[ch_no].count) {
			chl_map->ch[ch_no].count -= read_wr.num_sge;
			chl_map->ch[ch_no].start += read_wr.num_sge;
			goto next_sge;
		}
		sgl_offset = 0;
		err = 1;
	}

out:
	svc_rdma_put_req_map(rpl_map);
	svc_rdma_put_req_map(chl_map);

	/* Detach arg pages. svc_recv will replenish them */
	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
		rqstp->rq_pages[ch_no] = NULL;

	/* Detach res pages. svc_release must see a resused count of zero
	 * or it will attempt to put them. */
	while (rqstp->rq_resused)
		rqstp->rq_respages[--rqstp->rq_resused] = NULL;

	return err;
}