/*
 * Use RDMA_READ to read data from the advertised client buffer into the
 * XDR stream starting at rq_arg.head[0].iov_base.
 * Each chunk in the array contains the following fields:
 * discrim  - '1'; this is not used for data placement
 * position - the XDR stream offset (the same for every chunk)
 * handle   - RMR for the client memory region
 * length   - data transfer length
 * offset   - 64 bit tagged offset in the remote memory region
 *
 * On our side, we need to read into a pagelist. The first page immediately
 * follows the RPC header.
 *
 * This function returns:
 * 0  - No error and no read-list found.
 *
 * 1  - Successful read-list processing. The data is not yet in
 *      the pagelist and therefore the RPC request must be deferred. The
 *      I/O completion will enqueue the transport again and
 *      svc_rdma_recvfrom will complete the request.
 *
 * <0 - Error processing/posting the read-list.
 *
 * NOTE: The ctxt must not be touched after the last WR has been posted
 * because the I/O completion processing may occur on another
 * processor and free / modify the context. Do not touch it!
 */
static int rdma_read_xdr(struct svcxprt_rdma *xprt,
                         struct rpcrdma_msg *rmsgp,
                         struct svc_rqst *rqstp,
                         struct svc_rdma_op_ctxt *hdr_ctxt)
{
        struct ib_send_wr read_wr;
        struct ib_send_wr inv_wr;
        int err = 0;
        int ch_no;
        int ch_count;
        int byte_count;
        int sge_count;
        u64 sgl_offset;
        struct rpcrdma_read_chunk *ch;
        struct svc_rdma_op_ctxt *ctxt = NULL;
        struct svc_rdma_req_map *rpl_map;
        struct svc_rdma_req_map *chl_map;

        /* If no read list is present, return 0 */
        ch = svc_rdma_get_read_chunk(rmsgp);
        if (!ch)
                return 0;

        svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
        if (ch_count > RPCSVC_MAXPAGES)
                return -EINVAL;

        /* Allocate temporary reply and chunk maps */
        rpl_map = svc_rdma_get_req_map();
        chl_map = svc_rdma_get_req_map();

        if (!xprt->sc_frmr_pg_list_len)
                sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
                                            rpl_map, chl_map, ch_count,
                                            byte_count);
        else
                sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
                                                 rpl_map, chl_map, ch_count,
                                                 byte_count);
        if (sge_count < 0) {
                err = -EIO;
                goto out;
        }

        sgl_offset = 0;
        ch_no = 0;

        /* Walk the read list and post one or more RDMA_READ WRs per chunk */
        for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
             ch->rc_discrim != 0; ch++, ch_no++) {
                u64 rs_offset;
next_sge:
                ctxt = svc_rdma_get_context(xprt);
                ctxt->direction = DMA_FROM_DEVICE;
                ctxt->frmr = hdr_ctxt->frmr;
                ctxt->read_hdr = NULL;
                clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
                clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

                /* Prepare READ WR; the chunk fields arrive in network
                 * byte order and must be converted before use */
                memset(&read_wr, 0, sizeof read_wr);
                read_wr.wr_id = (unsigned long)ctxt;
                read_wr.opcode = IB_WR_RDMA_READ;
                ctxt->wr_op = read_wr.opcode;
                read_wr.send_flags = IB_SEND_SIGNALED;
                read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle);
                xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
                                 &rs_offset);
                read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset;
                read_wr.sg_list = ctxt->sge;
                read_wr.num_sge =
                        rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
                err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
                                        &rpl_map->sge[chl_map->ch[ch_no].start],
                                        &sgl_offset,
                                        read_wr.num_sge);
                if (err) {
                        svc_rdma_unmap_dma(ctxt);
                        svc_rdma_put_context(ctxt, 0);
                        goto out;
                }
                if (((ch+1)->rc_discrim == 0) &&
                    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
                        /*
                         * Mark the last RDMA_READ with a bit to
                         * indicate all RPC data has been fetched from
                         * the client and the RPC needs to be enqueued.
                         */
                        set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
                        if (hdr_ctxt->frmr) {
                                set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
                                /*
                                 * Invalidate the local MR used to map the data
                                 * sink.
                                 */
                                if (xprt->sc_dev_caps &
                                    SVCRDMA_DEVCAP_READ_W_INV) {
                                        read_wr.opcode =
                                                IB_WR_RDMA_READ_WITH_INV;
                                        ctxt->wr_op = read_wr.opcode;
                                        read_wr.ex.invalidate_rkey =
                                                ctxt->frmr->mr->lkey;
                                } else {
                                        /* Prepare INVALIDATE WR */
                                        memset(&inv_wr, 0, sizeof inv_wr);
                                        inv_wr.opcode = IB_WR_LOCAL_INV;
                                        inv_wr.send_flags = IB_SEND_SIGNALED;
                                        inv_wr.ex.invalidate_rkey =
                                                hdr_ctxt->frmr->mr->lkey;
                                        read_wr.next = &inv_wr;
                                }
                        }
                        ctxt->read_hdr = hdr_ctxt;
                }
                /* Post the read */
                err = svc_rdma_send(xprt, &read_wr);
                if (err) {
                        printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
                               err);
                        set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
                        svc_rdma_unmap_dma(ctxt);
                        svc_rdma_put_context(ctxt, 0);
                        goto out;
                }
                atomic_inc(&rdma_stat_read);

                /* If this chunk needs more SGEs than one WR can carry,
                 * advance the chunk map and post another READ for the
                 * remainder */
                if (read_wr.num_sge < chl_map->ch[ch_no].count) {
                        chl_map->ch[ch_no].count -= read_wr.num_sge;
                        chl_map->ch[ch_no].start += read_wr.num_sge;
                        goto next_sge;
                }
                sgl_offset = 0;
                err = 1;
        }

 out:
        svc_rdma_put_req_map(rpl_map);
        svc_rdma_put_req_map(chl_map);

        /* Detach arg pages. svc_recv will replenish them */
        for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
                rqstp->rq_pages[ch_no] = NULL;

        /*
         * Detach res pages. svc_release must see a resused count of
         * zero or it will attempt to put them.
         */
        while (rqstp->rq_resused)
                rqstp->rq_respages[--rqstp->rq_resused] = NULL;

        return err;
}
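
/*
 * For reference: the read-chunk fields decoded above arrive in network
 * byte order. A sketch of the wire layout as this function assumes it,
 * with field names taken from the accesses above (the authoritative
 * declarations live in include/linux/sunrpc/rpc_rdma.h):
 *
 *      struct rpcrdma_segment {
 *              __be32 rs_handle;       -- RMR of the client memory region
 *              __be32 rs_length;       -- data transfer length in bytes
 *              __be64 rs_offset;       -- tagged offset in that region
 *      };
 *
 *      struct rpcrdma_read_chunk {
 *              __be32 rc_discrim;      -- '1' while another chunk follows
 *              __be32 rc_position;     -- XDR stream offset of the data
 *              struct rpcrdma_segment rc_target;
 *      };
 *
 * This layout is why rs_handle goes through ntohl() and rs_offset through
 * xdr_decode_hyper() before they are placed in the work request.
 */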
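
/*
 * A hypothetical caller sketch (not code from this file) showing how the
 * three-way return contract documented above would typically be consumed
 * by svc_rdma_recvfrom; the names rdma_xprt and ctxt are assumptions:
 *
 *      err = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
 *      if (err > 0)
 *              return 0;       -- read-list posted; the RPC is deferred
 *                                 until the READ completion re-enqueues
 *                                 the transport
 *      if (err < 0)
 *              ...             -- error processing/posting the read-list;
 *                                 release the receive context
 *      -- err == 0: no read-list; the full RPC is already in rq_arg
 */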