/*
 * Now that we are going to post the WRs we can update the lkey and need_inval
 * state on the MRs.  If we were doing this at init time, we would get double
 * or missing invalidations if a context was initialized but not actually
 * posted.
 */
static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
{
	reg->mr->need_inval = need_inval;
	ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
	reg->reg_wr.key = reg->mr->lkey;
	reg->sge.lkey = reg->mr->lkey;
}
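/*
 * For reference, the two helpers used throughout this section, paraphrased
 * from include/rdma/ib_verbs.h (a sketch of the mechanism, not an
 * authoritative copy). Only the low 8 bits of an lkey/rkey -- the "key" or
 * variant portion -- are under consumer control; the upper 24 bits identify
 * the MR itself.
 */
static inline u32 ib_inc_rkey(u32 rkey)
{
	const u32 mask = 0x000000ff;

	/* Bump only the low 8 variant bits; the MR index is preserved. */
	return ((rkey + 1) & mask) | (rkey & ~mask);
}

static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
{
	/* Install the new 8-bit key in both the local and remote keys. */
	mr->lkey = (mr->lkey & 0xffffff00) | newkey;
	mr->rkey = (mr->rkey & 0xffffff00) | newkey;
}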
/**
 * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation
 * @ctx:	context to operate on
 * @qp:		queue pair to operate on
 * @port_num:	port num to which the connection is bound
 * @cqe:	completion queue entry for the last WR
 * @chain_wr:	WR to append to the posted chain
 *
 * Return the WR chain for the set of RDMA READ/WRITE operations described by
 * @ctx, as well as any memory registration operations needed.  If @chain_wr
 * is non-NULL the WR it points to will be appended to the chain of WRs posted.
 * If @chain_wr is not set @cqe must be set so that the caller gets a
 * completion notification.
 */
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
	struct ib_send_wr *first_wr, *last_wr;
	int i;

	switch (ctx->type) {
	case RDMA_RW_SIG_MR:
		rdma_rw_update_lkey(&ctx->sig->data, true);
		if (ctx->sig->prot.mr)
			rdma_rw_update_lkey(&ctx->sig->prot, true);

		ctx->sig->sig_mr->need_inval = true;
		ib_update_fast_reg_key(ctx->sig->sig_mr,
			ib_inc_rkey(ctx->sig->sig_mr->lkey));
		ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey;

		if (ctx->sig->data.inv_wr.next)
			first_wr = &ctx->sig->data.inv_wr;
		else
			first_wr = &ctx->sig->data.reg_wr.wr;
		last_wr = &ctx->sig->data.wr.wr;
		break;
	case RDMA_RW_MR:
		for (i = 0; i < ctx->nr_ops; i++) {
			rdma_rw_update_lkey(&ctx->reg[i],
				ctx->reg[i].wr.wr.opcode !=
					IB_WR_RDMA_READ_WITH_INV);
		}

		if (ctx->reg[0].inv_wr.next)
			first_wr = &ctx->reg[0].inv_wr;
		else
			first_wr = &ctx->reg[0].reg_wr.wr;
		last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
		break;
	case RDMA_RW_MULTI_WR:
		first_wr = &ctx->map.wrs[0].wr;
		last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
		break;
	case RDMA_RW_SINGLE_WR:
		first_wr = &ctx->single.wr.wr;
		last_wr = &ctx->single.wr.wr;
		break;
	default:
		BUG();
	}

	if (chain_wr) {
		last_wr->next = chain_wr;
	} else {
		last_wr->wr_cqe = cqe;
		last_wr->send_flags |= IB_SEND_SIGNALED;
	}

	return first_wr;
}
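/*
 * Minimal usage sketch (hypothetical caller, not from the original source):
 * obtain the chain and hand its head to ib_post_send() -- essentially what
 * the kernel's rdma_rw_ctx_post() wrapper does on top of this function.
 */
static int example_post_rdma_rw(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct ib_cqe *cqe)
{
	struct ib_send_wr *first_wr, *bad_wr;

	/* No chain_wr here, so @cqe must be valid to get a completion. */
	first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, NULL);
	return ib_post_send(qp, first_wr, &bad_wr);
}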
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
		struct iser_pi_context *pi_ctx,
		struct iser_mem_reg *data_reg,
		struct iser_mem_reg *prot_reg,
		struct iser_mem_reg *sig_reg)
{
	struct iser_tx_desc *tx_desc = &iser_task->desc;
	struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
	struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
	struct ib_sig_handover_wr *wr;
	struct ib_mr *mr = pi_ctx->sig_mr;
	int ret;

	memset(sig_attrs, 0, sizeof(*sig_attrs));
	ret = iser_set_sig_attrs(iser_task->sc, sig_attrs);
	if (ret)
		goto err;

	iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);

	if (pi_ctx->sig_mr_valid)
		iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);

	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));

	wr = container_of(iser_tx_next_wr(tx_desc), struct ib_sig_handover_wr,
			  wr);
	wr->wr.opcode = IB_WR_REG_SIG_MR;
	wr->wr.wr_cqe = cqe;
	wr->wr.sg_list = &data_reg->sge;
	wr->wr.num_sge = 1;
	wr->wr.send_flags = 0;
	wr->sig_attrs = sig_attrs;
	wr->sig_mr = mr;
	if (scsi_prot_sg_count(iser_task->sc))
		wr->prot = &prot_reg->sge;
	else
		wr->prot = NULL;
	wr->access_flags = IB_ACCESS_LOCAL_WRITE |
			   IB_ACCESS_REMOTE_READ |
			   IB_ACCESS_REMOTE_WRITE;
	pi_ctx->sig_mr_valid = 1;

	sig_reg->sge.lkey = mr->lkey;
	sig_reg->rkey = mr->rkey;
	sig_reg->sge.addr = 0;
	sig_reg->sge.length = scsi_transfer_length(iser_task->sc);

	iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n",
		 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
		 sig_reg->sge.length);
err:
	return ret;
}
static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
{
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
	struct ib_send_wr *failed_wr;
	struct ib_reg_wr reg_wr;
	int ret, off = 0;

	while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
		atomic_inc(&ibmr->ic->i_fastreg_wrs);
		cpu_relax();
	}

	ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, &off,
				PAGE_SIZE);
	if (unlikely(ret != ibmr->sg_len))
		return ret < 0 ? ret : -EINVAL;

	/* Perform a WR for the fast_reg_mr. Each individual page
	 * in the sg list is added to the fast reg page list and placed
	 * inside the fast_reg_mr WR.  The key used is a rolling 8bit
	 * counter, which should guarantee uniqueness.
	 */
	ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
	frmr->fr_state = FRMR_IS_INUSE;

	memset(&reg_wr, 0, sizeof(reg_wr));
	reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.wr.num_sge = 0;
	reg_wr.mr = frmr->mr;
	reg_wr.key = frmr->mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE |
			IB_ACCESS_REMOTE_READ |
			IB_ACCESS_REMOTE_WRITE;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;

	failed_wr = &reg_wr.wr;
	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr);
	WARN_ON(failed_wr != &reg_wr.wr);
	if (unlikely(ret)) {
		/* Failure here can be because of -ENOMEM as well */
		frmr->fr_state = FRMR_IS_STALE;
		atomic_inc(&ibmr->ic->i_fastreg_wrs);
		if (printk_ratelimit())
			pr_warn("RDS/IB: %s returned error(%d)\n",
				__func__, ret);
	}
	return ret;
}
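/*
 * Note (added for clarity): ib_update_fast_reg_key() takes a u8, so passing
 * the plain int ibmr->remap_count above implicitly keeps only its low byte.
 * That is what makes the "rolling 8bit counter" in the comment work without
 * an explicit mask:
 *
 *	ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
 * behaves like
 *	ib_update_fast_reg_key(frmr->mr, (u8)(ibmr->remap_count++ & 0xff));
 */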
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *mem,
			    struct iser_reg_resources *rsc,
			    struct iser_mem_reg *reg)
{
	struct iser_tx_desc *tx_desc = &iser_task->desc;
	struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
	struct ib_mr *mr = rsc->mr;
	struct ib_reg_wr *wr;
	int n;

	if (rsc->mr_valid)
		iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);

	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));

	n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K);
	if (unlikely(n != mem->size)) {
		iser_err("failed to map sg (%d/%d)\n", n, mem->size);
		return n < 0 ? n : -EINVAL;
	}

	wr = reg_wr(iser_tx_next_wr(tx_desc));
	wr->wr.opcode = IB_WR_REG_MR;
	wr->wr.wr_cqe = cqe;
	wr->wr.send_flags = 0;
	wr->wr.num_sge = 0;
	wr->mr = mr;
	wr->key = mr->rkey;
	wr->access = IB_ACCESS_LOCAL_WRITE  |
		     IB_ACCESS_REMOTE_WRITE |
		     IB_ACCESS_REMOTE_READ;

	rsc->mr_valid = 1;

	reg->sge.lkey = mr->lkey;
	reg->rkey = mr->rkey;
	reg->sge.addr = mr->iova;
	reg->sge.length = mr->length;

	iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=0x%x\n",
		 reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length);

	return 0;
}
/* Issue an RDMA_READ using an FRMR to map the data sink */
static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
				struct svc_rqst *rqstp,
				struct svc_rdma_op_ctxt *head,
				int *page_no,
				u32 *page_offset,
				u32 rs_handle,
				u32 rs_length,
				u64 rs_offset,
				int last)
{
	struct ib_send_wr read_wr;
	struct ib_send_wr inv_wr;
	struct ib_send_wr fastreg_wr;
	u8 key;
	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
	int ret, read, pno;
	u32 pg_off = *page_offset;
	u32 pg_no = *page_no;

	if (IS_ERR(frmr))
		return -ENOMEM;

	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->frmr = frmr;
	pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
	read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);

	frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
	frmr->direction = DMA_FROM_DEVICE;
	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
	frmr->map_len = pages_needed << PAGE_SHIFT;
	frmr->page_list_len = pages_needed;

	for (pno = 0; pno < pages_needed; pno++) {
		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
		head->arg.page_len += len;
		head->arg.len += len;
		if (!pg_off)
			head->count++;
		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		rqstp->rq_next_page = rqstp->rq_respages + 1;
#endif
		frmr->page_list->page_list[pno] =
			ib_dma_map_page(xprt->sc_cm_id->device,
					head->arg.pages[pg_no], 0,
					PAGE_SIZE, DMA_FROM_DEVICE);
		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
					   frmr->page_list->page_list[pno]);
		if (ret)
			goto err;
		atomic_inc(&xprt->sc_dma_used);

		/* adjust offset and wrap to next page if needed */
		pg_off += len;
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			pg_no++;
		}
		rs_length -= len;
	}

	if (last && rs_length == 0)
		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

	/* Bump the key */
	key = (u8)(frmr->mr->lkey & 0x000000FF);
	ib_update_fast_reg_key(frmr->mr, ++key);

	ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
	ctxt->sge[0].lkey = frmr->mr->lkey;
	ctxt->sge[0].length = read;
	ctxt->count = 1;
	ctxt->read_hdr = head;

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.send_flags = IB_SEND_SIGNALED;
	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	fastreg_wr.wr.fast_reg.length = frmr->map_len;
	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
	fastreg_wr.next = &read_wr;

	/* Prepare RDMA_READ */
	memset(&read_wr, 0, sizeof(read_wr));
	read_wr.send_flags = IB_SEND_SIGNALED;
	read_wr.wr.rdma.rkey = rs_handle;
	read_wr.wr.rdma.remote_addr = rs_offset;
	read_wr.sg_list = ctxt->sge;
	read_wr.num_sge = 1;
	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
		read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
		read_wr.wr_id = (unsigned long)ctxt;
		read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
	} else {
		read_wr.opcode = IB_WR_RDMA_READ;
		read_wr.next = &inv_wr;
		/* Prepare invalidate */
		memset(&inv_wr, 0, sizeof(inv_wr));
		inv_wr.wr_id = (unsigned long)ctxt;
		inv_wr.opcode = IB_WR_LOCAL_INV;
		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
	}
	ctxt->wr_op = read_wr.opcode;

	/* Post the chain */
	ret = svc_rdma_send(xprt, &fastreg_wr);
	if (ret) {
		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		goto err;
	}

	/* return current location in page array */
	*page_no = pg_no;
	*page_offset = pg_off;
	ret = read;
	atomic_inc(&rdma_stat_read);
	return ret;
 err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	svc_rdma_put_frmr(xprt, frmr);
	return ret;
}
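/*
 * Context note: the function above uses the old fast registration API --
 * IB_WR_FAST_REG_MR with an explicit ib_fast_reg_page_list that the caller
 * DMA-maps page by page. The version that follows appears to be the same
 * function after conversion to the IB_WR_REG_MR / ib_map_mr_sg() interface,
 * where the caller builds a scatterlist and the core/driver construct the
 * page list internally.
 */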
/* Issue an RDMA_READ using an FRMR to map the data sink */
int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
			 struct svc_rqst *rqstp,
			 struct svc_rdma_op_ctxt *head,
			 int *page_no,
			 u32 *page_offset,
			 u32 rs_handle,
			 u32 rs_length,
			 u64 rs_offset,
			 bool last)
{
	struct ib_rdma_wr read_wr;
	struct ib_send_wr inv_wr;
	struct ib_reg_wr reg_wr;
	u8 key;
	int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
	int ret, read, pno, dma_nents, n;
	u32 pg_off = *page_offset;
	u32 pg_no = *page_no;

	if (IS_ERR(frmr))
		return -ENOMEM;

	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->frmr = frmr;
	nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
	read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);

	frmr->direction = DMA_FROM_DEVICE;
	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
	frmr->sg_nents = nents;

	for (pno = 0; pno < nents; pno++) {
		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
		head->arg.page_len += len;
		head->arg.len += len;
		if (!pg_off)
			head->count++;

		sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
			    len, pg_off);

		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
		rqstp->rq_next_page = rqstp->rq_respages + 1;

		/* adjust offset and wrap to next page if needed */
		pg_off += len;
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			pg_no++;
		}
		rs_length -= len;
	}

	if (last && rs_length == 0)
		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

	dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
				  frmr->sg, frmr->sg_nents, frmr->direction);
	if (!dma_nents) {
		pr_err("svcrdma: failed to dma map sg %p\n", frmr->sg);
		return -ENOMEM;
	}
	atomic_inc(&xprt->sc_dma_used);

	n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
	if (unlikely(n != frmr->sg_nents)) {
		pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
		       frmr->mr, n, frmr->sg_nents);
		return n < 0 ? n : -EINVAL;
	}

	/* Bump the key */
	key = (u8)(frmr->mr->lkey & 0x000000FF);
	ib_update_fast_reg_key(frmr->mr, ++key);

	ctxt->sge[0].addr = frmr->mr->iova;
	ctxt->sge[0].lkey = frmr->mr->lkey;
	ctxt->sge[0].length = frmr->mr->length;
	ctxt->count = 1;
	ctxt->read_hdr = head;

	/* Prepare REG WR */
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.wr.wr_id = 0;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
	reg_wr.wr.num_sge = 0;
	reg_wr.mr = frmr->mr;
	reg_wr.key = frmr->mr->lkey;
	reg_wr.access = frmr->access_flags;
	reg_wr.wr.next = &read_wr.wr;

	/* Prepare RDMA_READ */
	memset(&read_wr, 0, sizeof(read_wr));
	read_wr.wr.send_flags = IB_SEND_SIGNALED;
	read_wr.rkey = rs_handle;
	read_wr.remote_addr = rs_offset;
	read_wr.wr.sg_list = ctxt->sge;
	read_wr.wr.num_sge = 1;
	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
		read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
		read_wr.wr.wr_id = (unsigned long)ctxt;
		read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
	} else {
		read_wr.wr.opcode = IB_WR_RDMA_READ;
		read_wr.wr.next = &inv_wr;
		/* Prepare invalidate */
		memset(&inv_wr, 0, sizeof(inv_wr));
		inv_wr.wr_id = (unsigned long)ctxt;
		inv_wr.opcode = IB_WR_LOCAL_INV;
		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
	}
	ctxt->wr_op = read_wr.wr.opcode;

	/* Post the chain */
	ret = svc_rdma_send(xprt, &reg_wr.wr);
	if (ret) {
		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		goto err;
	}

	/* return current location in page array */
	*page_no = pg_no;
	*page_offset = pg_off;
	ret = read;
	atomic_inc(&rdma_stat_read);
	return ret;
 err:
	ib_dma_unmap_sg(xprt->sc_cm_id->device,
			frmr->sg, frmr->sg_nents, frmr->direction);
	svc_rdma_put_context(ctxt, 0);
	svc_rdma_put_frmr(xprt, frmr);
	return ret;
}
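/*
 * Note (added for clarity): the open-coded "Bump the key" sequence above,
 *
 *	key = (u8)(frmr->mr->lkey & 0x000000FF);
 *	ib_update_fast_reg_key(frmr->mr, ++key);
 *
 * is equivalent to the ib_inc_rkey() form used by the rdma_rw and iSER
 * callers earlier in this section:
 *
 *	ib_update_fast_reg_key(frmr->mr, ib_inc_rkey(frmr->mr->lkey));
 *
 * Both roll only the low 8 variant bits of the key.
 */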
/* Post a REG_MR Work Request to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	    int nsegs, bool writing, struct rpcrdma_mr **out)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
	struct rpcrdma_frwr *frwr;
	struct rpcrdma_mr *mr;
	struct ib_mr *ibmr;
	struct ib_reg_wr *reg_wr;
	struct ib_send_wr *bad_wr;
	int rc, i, n;
	u8 key;

	mr = NULL;
	do {
		if (mr)
			rpcrdma_mr_defer_recovery(mr);
		mr = rpcrdma_mr_get(r_xprt);
		if (!mr)
			return ERR_PTR(-ENOBUFS);
	} while (mr->frwr.fr_state != FRWR_IS_INVALID);
	frwr = &mr->frwr;
	frwr->fr_state = FRWR_IS_VALID;

	if (nsegs > ia->ri_max_frwr_depth)
		nsegs = ia->ri_max_frwr_depth;
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mr->mr_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
				   seg->mr_len);

		++seg;
		++i;
		if (holes_ok)
			continue;
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mr->mr_dir = rpcrdma_data_dir(writing);

	mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
	if (!mr->mr_nents)
		goto out_dmamap_err;

	ibmr = frwr->fr_mr;
	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
	if (unlikely(n != mr->mr_nents))
		goto out_mapmr_err;

	key = (u8)(ibmr->rkey & 0x000000FF);
	ib_update_fast_reg_key(ibmr, ++key);

	reg_wr = &frwr->fr_regwr;
	reg_wr->wr.next = NULL;
	reg_wr->wr.opcode = IB_WR_REG_MR;
	frwr->fr_cqe.done = frwr_wc_fastreg;
	reg_wr->wr.wr_cqe = &frwr->fr_cqe;
	reg_wr->wr.num_sge = 0;
	reg_wr->wr.send_flags = 0;
	reg_wr->mr = ibmr;
	reg_wr->key = ibmr->rkey;
	reg_wr->access = writing ?
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
	if (rc)
		goto out_senderr;

	mr->mr_handle = ibmr->rkey;
	mr->mr_length = ibmr->length;
	mr->mr_offset = ibmr->iova;

	*out = mr;
	return seg;

out_dmamap_err:
	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
	       mr->mr_sg, i);
	frwr->fr_state = FRWR_IS_INVALID;
	rpcrdma_mr_put(mr);
	return ERR_PTR(-EIO);

out_mapmr_err:
	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
	       frwr->fr_mr, n, mr->mr_nents);
	rpcrdma_mr_defer_recovery(mr);
	return ERR_PTR(-EIO);

out_senderr:
	pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
	rpcrdma_mr_defer_recovery(mr);
	return ERR_PTR(-ENOTCONN);
}
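/*
 * Hypothetical sketch of the invalidation side (not part of frwr_op_map;
 * all names here are illustrative): once the caller is done with the
 * region, the rkey handed out in mr->mr_handle is retired with a LOCAL_INV
 * WR, after which the MR can be re-registered under a freshly bumped key.
 */
static int example_local_inv(struct ib_qp *qp, struct rpcrdma_mr *mr,
			     struct ib_cqe *cqe)
{
	struct ib_send_wr inv_wr, *bad_wr;

	memset(&inv_wr, 0, sizeof(inv_wr));
	inv_wr.opcode = IB_WR_LOCAL_INV;
	inv_wr.wr_cqe = cqe;
	inv_wr.send_flags = IB_SEND_SIGNALED;
	inv_wr.ex.invalidate_rkey = mr->mr_handle;

	return ib_post_send(qp, &inv_wr, &bad_wr);
}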