/*
 * Handle a work completion that finished with an error status.
 *
 * Flush errors (IB_WC_WR_FLUSH_ERR) are the expected result of draining a
 * QP and are processed silently; any other status is logged.  Unless the
 * connection was already aborted, disconnect handling is scheduled exactly
 * once — the ISERT_DISCON_CALLED bit guards against scheduling it twice.
 *
 * Per-opcode cleanup:
 *  - SEND:       a zero-SGE send is the drain marker WR, so report the
 *                connection as drained; otherwise fail the owning PDU.
 *  - RDMA_READ:  DMA-unmap the data buffer (if still mapped), fail the PDU.
 *  - RECV:       nothing to do — no task exists yet for a flushed receive.
 *  - RDMA_WRITE: DMA-unmap only; task completion is deferred until the
 *                error for the paired SEND response arrives (see below).
 */
static void isert_handle_wc_error(struct ib_wc *wc)
{
	struct isert_wr *wr = _u64_to_ptr(wc->wr_id);
	struct isert_cmnd *isert_pdu = wr->pdu;
	struct isert_connection *isert_conn = wr->conn;
	struct isert_buf *isert_buf = wr->buf;
	struct isert_device *isert_dev = wr->isert_dev;
	struct ib_device *ib_dev = isert_dev->ib_dev;

	TRACE_ENTRY();

	/* Flush errors are routine during teardown — don't spam the log. */
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("conn:%p wr_id:0x%p status:%s vendor_err:0x%0x\n",
		       isert_conn, wr, wr_status_str(wc->status),
		       wc->vendor_err);

	if (!test_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags))
		if (!test_and_set_bit(ISERT_DISCON_CALLED, &isert_conn->flags))
			isert_sched_discon(isert_conn);

	switch (wr->wr_op) {
	case ISER_WR_SEND:
		if (unlikely(wr->send_wr.num_sge == 0)) /* Drain WR */
			isert_sched_conn_drained(isert_conn);
		else
			isert_pdu_err(&isert_pdu->iscsi);
		break;
	case ISER_WR_RDMA_READ:
		/* sg_cnt != 0 means the buffer is still DMA-mapped. */
		if (isert_buf->sg_cnt != 0) {
			ib_dma_unmap_sg(ib_dev, isert_buf->sg,
					isert_buf->sg_cnt,
					isert_buf->dma_dir);
			isert_buf->sg_cnt = 0;
		}
		isert_pdu_err(&isert_pdu->iscsi);
		break;
	case ISER_WR_RECV:
		/* this should be the Flush, no task has been created yet */
		break;
	case ISER_WR_RDMA_WRITE:
		if (isert_buf->sg_cnt != 0) {
			ib_dma_unmap_sg(ib_dev, isert_buf->sg,
					isert_buf->sg_cnt,
					isert_buf->dma_dir);
			isert_buf->sg_cnt = 0;
		}
		/* RDMA-WR and SEND response of a READ task are sent together,
		 * so when receiving RDMA-WR error, wait until SEND error
		 * arrives to complete the task */
		break;
	default:
		pr_err("unexpected opcode %d, wc:%p wr_id:%p conn:%p\n",
		       wr->wr_op, wc, wr, isert_conn);
		break;
	}

	TRACE_EXIT();
}
/* Reset of a single FMR.
 *
 * The FMR is invalidated first, then its pages are DMA-unmapped; on
 * success the MW goes back to the free list, on failure it is unlinked
 * from rb_all and destroyed.
 */
static void fmr_op_recover_mr(struct rpcrdma_mw *mw)
{
	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
	int err;

	/* ORDER: the hardware invalidate must precede the DMA unmap. */
	err = __fmr_unmap(mw);
	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
			mw->mw_sg, mw->mw_nents, mw->mw_dir);

	if (!err) {
		/* Recovered: the MW can be reused. */
		rpcrdma_put_mw(r_xprt, mw);
		r_xprt->rx_stats.mrs_recovered++;
		return;
	}

	/* Invalidation failed: the MW cannot safely be reused.
	 * Unhook it from rb_all and release it.
	 */
	pr_err("rpcrdma: FMR reset failed (%d), %p released\n", err, mw);
	r_xprt->rx_stats.mrs_orphaned++;
	spin_lock(&r_xprt->rx_buf.rb_mwlock);
	list_del(&mw->mw_all);
	spin_unlock(&r_xprt->rx_buf.rb_mwlock);
	fmr_op_release_mr(mw);
}
/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
 *
 * On success the MW is returned to the free list; on failure it is
 * unlinked from rb_all and released.
 */
static void frwr_op_recover_mr(struct rpcrdma_mw *mw)
{
	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int err;

	/* Replace the MR first, then DMA-unmap its pages. */
	err = __frwr_reset_mr(ia, mw);
	ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);

	if (!err) {
		rpcrdma_put_mw(r_xprt, mw);
		r_xprt->rx_stats.mrs_recovered++;
		return;
	}

	/* Reset failed: orphan the MW and destroy it. */
	pr_err("rpcrdma: FRMR reset failed %d, %p release\n", err, mw);
	r_xprt->rx_stats.mrs_orphaned++;
	spin_lock(&r_xprt->rx_buf.rb_mwlock);
	list_del(&mw->mw_all);
	spin_unlock(&r_xprt->rx_buf.rb_mwlock);
	frwr_op_release_mr(mw);
}
/**
 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
 * @ctx:	context to release
 * @qp:		queue pair to operate on
 * @port_num:	port num to which the connection is bound
 * @sg:		scatterlist that was used for the READ/WRITE
 * @sg_cnt:	number of entries in @sg
 * @dir:	%DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
{
	switch (ctx->type) {
	case RDMA_RW_SINGLE_WR:
		/* nothing was allocated for the single-WR case */
		break;
	case RDMA_RW_MULTI_WR:
		kfree(ctx->map.wrs);
		kfree(ctx->map.sges);
		break;
	case RDMA_RW_MR: {
		int idx;

		/* return every registered MR to the QP's pool */
		for (idx = 0; idx < ctx->nr_ops; idx++)
			ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[idx].mr);
		kfree(ctx->reg);
		break;
	}
	default:
		BUG();
		break;
	}

	/* P2PDMA contexts do not need to be unmapped */
	if (!is_pci_p2pdma_page(sg_page(sg)))
		ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR. */ static void frwr_op_recover_mr(struct rpcrdma_mr *mr) { enum rpcrdma_frwr_state state = mr->frwr.fr_state; struct rpcrdma_xprt *r_xprt = mr->mr_xprt; struct rpcrdma_ia *ia = &r_xprt->rx_ia; int rc; rc = __frwr_mr_reset(ia, mr); if (state != FRWR_FLUSHED_LI) { trace_xprtrdma_dma_unmap(mr); ib_dma_unmap_sg(ia->ri_device, mr->mr_sg, mr->mr_nents, mr->mr_dir); } if (rc) goto out_release; rpcrdma_mr_put(mr); r_xprt->rx_stats.mrs_recovered++; return; out_release: pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr); r_xprt->rx_stats.mrs_orphaned++; spin_lock(&r_xprt->rx_buf.rb_mrlock); list_del(&mr->mr_all); spin_unlock(&r_xprt->rx_buf.rb_mrlock); frwr_op_release_mr(mr); }
void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) { struct ib_device *dev; struct iser_data_buf *data; dev = iser_ctask->iser_conn->ib_conn->device->ib_device; if (iser_ctask->dir[ISER_DIR_IN]) { data = &iser_ctask->data[ISER_DIR_IN]; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } if (iser_ctask->dir[ISER_DIR_OUT]) { data = &iser_ctask->data[ISER_DIR_OUT]; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE); } }
/*
 * DMA-unmap one data descriptor of a task in the given direction.
 */
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *data,
			      enum dma_data_direction dir)
{
	struct ib_device *ib_dev =
		iser_task->iser_conn->ib_conn.device->ib_device;

	ib_dma_unmap_sg(ib_dev, data->sg, data->size, dir);
}
/* Unmap a data op's scatterlist once its send work has completed. */
static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
				   struct rm_data_op *op,
				   int wc_status)
{
	/* Nothing was mapped for a zero-entry op. */
	if (!op->op_nents)
		return;

	ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, op->op_nents,
			DMA_TO_DEVICE);
}
/**
 * rdma_rw_ctx_destroy_signature - release all resources allocated by
 *	rdma_rw_ctx_init_signature
 * @ctx:		context to release
 * @qp:			queue pair to operate on
 * @port_num:		port num to which the connection is bound
 * @sg:			scatterlist that was used for the READ/WRITE
 * @sg_cnt:		number of entries in @sg
 * @prot_sg:		scatterlist that was used for the READ/WRITE of the PI
 * @prot_sg_cnt:	number of entries in @prot_sg
 * @dir:		%DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
		struct scatterlist *prot_sg, u32 prot_sg_cnt,
		enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;

	if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
		return;

	/* data MR and its mapping */
	ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
	ib_dma_unmap_sg(dev, sg, sg_cnt, dir);

	/* protection-information MR, if one was registered */
	if (ctx->sig->prot.mr) {
		ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
		ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
	}

	ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr);
	kfree(ctx->sig);
}
static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, struct rds_rdma_op *op) { if (op->r_mapped) { ib_dma_unmap_sg(ic->i_cm_id->device, op->r_sg, op->r_nents, op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); op->r_mapped = 0; } }
/*
 * Completion handler for an RDMA-WRITE work request: unmap the data
 * buffer, mark it unmapped, and notify the iSCSI layer that the
 * Data-In has been sent.
 */
static void isert_rdma_wr_completion_handler(struct isert_wr *wr)
{
	struct isert_buf *buf = wr->buf;

	ib_dma_unmap_sg(wr->isert_dev->ib_dev, buf->sg, buf->sg_cnt,
			buf->dma_dir);
	buf->sg_cnt = 0;

	isert_data_in_sent(&wr->pdu->iscsi);
}
/**
 * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
 * @ctx:	context to initialize
 * @qp:		queue pair to operate on
 * @port_num:	port num to which the connection is bound
 * @sg:		scatterlist to READ/WRITE from/to
 * @sg_cnt:	number of entries in @sg
 * @sg_offset:	current byte offset into @sg
 * @remote_addr:remote address to read/write (relative to @rkey)
 * @rkey:	remote key to operate on
 * @dir:	%DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the workqueue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	/*
	 * FIX: remember the first mapped entry and the mapped count.  The
	 * skip loop below advances @sg/@sg_cnt, so unmapping with those on
	 * the error path would leak the mappings of the skipped entries.
	 */
	struct scatterlist *map_sg = sg;
	u32 map_sg_cnt;
	int ret;

	if (is_pci_p2pdma_page(sg_page(sg)))
		ret = pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
	else
		ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
	if (!ret)
		return -ENOMEM;
	sg_cnt = ret;
	map_sg_cnt = ret;

	/*
	 * Skip to the S/G entry that sg_offset falls into:
	 */
	for (;;) {
		u32 len = sg_dma_len(sg);

		if (sg_offset < len)
			break;

		sg = sg_next(sg);
		sg_offset -= len;
		sg_cnt--;
	}

	ret = -EIO;
	if (WARN_ON_ONCE(sg_cnt == 0))
		goto out_unmap_sg;

	if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
		ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
				sg_offset, remote_addr, rkey, dir);
	} else if (sg_cnt > 1) {
		ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
				remote_addr, rkey, dir);
	} else {
		ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
				remote_addr, rkey, dir);
	}

	if (ret < 0)
		goto out_unmap_sg;
	return ret;

out_unmap_sg:
	/*
	 * FIX: unmap from the first mapped entry with the full mapped
	 * count, and skip P2PDMA pages (which were mapped with
	 * pci_p2pdma_map_sg()), mirroring rdma_rw_ctx_destroy().
	 */
	if (!is_pci_p2pdma_page(sg_page(map_sg)))
		ib_dma_unmap_sg(dev, map_sg, map_sg_cnt, dir);
	return ret;
}
/*
 * Unmap an RDS message when its send work request completes, fire any
 * RDMA completion notification, bump the RDMA byte counters, and drop
 * the send's reference to the message.
 */
static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
				 struct rds_ib_send_work *send,
				 int wc_status)
{
	struct rds_message *rm = send->s_rm;

	rdsdebug("ic %p send %p rm %p\n", ic, send, rm);

	ib_dma_unmap_sg(ic->i_cm_id->device,
			rm->m_sg, rm->m_nents,
			DMA_TO_DEVICE);

	if (rm->m_rdma_op != NULL) {
		rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);

		/* If the user asked for a completion notification on this
		 * message, we can implement three different semantics:
		 * 1. Notify when we received the ACK on the RDS message
		 *    that was queued with the RDMA. This provides reliable
		 *    notification of RDMA status at the expense of a one-way
		 *    packet delay.
		 * 2. Notify when the IB stack gives us the completion event for
		 *    the RDMA operation.
		 * 3. Notify when the IB stack gives us the completion event for
		 *    the accompanying RDS messages.
		 * Here, we implement approach #3. To implement approach #2,
		 * call rds_rdma_send_complete from the cq_handler. To implement #1,
		 * don't call rds_rdma_send_complete at all, and fall back to the notify
		 * handling in the ACK processing code.
		 *
		 * Note: There's no need to explicitly sync any RDMA buffers using
		 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
		 * operation itself unmapped the RDMA buffers, which takes care
		 * of synching.
		 */
		rds_ib_send_rdma_complete(rm, wc_status);

		if (rm->m_rdma_op->r_write)
			rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
		else
			rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
	}

	/* If anyone waited for this message to get flushed out, wake
	 * them up now */
	rds_message_unmapped(rm);

	rds_message_put(rm);
	send->s_rm = NULL;
}
/**
 * iser_finalize_rdma_unaligned_sg
 *
 * Tear down the bounce buffer used for an SG list that was not aligned
 * for direct RDMA: unmap the single-entry copy buffer, and for the IN
 * (read) direction copy the received data back into the caller's
 * scatterlist page by page.  Finally free the bounce buffer, which was
 * allocated either via the page allocator (when larger than
 * ISER_KMALLOC_THRESHOLD) or via kmalloc.
 */
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
				     enum iser_data_dir cmd_dir)
{
	struct ib_device *dev;
	struct iser_data_buf *mem_copy;
	unsigned long cmd_data_len;

	dev = iser_ctask->iser_conn->ib_conn->device->ib_device;
	mem_copy = &iser_ctask->data_copy[cmd_dir];

	/* The bounce buffer was mapped as a single contiguous SG entry. */
	ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1,
			(cmd_dir == ISER_DIR_OUT) ?
			DMA_TO_DEVICE : DMA_FROM_DEVICE);

	if (cmd_dir == ISER_DIR_IN) {
		char *mem;
		struct scatterlist *sg;
		unsigned char *p, *to;
		unsigned int sg_size;
		int i;

		/* copy back read RDMA to unaligned sg */
		mem = mem_copy->copy_buf;

		sg = (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf;
		sg_size = iser_ctask->data[ISER_DIR_IN].size;

		for (p = mem, i = 0; i < sg_size; i++) {
			/* pre-highmem-API kmap with softirq slot */
			to = kmap_atomic(sg[i].page, KM_SOFTIRQ0);
			memcpy(to + sg[i].offset, p, sg[i].length);
			kunmap_atomic(to, KM_SOFTIRQ0);
			p += sg[i].length;
		}
	}

	cmd_data_len = iser_ctask->data[cmd_dir].data_len;

	/* Free via the same allocator that iser_start_rdma_unaligned_sg
	 * presumably used for this size class — TODO confirm against the
	 * allocation side. */
	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
		free_pages((unsigned long)mem_copy->copy_buf,
			   ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
	else
		kfree(mem_copy->copy_buf);

	mem_copy->copy_buf = NULL;
}
static void rds_ib_send_unmap_data(struct rds_ib_connection *ic, struct rm_data_op *op, int wc_status) { struct rds_message *rm; rm = container_of(op, struct rds_message, data); if (op->op_nents) ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, op->op_nents, DMA_TO_DEVICE); if (rm->data.op_async) rds_ib_send_complete(rm, wc_status, rds_asend_complete); else if (rm->rdma.op_active && rm->rdma.op_remote_complete) rds_ib_send_unmap_rdma(ic, &rm->rdma, wc_status); }
static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic, struct rm_atomic_op *op, int wc_status) { /* unmap atomic recvbuf */ if (op->op_mapped) { ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE); op->op_mapped = 0; } rds_ib_send_complete(container_of(op, struct rds_message, atomic), wc_status, rds_atomic_send_complete); if (op->op_type == RDS_ATOMIC_TYPE_CSWP) rds_ib_stats_inc(s_ib_atomic_cswp); else rds_ib_stats_inc(s_ib_atomic_fadd); }
/*
 * Unmap a completed RDMA op, deliver the RDMA completion notification,
 * and account the transferred bytes.
 */
static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
				   struct rm_rdma_op *op,
				   int wc_status)
{
	if (op->op_mapped) {
		ib_dma_unmap_sg(ic->i_cm_id->device,
				op->op_sg, op->op_nents,
				op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
		op->op_mapped = 0;
	}

	/* If the user asked for a completion notification on this
	 * message, we can implement three different semantics:
	 * 1. Notify when we received the ACK on the RDS message
	 *    that was queued with the RDMA. This provides reliable
	 *    notification of RDMA status at the expense of a one-way
	 *    packet delay.
	 * 2. Notify when the IB stack gives us the completion event for
	 *    the RDMA operation.
	 * 3. Notify when the IB stack gives us the completion event for
	 *    the accompanying RDS messages.
	 * Here, we implement approach #3. To implement approach #2,
	 * we would need to take an event for the rdma WR. To implement #1,
	 * don't call rds_rdma_send_complete at all, and fall back to the notify
	 * handling in the ACK processing code.
	 *
	 * Note: There's no need to explicitly sync any RDMA buffers using
	 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
	 * operation itself unmapped the RDMA buffers, which takes care
	 * of synching.
	 */
	rds_ib_send_complete(container_of(op, struct rds_message, rdma),
			     wc_status, rds_rdma_send_complete);

	/* account bytes in the direction the data actually moved */
	if (op->op_write)
		rds_stats_add(s_send_rdma_bytes, op->op_bytes);
	else
		rds_stats_add(s_recv_rdma_bytes, op->op_bytes);
}
/* Issue an RDMA_READ using an FRMR to map the data sink */ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, int *page_no, u32 *page_offset, u32 rs_handle, u32 rs_length, u64 rs_offset, bool last) { struct ib_rdma_wr read_wr; struct ib_send_wr inv_wr; struct ib_reg_wr reg_wr; u8 key; int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); int ret, read, pno, dma_nents, n; u32 pg_off = *page_offset; u32 pg_no = *page_no; if (IS_ERR(frmr)) return -ENOMEM; ctxt->direction = DMA_FROM_DEVICE; ctxt->frmr = frmr; nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len); read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); frmr->sg_nents = nents; for (pno = 0; pno < nents; pno++) { int len = min_t(int, rs_length, PAGE_SIZE - pg_off); head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; head->arg.page_len += len; head->arg.len += len; if (!pg_off) head->count++; sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no], len, pg_off); rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; rqstp->rq_next_page = rqstp->rq_respages + 1; /* adjust offset and wrap to next page if needed */ pg_off += len; if (pg_off == PAGE_SIZE) { pg_off = 0; pg_no++; } rs_length -= len; } if (last && rs_length == 0) set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); else clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device, frmr->sg, frmr->sg_nents, frmr->direction); if (!dma_nents) { pr_err("svcrdma: failed to dma map sg %p\n", frmr->sg); return -ENOMEM; } atomic_inc(&xprt->sc_dma_used); n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); if (unlikely(n != frmr->sg_nents)) { pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", frmr->mr, n, 
frmr->sg_nents); return n < 0 ? n : -EINVAL; } /* Bump the key */ key = (u8)(frmr->mr->lkey & 0x000000FF); ib_update_fast_reg_key(frmr->mr, ++key); ctxt->sge[0].addr = frmr->mr->iova; ctxt->sge[0].lkey = frmr->mr->lkey; ctxt->sge[0].length = frmr->mr->length; ctxt->count = 1; ctxt->read_hdr = head; /* Prepare REG WR */ reg_wr.wr.opcode = IB_WR_REG_MR; reg_wr.wr.wr_id = 0; reg_wr.wr.send_flags = IB_SEND_SIGNALED; reg_wr.wr.num_sge = 0; reg_wr.mr = frmr->mr; reg_wr.key = frmr->mr->lkey; reg_wr.access = frmr->access_flags; reg_wr.wr.next = &read_wr.wr; /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); read_wr.wr.send_flags = IB_SEND_SIGNALED; read_wr.rkey = rs_handle; read_wr.remote_addr = rs_offset; read_wr.wr.sg_list = ctxt->sge; read_wr.wr.num_sge = 1; if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; read_wr.wr.wr_id = (unsigned long)ctxt; read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; } else { read_wr.wr.opcode = IB_WR_RDMA_READ; read_wr.wr.next = &inv_wr; /* Prepare invalidate */ memset(&inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = (unsigned long)ctxt; inv_wr.opcode = IB_WR_LOCAL_INV; inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; inv_wr.ex.invalidate_rkey = frmr->mr->lkey; } ctxt->wr_op = read_wr.wr.opcode; /* Post the chain */ ret = svc_rdma_send(xprt, ®_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); goto err; } /* return current location in page array */ *page_no = pg_no; *page_offset = pg_off; ret = read; atomic_inc(&rdma_stat_read); return ret; err: ib_dma_unmap_sg(xprt->sc_cm_id->device, frmr->sg, frmr->sg_nents, frmr->direction); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; }
/**
 * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
 * @ctx:	context to initialize
 * @qp:		queue pair to operate on
 * @port_num:	port num to which the connection is bound
 * @sg:		scatterlist to READ/WRITE from/to
 * @sg_cnt:	number of entries in @sg
 * @prot_sg:	scatterlist to READ/WRITE protection information from/to
 * @prot_sg_cnt: number of entries in @prot_sg
 * @sig_attrs:	signature offloading algorithms
 * @remote_addr:remote address to read/write (relative to @rkey)
 * @rkey:	remote key to operate on
 * @dir:	%DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the workqueue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
		struct scatterlist *prot_sg, u32 prot_sg_cnt,
		struct ib_sig_attrs *sig_attrs,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
	struct ib_rdma_wr *rdma_wr;
	struct ib_send_wr *prev_wr = NULL;
	int count = 0, ret;

	/* both SG lists must fit in a single fast-registration MR */
	if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
		pr_err("SG count too large\n");
		return -EINVAL;
	}

	ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
	if (!ret)
		return -ENOMEM;
	sg_cnt = ret;

	/* NOTE(review): if @prot_sg_cnt is 0, this maps a zero-length list
	 * and the 0 return is treated as -ENOMEM — confirm callers always
	 * pass prot_sg_cnt > 0 (or guard this map like the branch below). */
	ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
	if (!ret) {
		ret = -ENOMEM;
		goto out_unmap_sg;
	}
	prot_sg_cnt = ret;

	ctx->type = RDMA_RW_SIG_MR;
	ctx->nr_ops = 1;
	ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL);
	if (!ctx->sig) {
		ret = -ENOMEM;
		goto out_unmap_prot_sg;
	}

	/* register the data MR; it becomes the head of the WR chain */
	ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0);
	if (ret < 0)
		goto out_free_ctx;
	count += ret;
	prev_wr = &ctx->sig->data.reg_wr.wr;

	if (prot_sg_cnt) {
		ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot,
				prot_sg, prot_sg_cnt, 0);
		if (ret < 0)
			goto out_destroy_data_mr;
		count += ret;

		/* chain the optional invalidate before the prot REG_MR */
		if (ctx->sig->prot.inv_wr.next)
			prev_wr->next = &ctx->sig->prot.inv_wr;
		else
			prev_wr->next = &ctx->sig->prot.reg_wr.wr;
		prev_wr = &ctx->sig->prot.reg_wr.wr;
	} else {
		ctx->sig->prot.mr = NULL;
	}

	ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs);
	if (!ctx->sig->sig_mr) {
		ret = -EAGAIN;
		goto out_destroy_prot_mr;
	}

	/* invalidate a previously-used signature MR before re-registering */
	if (ctx->sig->sig_mr->need_inval) {
		memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr));
		ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV;
		ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey;
		prev_wr->next = &ctx->sig->sig_inv_wr;
		prev_wr = &ctx->sig->sig_inv_wr;
	}

	/* REG_SIG_MR WR binds data (and optionally prot) under sig_mr */
	ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
	ctx->sig->sig_wr.wr.wr_cqe = NULL;
	ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge;
	ctx->sig->sig_wr.wr.num_sge = 1;
	ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
	ctx->sig->sig_wr.sig_attrs = sig_attrs;
	ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr;
	if (prot_sg_cnt)
		ctx->sig->sig_wr.prot = &ctx->sig->prot.sge;
	prev_wr->next = &ctx->sig->sig_wr.wr;
	prev_wr = &ctx->sig->sig_wr.wr;
	count++;

	/* the RDMA transfer covers data plus wire-side PI, if any */
	ctx->sig->sig_sge.addr = 0;
	ctx->sig->sig_sge.length = ctx->sig->data.sge.length;
	if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE)
		ctx->sig->sig_sge.length += ctx->sig->prot.sge.length;

	rdma_wr = &ctx->sig->data.wr;
	rdma_wr->wr.sg_list = &ctx->sig->sig_sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	prev_wr->next = &rdma_wr->wr;
	prev_wr = &rdma_wr->wr;
	count++;

	return count;

out_destroy_prot_mr:
	if (prot_sg_cnt)
		ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
out_destroy_data_mr:
	ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
out_free_ctx:
	kfree(ctx->sig);
out_unmap_prot_sg:
	ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
out_unmap_sg:
	ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
	return ret;
}
/*
 * Map a scatterlist into an FRMR-backed ibmr: tear down any previous
 * mapping, DMA-map the new list, validate that the DMA segments form a
 * contiguous page-aligned region (only the first segment may start and
 * only the last may end unaligned), count the pages, and post the
 * fast-registration WR.  Returns 0 on success or a negative errno; on
 * failure the scatterlist is DMA-unmapped again.
 */
static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
			   struct rds_ib_mr_pool *pool,
			   struct rds_ib_mr *ibmr,
			   struct scatterlist *sg, unsigned int sg_len)
{
	struct ib_device *dev = rds_ibdev->dev;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
	int i;
	u32 len;
	int ret = 0;

	/* We want to teardown old ibmr values here and fill it up with
	 * new sg values
	 */
	rds_ib_teardown_mr(ibmr);

	ibmr->sg = sg;
	ibmr->sg_len = sg_len;
	ibmr->sg_dma_len = 0;
	frmr->sg_byte_len = 0;
	WARN_ON(ibmr->sg_dma_len);
	ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
					 DMA_BIDIRECTIONAL);
	if (unlikely(!ibmr->sg_dma_len)) {
		pr_warn("RDS/IB: %s failed!\n", __func__);
		return -EBUSY;
	}

	frmr->sg_byte_len = 0;
	frmr->dma_npages = 0;
	len = 0;

	/* Any alignment violation in the middle of the list makes the
	 * region non-registrable as a single MR. */
	ret = -EINVAL;
	for (i = 0; i < ibmr->sg_dma_len; ++i) {
		unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
		u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);

		frmr->sg_byte_len += dma_len;
		/* an unaligned start is tolerated only on the first entry */
		if (dma_addr & ~PAGE_MASK) {
			if (i > 0)
				goto out_unmap;
			else
				++frmr->dma_npages;
		}

		/* an unaligned end is tolerated only on the last entry */
		if ((dma_addr + dma_len) & ~PAGE_MASK) {
			if (i < ibmr->sg_dma_len - 1)
				goto out_unmap;
			else
				++frmr->dma_npages;
		}

		len += dma_len;
	}
	/* whole pages covered, plus the partial head/tail counted above */
	frmr->dma_npages += len >> PAGE_SHIFT;

	if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) {
		ret = -EMSGSIZE;
		goto out_unmap;
	}

	ret = rds_ib_post_reg_frmr(ibmr);
	if (ret)
		goto out_unmap;

	if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
		rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
	else
		rds_ib_stats_inc(s_ib_rdma_mr_1m_used);

	return ret;

out_unmap:
	ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
			DMA_BIDIRECTIONAL);
	ibmr->sg_dma_len = 0;
	return ret;
}