static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
                                             struct ipoib_cm_rx_buf *rx_ring,
                                             int id, int frags,
                                             u64 mapping[IPOIB_CM_RX_SG])
{
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct sk_buff *skb;
        int i;

        skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
        if (unlikely(!skb))
                return NULL;

        /*
         * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
         * IP header to a multiple of 16.
         */
        skb_reserve(skb, 12);

        mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
                                       DMA_FROM_DEVICE);
        if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
                dev_kfree_skb_any(skb);
                return NULL;
        }

        for (i = 0; i < frags; i++) {
                struct page *page = alloc_page(GFP_ATOMIC);

                if (!page)
                        goto partial_error;
                skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);

                mapping[i + 1] = ib_dma_map_page(priv->ca,
                                                 skb_shinfo(skb)->frags[i].page,
                                                 0, PAGE_SIZE, DMA_FROM_DEVICE);
                if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
                        goto partial_error;
        }

        rx_ring[id].skb = skb;
        return skb;

partial_error:
        ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE,
                            DMA_FROM_DEVICE);

        /* The fragments were mapped with ib_dma_map_page(), so they must be
         * unmapped with ib_dma_unmap_page(), not ib_dma_unmap_single(). */
        for (; i > 0; --i)
                ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE,
                                  DMA_FROM_DEVICE);

        dev_kfree_skb_any(skb);
        return NULL;
}
int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
                              struct iscsi_session *session)
{
        int i, j;
        u64 dma_addr;
        struct iser_rx_desc *rx_desc;
        struct ib_sge *rx_sg;
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;

        iser_conn->qp_max_recv_dtos = session->cmds_max;
        iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
        iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;

        if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
                goto create_rdma_reg_res_failed;

        if (iser_alloc_login_buf(iser_conn))
                goto alloc_login_buf_fail;

        iser_conn->num_rx_descs = session->cmds_max;
        iser_conn->rx_descs = kmalloc(iser_conn->num_rx_descs *
                                      sizeof(struct iser_rx_desc), GFP_KERNEL);
        if (!iser_conn->rx_descs)
                goto rx_desc_alloc_fail;

        rx_desc = iser_conn->rx_descs;

        for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
                dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
                                             ISER_RX_PAYLOAD_SIZE,
                                             DMA_FROM_DEVICE);
                if (ib_dma_mapping_error(device->ib_device, dma_addr))
                        goto rx_desc_dma_map_failed;

                rx_desc->dma_addr = dma_addr;

                rx_sg = &rx_desc->rx_sg;
                rx_sg->addr = rx_desc->dma_addr;
                rx_sg->length = ISER_RX_PAYLOAD_SIZE;
                rx_sg->lkey = device->mr->lkey;
        }

        iser_conn->rx_desc_head = 0;
        return 0;

rx_desc_dma_map_failed:
        rx_desc = iser_conn->rx_descs;
        for (j = 0; j < i; j++, rx_desc++)
                ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
                                    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
        kfree(iser_conn->rx_descs);
        iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
        iser_free_login_buf(iser_conn);
alloc_login_buf_fail:
        device->iser_free_rdma_reg_res(ib_conn);
create_rdma_reg_res_failed:
        iser_err("failed allocating rx descriptors / data buffers\n");
        return -ENOMEM;
}
static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
                             struct svc_rdma_op_ctxt *ctxt,
                             struct svc_rdma_fastreg_mr *frmr,
                             struct kvec *vec,
                             u64 *sgl_offset,
                             int count)
{
        int i;

        ctxt->count = count;
        ctxt->direction = DMA_FROM_DEVICE;
        for (i = 0; i < count; i++) {
                ctxt->sge[i].length = 0; /* in case map fails */
                if (!frmr) {
                        ctxt->sge[i].addr =
                                ib_dma_map_single(xprt->sc_cm_id->device,
                                                  vec[i].iov_base,
                                                  vec[i].iov_len,
                                                  DMA_FROM_DEVICE);
                        if (ib_dma_mapping_error(xprt->sc_cm_id->device,
                                                 ctxt->sge[i].addr))
                                return -EINVAL;
                        ctxt->sge[i].lkey = xprt->sc_dma_lkey;
                        atomic_inc(&xprt->sc_dma_used);
                } else {
                        ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
                        ctxt->sge[i].lkey = frmr->mr->lkey;
                }
                ctxt->sge[i].length = vec[i].iov_len;
                *sgl_offset = *sgl_offset + vec[i].iov_len;
        }
        return 0;
}
struct sk_buff *vnic_alloc_rx_skb(struct vnic_rx_ring *ring, int buf_ind,
                                  gfp_t gfp_flag)
{
        struct ib_device *ca = ring->port->dev->ca;
        struct sk_buff *skb;
        u64 mapping;
        int buf_size = VNIC_BUF_SIZE(ring->port);

        skb = alloc_skb(buf_size, gfp_flag);
        if (!skb) {
                vnic_dbg_data(ring->port->name,
                              "alloc_skb for size %d failed\n", buf_size);
                goto err_alloc;
        }

        mapping = ib_dma_map_single(ca, skb->data, buf_size, DMA_FROM_DEVICE);
        if (unlikely(ib_dma_mapping_error(ca, mapping))) {
                vnic_dbg_data(ring->port->name,
                              "ib_dma_map_single len %d failed\n", buf_size);
                goto err_map;
        }

        ring->rx_info[buf_ind].skb = skb;
        ring->rx_info[buf_ind].dma_addr[0] = mapping;

        return skb;

err_map:
        dev_kfree_skb_any(skb);
err_alloc:
        return NULL;
}
static int post_recv(struct p9_client *client, struct p9_rdma_context *c)
{
        struct p9_trans_rdma *rdma = client->trans;
        struct ib_recv_wr wr;
        struct ib_sge sge;

        c->busa = ib_dma_map_single(rdma->cm_id->device,
                                    c->rc.sdata, client->msize,
                                    DMA_FROM_DEVICE);
        if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
                goto error;

        c->cqe.done = recv_done;

        sge.addr = c->busa;
        sge.length = client->msize;
        sge.lkey = rdma->pd->local_dma_lkey;

        wr.next = NULL;
        wr.wr_cqe = &c->cqe;
        wr.sg_list = &sge;
        wr.num_sge = 1;
        return ib_post_recv(rdma->qp, &wr, NULL);

error:
        p9_debug(P9_DEBUG_ERROR, "EIO\n");
        return -EIO;
}
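/*
 * For context: with the CQE-based completion API used above, the stack calls
 * back through c->cqe.done instead of matching an opaque wr_id. A minimal
 * sketch of such a handler follows; it assumes the CQ was allocated with the
 * client as its cq_context, and the unmap mirrors the ib_dma_map_single() in
 * post_recv(). This is an illustration of the pattern, not the transport's
 * full recv_done(), which also hands the reply buffer back to the 9p client.
 */
static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
        struct p9_client *client = cq->cq_context;
        struct p9_trans_rdma *rdma = client->trans;
        struct p9_rdma_context *c =
                container_of(wc->wr_cqe, struct p9_rdma_context, cqe);

        /* Undo the mapping set up in post_recv(). */
        ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
                            DMA_FROM_DEVICE);
        /* ... dispatch the reply to the client, then free c ... */
}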
static int post_recv(struct p9_client *client, struct p9_rdma_context *c)
{
        struct p9_trans_rdma *rdma = client->trans;
        struct ib_recv_wr wr, *bad_wr;
        struct ib_sge sge;

        c->busa = ib_dma_map_single(rdma->cm_id->device,
                                    c->rc->sdata, client->msize,
                                    DMA_FROM_DEVICE);
        if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
                goto error;

        sge.addr = c->busa;
        sge.length = client->msize;
        sge.lkey = rdma->lkey;

        wr.next = NULL;
        c->wc_op = IB_WC_RECV;
        wr.wr_id = (unsigned long) c;
        wr.sg_list = &sge;
        wr.num_sge = 1;
        return ib_post_recv(rdma->qp, &wr, &bad_wr);

error:
        p9_debug(P9_DEBUG_ERROR, "EIO\n");
        return -EIO;
}
static int iser_alloc_login_buf(struct iser_conn *iser_conn)
{
        struct iser_device *device = iser_conn->ib_conn.device;
        int req_err, resp_err;

        BUG_ON(device == NULL);

        iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
                                       ISER_RX_LOGIN_SIZE, GFP_KERNEL);
        if (!iser_conn->login_buf)
                goto out_err;

        iser_conn->login_req_buf = iser_conn->login_buf;
        iser_conn->login_resp_buf = iser_conn->login_buf +
                                                ISCSI_DEF_MAX_RECV_SEG_LEN;

        iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
                                                     iser_conn->login_req_buf,
                                                     ISCSI_DEF_MAX_RECV_SEG_LEN,
                                                     DMA_TO_DEVICE);

        iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
                                                      iser_conn->login_resp_buf,
                                                      ISER_RX_LOGIN_SIZE,
                                                      DMA_FROM_DEVICE);

        req_err = ib_dma_mapping_error(device->ib_device,
                                       iser_conn->login_req_dma);
        resp_err = ib_dma_mapping_error(device->ib_device,
                                        iser_conn->login_resp_dma);

        if (req_err || resp_err) {
                if (req_err)
                        iser_conn->login_req_dma = 0;
                if (resp_err)
                        iser_conn->login_resp_dma = 0;
                goto free_login_buf;
        }
        return 0;

free_login_buf:
        iser_free_login_buf(iser_conn);
out_err:
        iser_err("unable to alloc or map login buf\n");
        return -ENOMEM;
}
/* Send a backwards direction RPC call.
 *
 * Caller holds the connection's mutex and has already marshaled
 * the RPC/RDMA request.
 *
 * This is similar to svc_rdma_reply, but takes an rpc_rqst
 * instead, does not support chunks, and avoids blocking memory
 * allocation.
 *
 * XXX: There is still an opportunity to block in svc_rdma_send()
 * if there are no SQ entries to post the Send. This may occur if
 * the adapter has a small maximum SQ depth.
 */
static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
                              struct rpc_rqst *rqst)
{
        struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
        struct svc_rdma_op_ctxt *ctxt;
        struct svc_rdma_req_map *vec;
        struct ib_send_wr send_wr;
        int ret;

        vec = svc_rdma_get_req_map(rdma);
        ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false);
        if (ret)
                goto out_err;

        ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
        if (ret)
                goto out_err;

        ctxt = svc_rdma_get_context(rdma);
        ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
        ctxt->count = 1;

        ctxt->direction = DMA_TO_DEVICE;
        ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
        ctxt->sge[0].length = sndbuf->len;
        ctxt->sge[0].addr =
                ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
                                sndbuf->len, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
                ret = -EIO;
                goto out_unmap;
        }
        svc_rdma_count_mappings(rdma, ctxt);

        memset(&send_wr, 0, sizeof(send_wr));
        ctxt->cqe.done = svc_rdma_wc_send;
        send_wr.wr_cqe = &ctxt->cqe;
        send_wr.sg_list = ctxt->sge;
        send_wr.num_sge = 1;
        send_wr.opcode = IB_WR_SEND;
        send_wr.send_flags = IB_SEND_SIGNALED;

        ret = svc_rdma_send(rdma, &send_wr);
        if (ret) {
                ret = -EIO;
                goto out_unmap;
        }

out_err:
        svc_rdma_put_req_map(rdma, vec);
        dprintk("svcrdma: %s returns %d\n", __func__, ret);
        return ret;

out_unmap:
        svc_rdma_unmap_dma(ctxt);
        svc_rdma_put_context(ctxt, 1);
        goto out_err;
}
/*
 * allocate a single fragment on a single ring entry and map it
 * to HW address.
 */
static int vnic_alloc_frag(struct vnic_rx_ring *ring,
                           struct vnic_frag_data *frags_data, int i)
{
        struct vnic_frag_info *frag_info = &ring->frag_info[i];
        struct vnic_rx_alloc *page_alloc = &ring->page_alloc[i];
        struct skb_frag_struct *skb_frags = &frags_data->frags[i];
        struct skb_frag_struct skbf = *skb_frags;
        struct page *page;
        struct ib_device *ib_device = ring->port->dev->ca;
        u64 dma;
        int decision;

        if (vnic_rx_linear)
                return 0;

        if (page_alloc->offset >= frag_info->last_offset) {
                decision = 0;
                /* Allocate new page */
                page = alloc_pages(GFP_ATOMIC | __GFP_COMP, VNIC_ALLOC_ORDER);
                if (!page) {
                        /* frags_data->dma_addr[i] = NULL;
                         * ring->rx_info[wr_id].info = VNIC_FRAG_ALLOC_FAIL;
                         * ring->need_refill = 1;
                         */
                        return -ENOMEM;
                }
                skbf.page.p = page_alloc->page;
                skbf.page_offset = page_alloc->offset;
        } else {
                decision = 1;
                page = page_alloc->page;
                get_page(page);
                skbf.page.p = page;
                skbf.page_offset = page_alloc->offset;
        }

        skbf.size = frag_info->frag_size;

        /* ib_dma_map_single() takes an enum dma_data_direction, so use
         * DMA_FROM_DEVICE rather than the legacy PCI_DMA_FROMDEVICE alias. */
        dma = ib_dma_map_single(ib_device,
                                page_address(skbf.page.p) + skbf.page_offset,
                                frag_info->frag_size, DMA_FROM_DEVICE);
        if (unlikely(ib_dma_mapping_error(ib_device, dma))) {
                vnic_dbg_data(ring->port->name,
                              "ib_dma_map_single len %d failed\n",
                              frag_info->frag_size);
                put_page(page);
                return -ENOMEM;
        }

        if (!decision) {
                page_alloc->page = page;
                page_alloc->offset = frag_info->frag_align;
        } else {
                page_alloc->offset += frag_info->frag_stride;
        }

        *skb_frags = skbf;
        frags_data->dma_addr[i] = dma;

        return 0;
}
/*
 * Map for DMA and insert a single page into the on-demand paging page tables.
 *
 * @umem: the umem to insert the page to.
 * @page_index: index in the umem to add the page to.
 * @page: the page struct to map and add.
 * @access_mask: access permissions needed for this page.
 * @current_seq: sequence number for synchronization with invalidations.
 *               the sequence number is taken from
 *               umem->odp_data->notifiers_seq.
 *
 * The function returns -EFAULT if the DMA mapping operation fails. It returns
 * -EAGAIN if a concurrent invalidation prevents us from updating the page.
 *
 * The page is released via put_page even if the operation failed. For
 * on-demand pinning, the page is released whenever it isn't stored in the
 * umem.
 */
static int ib_umem_odp_map_dma_single_page(
                struct ib_umem *umem,
                int page_index,
                struct page *page,
                u64 access_mask,
                unsigned long current_seq,
                enum ib_odp_dma_map_flags flags)
{
        struct ib_device *dev = umem->context->device;
        dma_addr_t dma_addr;
        int stored_page = 0;
        int ret = 0;

        mutex_lock(&umem->odp_data->umem_mutex);
        /*
         * Note: we avoid writing if seq is different from the initial seq, to
         * handle case of a racing notifier. This check also allows us to bail
         * early if we have a notifier running in parallel with us.
         */
        if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
                ret = -EAGAIN;
                goto out;
        }
        if (!(umem->odp_data->dma_list[page_index])) {
                dma_addr = ib_dma_map_page(dev, page, 0, PAGE_SIZE,
                                           DMA_BIDIRECTIONAL);
                if (ib_dma_mapping_error(dev, dma_addr)) {
                        ret = -EFAULT;
                        goto out;
                }
                umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
                umem->odp_data->page_list[page_index] = page;
                if (flags & IB_ODP_DMA_MAP_FOR_PREFETCH)
                        atomic_inc(&dev->odp_statistics.num_prefetch_pages);
                else
                        atomic_inc(&dev->odp_statistics.num_page_fault_pages);
                stored_page = 1;
        } else if (umem->odp_data->page_list[page_index] == page) {
                umem->odp_data->dma_list[page_index] |= access_mask;
        } else {
                pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
                       umem->odp_data->page_list[page_index], page);
        }

out:
        mutex_unlock(&umem->odp_data->umem_mutex);

        /* On Demand Paging - avoid pinning the page */
        if (umem->context->invalidate_range || !stored_page)
                put_page(page);

        return ret;
}
static int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
        struct ib_recv_wr recv_wr, *bad_recv_wr;
        struct svc_rdma_op_ctxt *ctxt;
        struct page *page;
        dma_addr_t pa;
        int sge_no;
        int buflen;
        int ret;

        ctxt = svc_rdma_get_context(xprt);
        buflen = 0;
        ctxt->direction = DMA_FROM_DEVICE;
        ctxt->cqe.done = svc_rdma_wc_receive;
        for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
                if (sge_no >= xprt->sc_max_sge) {
                        pr_err("svcrdma: Too many sges (%d)\n", sge_no);
                        goto err_put_ctxt;
                }
                page = alloc_page(GFP_KERNEL);
                if (!page)
                        goto err_put_ctxt;
                ctxt->pages[sge_no] = page;
                pa = ib_dma_map_page(xprt->sc_cm_id->device,
                                     page, 0, PAGE_SIZE,
                                     DMA_FROM_DEVICE);
                if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
                        goto err_put_ctxt;
                svc_rdma_count_mappings(xprt, ctxt);
                ctxt->sge[sge_no].addr = pa;
                ctxt->sge[sge_no].length = PAGE_SIZE;
                ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
                ctxt->count = sge_no + 1;
                buflen += PAGE_SIZE;
        }
        recv_wr.next = NULL;
        recv_wr.sg_list = &ctxt->sge[0];
        recv_wr.num_sge = ctxt->count;
        recv_wr.wr_cqe = &ctxt->cqe;

        svc_xprt_get(&xprt->sc_xprt);
        ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
        if (ret) {
                svc_rdma_unmap_dma(ctxt);
                svc_rdma_put_context(ctxt, 1);
                svc_xprt_put(&xprt->sc_xprt);
        }
        return ret;

err_put_ctxt:
        svc_rdma_unmap_dma(ctxt);
        svc_rdma_put_context(ctxt, 1);
        return -ENOMEM;
}
static int iser_alloc_login_buf(struct iser_conn *iser_conn)
{
        struct iser_device *device = iser_conn->ib_conn.device;
        struct iser_login_desc *desc = &iser_conn->login_desc;

        desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
        if (!desc->req)
                return -ENOMEM;

        desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
                                          ISCSI_DEF_MAX_RECV_SEG_LEN,
                                          DMA_TO_DEVICE);
        if (ib_dma_mapping_error(device->ib_device, desc->req_dma))
                goto free_req;

        desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
        if (!desc->rsp)
                goto unmap_req;

        desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
                                          ISER_RX_LOGIN_SIZE,
                                          DMA_FROM_DEVICE);
        if (ib_dma_mapping_error(device->ib_device, desc->rsp_dma))
                goto free_rsp;

        return 0;

free_rsp:
        kfree(desc->rsp);
unmap_req:
        ib_dma_unmap_single(device->ib_device, desc->req_dma,
                            ISCSI_DEF_MAX_RECV_SEG_LEN,
                            DMA_TO_DEVICE);
free_req:
        kfree(desc->req);

        return -ENOMEM;
}
u64 rdma_map_address(void *addr, int length)
{
        u64 dma_addr;

        LOG_KERN(LOG_INFO, ("Mapping addr\n"));

        dma_addr = ib_dma_map_single(rdma_ib_device.dev, addr, length,
                                     DMA_BIDIRECTIONAL);
        if (ib_dma_mapping_error(rdma_ib_device.dev, dma_addr) != 0) {
                LOG_KERN(LOG_INFO, ("Error mapping addr\n"));
                return 0; /* error */
        }

        return dma_addr;
}
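/*
 * A minimal sketch of the symmetric unmap helper, assuming the same
 * rdma_ib_device global and DMA_BIDIRECTIONAL direction that
 * rdma_map_address() above uses; the function name is hypothetical. Every
 * address returned by rdma_map_address() should eventually be passed back
 * here once the hardware is done with the buffer.
 */
void rdma_unmap_address(u64 dma_addr, int length)
{
        ib_dma_unmap_single(rdma_ib_device.dev, dma_addr, length,
                            DMA_BIDIRECTIONAL);
}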
int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
        struct ib_recv_wr recv_wr, *bad_recv_wr;
        struct svc_rdma_op_ctxt *ctxt;
        struct page *page;
        dma_addr_t pa;
        int sge_no;
        int buflen;
        int ret;

        ctxt = svc_rdma_get_context(xprt);
        buflen = 0;
        ctxt->direction = DMA_FROM_DEVICE;
        for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
                BUG_ON(sge_no >= xprt->sc_max_sge);
                page = svc_rdma_get_page();
                ctxt->pages[sge_no] = page;
                pa = ib_dma_map_page(xprt->sc_cm_id->device,
                                     page, 0, PAGE_SIZE,
                                     DMA_FROM_DEVICE);
                if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
                        goto err_put_ctxt;
                atomic_inc(&xprt->sc_dma_used);
                ctxt->sge[sge_no].addr = pa;
                ctxt->sge[sge_no].length = PAGE_SIZE;
                ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
                ctxt->count = sge_no + 1;
                buflen += PAGE_SIZE;
        }
        recv_wr.next = NULL;
        recv_wr.sg_list = &ctxt->sge[0];
        recv_wr.num_sge = ctxt->count;
        recv_wr.wr_id = (u64)(unsigned long)ctxt;

        svc_xprt_get(&xprt->sc_xprt);
        ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
        if (ret) {
                svc_rdma_unmap_dma(ctxt);
                svc_rdma_put_context(ctxt, 1);
                svc_xprt_put(&xprt->sc_xprt);
        }
        return ret;

err_put_ctxt:
        svc_rdma_unmap_dma(ctxt);
        svc_rdma_put_context(ctxt, 1);
        return -ENOMEM;
}
/**
 * iser_reg_single - fills registered buffer descriptor with
 *                   registration information
 */
void iser_reg_single(struct iser_device *device,
                     struct iser_regd_buf *regd_buf,
                     enum dma_data_direction direction)
{
        u64 dma_addr;

        dma_addr = ib_dma_map_single(device->ib_device,
                                     regd_buf->virt_addr,
                                     regd_buf->data_size, direction);
        BUG_ON(ib_dma_mapping_error(device->ib_device, dma_addr));

        regd_buf->reg.lkey = device->mr->lkey;
        regd_buf->reg.len = regd_buf->data_size;
        regd_buf->reg.va = dma_addr;
        regd_buf->reg.is_fmr = 0;

        regd_buf->dma_addr = dma_addr;
        regd_buf->direction = direction;
}
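/*
 * The BUG_ON above crashes the kernel on a mapping failure. For contrast,
 * here is a hedged sketch of a fallible variant that reports the error to
 * the caller instead; the name and return convention are assumptions, not
 * part of the iSER driver.
 */
int iser_reg_single_checked(struct iser_device *device,
                            struct iser_regd_buf *regd_buf,
                            enum dma_data_direction direction)
{
        u64 dma_addr;

        dma_addr = ib_dma_map_single(device->ib_device, regd_buf->virt_addr,
                                     regd_buf->data_size, direction);
        if (ib_dma_mapping_error(device->ib_device, dma_addr))
                return -ENOMEM;    /* let the caller unwind instead of BUG */

        regd_buf->reg.lkey = device->mr->lkey;
        regd_buf->reg.len = regd_buf->data_size;
        regd_buf->reg.va = dma_addr;
        regd_buf->reg.is_fmr = 0;

        regd_buf->dma_addr = dma_addr;
        regd_buf->direction = direction;
        return 0;
}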
int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
{
        int i, j;
        u64 dma_addr;
        struct iser_rx_desc *rx_desc;
        struct ib_sge *rx_sg;
        struct iser_device *device = ib_conn->device;

        ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
                                    sizeof(struct iser_rx_desc), GFP_KERNEL);
        if (!ib_conn->rx_descs)
                goto rx_desc_alloc_fail;

        rx_desc = ib_conn->rx_descs;

        for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
                dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
                                             ISER_RX_PAYLOAD_SIZE,
                                             DMA_FROM_DEVICE);
                if (ib_dma_mapping_error(device->ib_device, dma_addr))
                        goto rx_desc_dma_map_failed;

                rx_desc->dma_addr = dma_addr;

                rx_sg = &rx_desc->rx_sg;
                rx_sg->addr = rx_desc->dma_addr;
                rx_sg->length = ISER_RX_PAYLOAD_SIZE;
                rx_sg->lkey = device->mr->lkey;
        }

        ib_conn->rx_desc_head = 0;
        return 0;

rx_desc_dma_map_failed:
        rx_desc = ib_conn->rx_descs;
        for (j = 0; j < i; j++, rx_desc++)
                ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
                                    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
        kfree(ib_conn->rx_descs);
        ib_conn->rx_descs = NULL;
rx_desc_alloc_fail:
        iser_err("failed allocating rx descriptors / data buffers\n");
        return -ENOMEM;
}
int iser_initialize_task_headers(struct iscsi_task *task,
                                 struct iser_tx_desc *tx_desc)
{
        struct iser_conn *ib_conn = task->conn->dd_data;
        struct iser_device *device = ib_conn->device;
        struct iscsi_iser_task *iser_task = task->dd_data;
        u64 dma_addr;

        dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
                                     ISER_HEADERS_LEN, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(device->ib_device, dma_addr))
                return -ENOMEM;

        tx_desc->dma_addr = dma_addr;
        tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
        tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
        tx_desc->tx_sg[0].lkey = device->mr->lkey;

        iser_task->ib_conn = ib_conn;
        return 0;
}
/**
 * iser_initialize_task_headers() - Initialize task headers
 * @task:       iscsi task
 * @tx_desc:    iser tx descriptor
 *
 * Notes:
 * This routine may race with iser teardown flow for scsi
 * error handling TMFs. So for TMF we should acquire the
 * state mutex to avoid dereferencing the IB device which
 * may have already been terminated.
 */
int iser_initialize_task_headers(struct iscsi_task *task,
                                 struct iser_tx_desc *tx_desc)
{
        struct iser_conn *iser_conn = task->conn->dd_data;
        struct iser_device *device = iser_conn->ib_conn.device;
        struct iscsi_iser_task *iser_task = task->dd_data;
        u64 dma_addr;
        const bool mgmt_task = !task->sc && !in_interrupt();
        int ret = 0;

        if (unlikely(mgmt_task))
                mutex_lock(&iser_conn->state_mutex);

        if (unlikely(iser_conn->state != ISER_CONN_UP)) {
                ret = -ENODEV;
                goto out;
        }

        dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
                                     ISER_HEADERS_LEN, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
                ret = -ENOMEM;
                goto out;
        }

        tx_desc->wr_idx = 0;
        tx_desc->mapped = true;
        tx_desc->dma_addr = dma_addr;
        tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
        tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
        tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey;

        iser_task->iser_conn = iser_conn;
out:
        if (unlikely(mgmt_task))
                mutex_unlock(&iser_conn->state_mutex);

        return ret;
}
static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
        struct svc_rdma_recv_ctxt *ctxt;
        dma_addr_t addr;
        void *buffer;

        ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
        if (!ctxt)
                goto fail0;
        buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
        if (!buffer)
                goto fail1;
        addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
                                 rdma->sc_max_req_size, DMA_FROM_DEVICE);
        if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
                goto fail2;

        ctxt->rc_recv_wr.next = NULL;
        ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe;
        ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge;
        ctxt->rc_recv_wr.num_sge = 1;
        ctxt->rc_cqe.done = svc_rdma_wc_receive;
        ctxt->rc_recv_sge.addr = addr;
        ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
        ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
        ctxt->rc_recv_buf = buffer;
        ctxt->rc_temp = false;
        return ctxt;

fail2:
        kfree(buffer);
fail1:
        kfree(ctxt);
fail0:
        return NULL;
}
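/*
 * A hedged sketch of the matching teardown for the allocator above: undo
 * the ib_dma_map_single() and free both allocations. It is written here
 * from the context fields alone, not copied from the transport code, so
 * treat the name and exact shape as assumptions.
 */
static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
                                       struct svc_rdma_recv_ctxt *ctxt)
{
        /* Unmap with the same device, length, and direction used at map time. */
        ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr,
                            ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
        kfree(ctxt->rc_recv_buf);
        kfree(ctxt);
}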
static int rdma_setup(rdma_ctx_t ctx)
{
        // create receive buffer
        ctx->rdma_recv_buffer = kmalloc(RDMA_BUFFER_SIZE, GFP_KERNEL);
        CHECK_MSG_RET(ctx->rdma_recv_buffer != 0, "Error kmalloc", -1);

        // create memory region
        ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_REMOTE_READ |
                                IB_ACCESS_REMOTE_WRITE |
                                IB_ACCESS_LOCAL_WRITE);
        CHECK_MSG_RET(ctx->mr != 0, "Error creating MR", -1);
        ctx->rkey = ctx->mr->rkey;

        // get dma_addr
        ctx->dma_addr = ib_dma_map_single(rdma_ib_device.dev,
                                          ctx->rdma_recv_buffer,
                                          RDMA_BUFFER_SIZE,
                                          DMA_BIDIRECTIONAL);
        CHECK_MSG_RET(ib_dma_mapping_error(rdma_ib_device.dev,
                                           ctx->dma_addr) == 0,
                      "Error ib_dma_map_single", -1);

        // modify QP until RTS
        modify_qp(ctx);

        return 0;
}
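/*
 * A minimal teardown sketch for rdma_setup(), under the assumption that
 * nothing else still references the MR or the mapping; the function name is
 * hypothetical. Order matters: unmap the buffer and deregister the MR before
 * freeing the memory they cover.
 */
static void rdma_teardown(rdma_ctx_t ctx)
{
        // undo the ib_dma_map_single() from rdma_setup()
        ib_dma_unmap_single(rdma_ib_device.dev, ctx->dma_addr,
                            RDMA_BUFFER_SIZE, DMA_BIDIRECTIONAL);
        // release the DMA memory region, then the buffer itself
        ib_dereg_mr(ctx->mr);
        kfree(ctx->rdma_recv_buffer);
}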
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
        struct p9_trans_rdma *rdma = client->trans;
        struct ib_send_wr wr, *bad_wr;
        struct ib_sge sge;
        int err = 0;
        unsigned long flags;
        struct p9_rdma_context *c = NULL;
        struct p9_rdma_context *rpl_context = NULL;

        /* Allocate an fcall for the reply */
        rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL);
        if (!rpl_context) {
                err = -ENOMEM;  /* don't reach err_close with err still 0 */
                goto err_close;
        }

        /*
         * If the request has a buffer, steal it, otherwise
         * allocate a new one. Typically, requests should already
         * have receive buffers allocated and just swap them around
         */
        if (!req->rc) {
                req->rc = kmalloc(sizeof(struct p9_fcall) + client->msize,
                                  GFP_KERNEL);
                if (req->rc) {
                        req->rc->sdata = (char *) req->rc +
                                                sizeof(struct p9_fcall);
                        req->rc->capacity = client->msize;
                }
        }
        rpl_context->rc = req->rc;
        if (!rpl_context->rc) {
                kfree(rpl_context);
                err = -ENOMEM;
                goto err_close;
        }

        /*
         * Post a receive buffer for this request. We need to ensure
         * there is a reply buffer available for every outstanding
         * request. A flushed request can result in no reply for an
         * outstanding request, so we must keep a count to avoid
         * overflowing the RQ.
         */
        if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
                err = post_recv(client, rpl_context);
                if (err) {
                        kfree(rpl_context->rc);
                        kfree(rpl_context);
                        goto err_close;
                }
        } else
                atomic_dec(&rdma->rq_count);

        /* remove posted receive buffer from request structure */
        req->rc = NULL;

        /* Post the request */
        c = kmalloc(sizeof *c, GFP_KERNEL);
        if (!c) {
                err = -ENOMEM;
                goto err_close;
        }
        c->req = req;

        c->busa = ib_dma_map_single(rdma->cm_id->device,
                                    c->req->tc->sdata, c->req->tc->size,
                                    DMA_TO_DEVICE);
        if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
                goto error;

        sge.addr = c->busa;
        sge.length = c->req->tc->size;
        sge.lkey = rdma->lkey;

        wr.next = NULL;
        c->wc_op = IB_WC_SEND;
        wr.wr_id = (unsigned long) c;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;
        wr.sg_list = &sge;
        wr.num_sge = 1;

        if (down_interruptible(&rdma->sq_sem))
                goto error;

        return ib_post_send(rdma->qp, &wr, &bad_wr);

error:
        P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
        return -EIO;

err_close:
        spin_lock_irqsave(&rdma->req_lock, flags);
        if (rdma->state < P9_RDMA_CLOSING) {
                rdma->state = P9_RDMA_CLOSING;
                spin_unlock_irqrestore(&rdma->req_lock, flags);
                rdma_disconnect(rdma->cm_id);
        } else
                spin_unlock_irqrestore(&rdma->req_lock, flags);
        return err;
}
static int send_reply(struct svcxprt_rdma *rdma,
                      struct svc_rqst *rqstp,
                      struct page *page,
                      struct rpcrdma_msg *rdma_resp,
                      struct svc_rdma_op_ctxt *ctxt,
                      struct svc_rdma_req_map *vec,
                      int byte_count)
{
        struct ib_send_wr send_wr;
        struct ib_send_wr inv_wr;
        int sge_no;
        int sge_bytes;
        int page_no;
        int ret;

        /* Post a recv buffer to handle another request. */
        ret = svc_rdma_post_recv(rdma);
        if (ret) {
                printk(KERN_INFO
                       "svcrdma: could not post a receive buffer, err=%d."
                       "Closing transport %p.\n", ret, rdma);
                set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
                svc_rdma_put_frmr(rdma, vec->frmr);
                svc_rdma_put_context(ctxt, 0);
                return -ENOTCONN;
        }

        /* Prepare the context */
        ctxt->pages[0] = page;
        ctxt->count = 1;
        ctxt->frmr = vec->frmr;
        if (vec->frmr)
                set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
        else
                clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

        /* Prepare the SGE for the RPCRDMA Header */
        ctxt->sge[0].lkey = rdma->sc_dma_lkey;
        ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
        ctxt->sge[0].addr =
                ib_dma_map_single(rdma->sc_cm_id->device, page_address(page),
                                  ctxt->sge[0].length, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
                goto err;
        atomic_inc(&rdma->sc_dma_used);

        ctxt->direction = DMA_TO_DEVICE;

        /* Determine how many of our SGE are to be transmitted */
        for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
                sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
                byte_count -= sge_bytes;
                if (!vec->frmr) {
                        ctxt->sge[sge_no].addr =
                                ib_dma_map_single(rdma->sc_cm_id->device,
                                                  vec->sge[sge_no].iov_base,
                                                  sge_bytes, DMA_TO_DEVICE);
                        if (ib_dma_mapping_error(rdma->sc_cm_id->device,
                                                 ctxt->sge[sge_no].addr))
                                goto err;
                        atomic_inc(&rdma->sc_dma_used);
                        ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
                } else {
                        ctxt->sge[sge_no].addr = (unsigned long)
                                vec->sge[sge_no].iov_base;
                        ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
                }
                ctxt->sge[sge_no].length = sge_bytes;
        }
        BUG_ON(byte_count != 0);

        /* Save all respages in the ctxt and remove them from the
         * respages array. They are our pages until the I/O
         * completes.
         */
        for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
                ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
                ctxt->count++;
                rqstp->rq_respages[page_no] = NULL;
                /*
                 * If there are more pages than SGE, terminate SGE
                 * list so that svc_rdma_unmap_dma doesn't attempt to
                 * unmap garbage.
                 */
                if (page_no+1 >= sge_no)
                        ctxt->sge[page_no+1].length = 0;
        }
        BUG_ON(sge_no > rdma->sc_max_sge);
        memset(&send_wr, 0, sizeof send_wr);
        ctxt->wr_op = IB_WR_SEND;
        send_wr.wr_id = (unsigned long)ctxt;
        send_wr.sg_list = ctxt->sge;
        send_wr.num_sge = sge_no;
        send_wr.opcode = IB_WR_SEND;
        send_wr.send_flags = IB_SEND_SIGNALED;
        if (vec->frmr) {
                /* Prepare INVALIDATE WR */
                memset(&inv_wr, 0, sizeof inv_wr);
                inv_wr.opcode = IB_WR_LOCAL_INV;
                inv_wr.send_flags = IB_SEND_SIGNALED;
                inv_wr.ex.invalidate_rkey = vec->frmr->mr->lkey;
                send_wr.next = &inv_wr;
        }

        ret = svc_rdma_send(rdma, &send_wr);
        if (ret)
                goto err;

        return 0;

err:
        svc_rdma_unmap_dma(ctxt);
        svc_rdma_put_frmr(rdma, vec->frmr);
        svc_rdma_put_context(ctxt, 1);
        return -EIO;
}
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
                      u32 rmr, u64 to,
                      u32 xdr_off, int write_len,
                      struct svc_rdma_req_map *vec)
{
        struct ib_send_wr write_wr;
        struct ib_sge *sge;
        int xdr_sge_no;
        int sge_no;
        int sge_bytes;
        int sge_off;
        int bc;
        struct svc_rdma_op_ctxt *ctxt;

        BUG_ON(vec->count > RPCSVC_MAXPAGES);
        dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
                "write_len=%d, vec->sge=%p, vec->count=%lu\n",
                rmr, (unsigned long long)to, xdr_off,
                write_len, vec->sge, vec->count);

        ctxt = svc_rdma_get_context(xprt);
        ctxt->direction = DMA_TO_DEVICE;
        sge = ctxt->sge;

        /* Find the SGE associated with xdr_off */
        for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
             xdr_sge_no++) {
                if (vec->sge[xdr_sge_no].iov_len > bc)
                        break;
                bc -= vec->sge[xdr_sge_no].iov_len;
        }

        sge_off = bc;
        bc = write_len;
        sge_no = 0;

        /* Copy the remaining SGE */
        while (bc != 0) {
                sge_bytes = min_t(size_t,
                                  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
                sge[sge_no].length = sge_bytes;
                if (!vec->frmr) {
                        sge[sge_no].addr =
                                ib_dma_map_single(xprt->sc_cm_id->device,
                                                  (void *)
                                                  vec->sge[xdr_sge_no].iov_base +
                                                  sge_off,
                                                  sge_bytes, DMA_TO_DEVICE);
                        if (ib_dma_mapping_error(xprt->sc_cm_id->device,
                                                 sge[sge_no].addr))
                                goto err;
                        atomic_inc(&xprt->sc_dma_used);
                        sge[sge_no].lkey = xprt->sc_dma_lkey;
                } else {
                        sge[sge_no].addr = (unsigned long)
                                vec->sge[xdr_sge_no].iov_base + sge_off;
                        sge[sge_no].lkey = vec->frmr->mr->lkey;
                }
                ctxt->count++;
                ctxt->frmr = vec->frmr;
                sge_off = 0;
                sge_no++;
                xdr_sge_no++;
                BUG_ON(xdr_sge_no > vec->count);
                bc -= sge_bytes;
        }

        /* Prepare WRITE WR */
        memset(&write_wr, 0, sizeof write_wr);
        ctxt->wr_op = IB_WR_RDMA_WRITE;
        write_wr.wr_id = (unsigned long)ctxt;
        write_wr.sg_list = &sge[0];
        write_wr.num_sge = sge_no;
        write_wr.opcode = IB_WR_RDMA_WRITE;
        write_wr.send_flags = IB_SEND_SIGNALED;
        write_wr.wr.rdma.rkey = rmr;
        write_wr.wr.rdma.remote_addr = to;

        /* Post It */
        atomic_inc(&rdma_stat_write);
        if (svc_rdma_send(xprt, &write_wr))
                goto err;
        return 0;
 err:
        svc_rdma_put_context(ctxt, 0);
        /* Fatal error, close transport */
        return -EIO;
}
static int fast_reg_xdr(struct svcxprt_rdma *xprt,
                        struct xdr_buf *xdr,
                        struct svc_rdma_req_map *vec)
{
        int sge_no;
        u32 sge_bytes;
        u32 page_bytes;
        u32 page_off;
        int page_no = 0;
        u8 *frva;
        struct svc_rdma_fastreg_mr *frmr;

        frmr = svc_rdma_get_frmr(xprt);
        if (IS_ERR(frmr))
                return -ENOMEM;
        vec->frmr = frmr;

        /* Skip the RPCRDMA header */
        sge_no = 1;

        /* Map the head. */
        frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
        vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
        vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
        vec->count = 2;
        sge_no++;

        /* Build the FRMR */
        frmr->kva = frva;
        frmr->direction = DMA_TO_DEVICE;
        frmr->access_flags = 0;
        frmr->map_len = PAGE_SIZE;
        frmr->page_list_len = 1;
        frmr->page_list->page_list[page_no] =
                ib_dma_map_single(xprt->sc_cm_id->device,
                                  (void *)xdr->head[0].iov_base,
                                  PAGE_SIZE, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(xprt->sc_cm_id->device,
                                 frmr->page_list->page_list[page_no]))
                goto fatal_err;
        atomic_inc(&xprt->sc_dma_used);

        page_off = xdr->page_base;
        page_bytes = xdr->page_len + page_off;
        if (!page_bytes)
                goto encode_tail;

        /* Map the pages */
        vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
        vec->sge[sge_no].iov_len = page_bytes;
        sge_no++;
        while (page_bytes) {
                struct page *page;

                page = xdr->pages[page_no++];
                sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
                page_bytes -= sge_bytes;

                frmr->page_list->page_list[page_no] =
                        ib_dma_map_single(xprt->sc_cm_id->device,
                                          page_address(page),
                                          PAGE_SIZE, DMA_TO_DEVICE);
                if (ib_dma_mapping_error(xprt->sc_cm_id->device,
                                         frmr->page_list->page_list[page_no]))
                        goto fatal_err;

                atomic_inc(&xprt->sc_dma_used);
                page_off = 0; /* reset for next time through loop */
                frmr->map_len += PAGE_SIZE;
                frmr->page_list_len++;
        }
        vec->count++;

 encode_tail:
        /* Map tail */
        if (0 == xdr->tail[0].iov_len)
                goto done;

        vec->count++;
        vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;

        if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
            ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
                /*
                 * If head and tail use the same page, we don't need
                 * to map it again.
                 */
                vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
        } else {
                void *va;

                /* Map another page for the tail */
                page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
                va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
                vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;

                frmr->page_list->page_list[page_no] =
                        ib_dma_map_single(xprt->sc_cm_id->device, va,
                                          PAGE_SIZE, DMA_TO_DEVICE);
                if (ib_dma_mapping_error(xprt->sc_cm_id->device,
                                         frmr->page_list->page_list[page_no]))
                        goto fatal_err;
                atomic_inc(&xprt->sc_dma_used);
                frmr->map_len += PAGE_SIZE;
                frmr->page_list_len++;
        }

 done:
        if (svc_rdma_fastreg(xprt, frmr))
                goto fatal_err;

        return 0;

 fatal_err:
        printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
        vec->frmr = NULL;
        svc_rdma_put_frmr(xprt, frmr);
        return -EIO;
}
/* Map a read-chunk-list to an XDR and fast register the page-list.
 *
 * Assumptions:
 * - chunk[0] position points to pages[0] at an offset of 0
 * - pages[] will be made physically contiguous by creating a one-off memory
 *   region using the fastreg verb.
 * - byte_count is # of bytes in read-chunk-list
 * - ch_count is # of chunks in read-chunk-list
 *
 * Output:
 * - sge array pointing into pages[] array.
 * - chunk_sge array specifying sge index and count for each
 *   chunk in the read list
 */
static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
                                struct svc_rqst *rqstp,
                                struct svc_rdma_op_ctxt *head,
                                struct rpcrdma_msg *rmsgp,
                                struct svc_rdma_req_map *rpl_map,
                                struct svc_rdma_req_map *chl_map,
                                int ch_count,
                                int byte_count)
{
        int page_no;
        int ch_no;
        u32 offset;
        struct rpcrdma_read_chunk *ch;
        struct svc_rdma_fastreg_mr *frmr;
        int ret = 0;

        frmr = svc_rdma_get_frmr(xprt);
        if (IS_ERR(frmr))
                return -ENOMEM;

        head->frmr = frmr;
        head->arg.head[0] = rqstp->rq_arg.head[0];
        head->arg.tail[0] = rqstp->rq_arg.tail[0];
        head->arg.pages = &head->pages[head->count];
        head->hdr_count = head->count; /* save count of hdr pages */
        head->arg.page_base = 0;
        head->arg.page_len = byte_count;
        head->arg.len = rqstp->rq_arg.len + byte_count;
        head->arg.buflen = rqstp->rq_arg.buflen + byte_count;

        /* Fast register the page list */
        frmr->kva = page_address(rqstp->rq_arg.pages[0]);
        frmr->direction = DMA_FROM_DEVICE;
        frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
        frmr->map_len = byte_count;
        frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
        for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
                frmr->page_list->page_list[page_no] =
                        ib_dma_map_page(xprt->sc_cm_id->device,
                                        rqstp->rq_arg.pages[page_no], 0,
                                        PAGE_SIZE, DMA_FROM_DEVICE);
                if (ib_dma_mapping_error(xprt->sc_cm_id->device,
                                         frmr->page_list->page_list[page_no]))
                        goto fatal_err;
                atomic_inc(&xprt->sc_dma_used);
                head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
        }
        head->count += page_no;

        /* rq_respages points one past arg pages */
        rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];

        /* Create the reply and chunk maps */
        offset = 0;
        ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
        for (ch_no = 0; ch_no < ch_count; ch_no++) {
                rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
                rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
                chl_map->ch[ch_no].count = 1;
                chl_map->ch[ch_no].start = ch_no;
                offset += ch->rc_target.rs_length;
                ch++;
        }

        ret = svc_rdma_fastreg(xprt, frmr);
        if (ret)
                goto fatal_err;

        return ch_no;

 fatal_err:
        printk("svcrdma: error fast registering xdr for xprt %p", xprt);
        svc_rdma_put_frmr(xprt, frmr);
        return -EIO;
}
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
        struct p9_trans_rdma *rdma = client->trans;
        struct ib_send_wr wr, *bad_wr;
        struct ib_sge sge;
        int err = 0;
        unsigned long flags;
        struct p9_rdma_context *c = NULL;
        struct p9_rdma_context *rpl_context = NULL;

        /* When an error occurs between posting the recv and the send,
         * there will be a receive context posted without a pending request.
         * Since there is no way to "un-post" it, we remember it and skip
         * post_recv() for the next request.
         * So here,
         * see if we are this `next request' and need to absorb an excess rc.
         * If yes, then drop and free our own, and do not post_recv().
         */
        if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
                if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
                        /* Got one! */
                        kfree(req->rc);
                        req->rc = NULL;
                        goto dont_need_post_recv;
                } else {
                        /* We raced and lost. */
                        atomic_inc(&rdma->excess_rc);
                }
        }

        /* Allocate an fcall for the reply */
        rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
        if (!rpl_context) {
                err = -ENOMEM;
                goto recv_error;
        }
        rpl_context->rc = req->rc;

        /*
         * Post a receive buffer for this request. We need to ensure
         * there is a reply buffer available for every outstanding
         * request. A flushed request can result in no reply for an
         * outstanding request, so we must keep a count to avoid
         * overflowing the RQ.
         */
        if (down_interruptible(&rdma->rq_sem)) {
                err = -EINTR;
                goto recv_error;
        }

        err = post_recv(client, rpl_context);
        if (err) {
                p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
                goto recv_error;
        }
        /* remove posted receive buffer from request structure */
        req->rc = NULL;

dont_need_post_recv:
        /* Post the request */
        c = kmalloc(sizeof *c, GFP_NOFS);
        if (!c) {
                err = -ENOMEM;
                goto send_error;
        }
        c->req = req;

        c->busa = ib_dma_map_single(rdma->cm_id->device,
                                    c->req->tc->sdata, c->req->tc->size,
                                    DMA_TO_DEVICE);
        if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
                err = -EIO;
                goto send_error;
        }

        sge.addr = c->busa;
        sge.length = c->req->tc->size;
        sge.lkey = rdma->lkey;

        wr.next = NULL;
        c->wc_op = IB_WC_SEND;
        wr.wr_id = (unsigned long) c;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;
        wr.sg_list = &sge;
        wr.num_sge = 1;

        if (down_interruptible(&rdma->sq_sem)) {
                err = -EINTR;
                goto send_error;
        }

        /* Mark request as `sent' *before* we actually send it,
         * because doing it after could erase the REQ_STATUS_RCVD
         * status in case of a very fast reply.
         */
        req->status = REQ_STATUS_SENT;
        err = ib_post_send(rdma->qp, &wr, &bad_wr);
        if (err)
                goto send_error;

        /* Success */
        return 0;

 /* Handle errors that happened during or while preparing the send: */
send_error:
        req->status = REQ_STATUS_ERROR;
        kfree(c);
        p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);

        /* Ach.
         * We did post_recv(), but not the send. We have one posted receive
         * in excess.
         */
        atomic_inc(&rdma->excess_rc);
        return err;

 /* Handle errors that happened during or while preparing post_recv(): */
recv_error:
        kfree(rpl_context);
        spin_lock_irqsave(&rdma->req_lock, flags);
        if (rdma->state < P9_RDMA_CLOSING) {
                rdma->state = P9_RDMA_CLOSING;
                spin_unlock_irqrestore(&rdma->req_lock, flags);
                rdma_disconnect(rdma->cm_id);
        } else
                spin_unlock_irqrestore(&rdma->req_lock, flags);
        return err;
}
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
        struct iser_device *device;
        struct ib_qp_init_attr init_attr;
        int req_err, resp_err, ret = -ENOMEM;
        struct ib_fmr_pool_param params;

        BUG_ON(ib_conn->device == NULL);

        device = ib_conn->device;

        ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
                                     ISER_RX_LOGIN_SIZE, GFP_KERNEL);
        if (!ib_conn->login_buf)
                goto out_err;

        ib_conn->login_req_buf = ib_conn->login_buf;
        ib_conn->login_resp_buf = ib_conn->login_buf +
                                  ISCSI_DEF_MAX_RECV_SEG_LEN;

        ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
                                (void *)ib_conn->login_req_buf,
                                ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

        ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
                                (void *)ib_conn->login_resp_buf,
                                ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

        req_err = ib_dma_mapping_error(device->ib_device,
                                       ib_conn->login_req_dma);
        resp_err = ib_dma_mapping_error(device->ib_device,
                                        ib_conn->login_resp_dma);

        if (req_err || resp_err) {
                if (req_err)
                        ib_conn->login_req_dma = 0;
                if (resp_err)
                        ib_conn->login_resp_dma = 0;
                goto out_err;
        }

        ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
                                    (sizeof(u64) *
                                     (ISCSI_ISER_SG_TABLESIZE + 1)),
                                    GFP_KERNEL);
        if (!ib_conn->page_vec)
                goto out_err;

        ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);

        params.page_shift = SHIFT_4K;
        params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
        params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
        params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
        params.cache = 0;
        params.flush_function = NULL;
        params.access = (IB_ACCESS_LOCAL_WRITE |
                         IB_ACCESS_REMOTE_WRITE |
                         IB_ACCESS_REMOTE_READ);

        ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
        if (IS_ERR(ib_conn->fmr_pool)) {
                ret = PTR_ERR(ib_conn->fmr_pool);
                ib_conn->fmr_pool = NULL;
                goto out_err;
        }

        memset(&init_attr, 0, sizeof init_attr);

        init_attr.event_handler = iser_qp_event_callback;
        init_attr.qp_context = (void *)ib_conn;
        init_attr.send_cq = device->tx_cq;
        init_attr.recv_cq = device->rx_cq;
        init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
        init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
        init_attr.cap.max_send_sge = 2;
        init_attr.cap.max_recv_sge = 1;
        init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        init_attr.qp_type = IB_QPT_RC;

        ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
        if (ret)
                goto out_err;

        ib_conn->qp = ib_conn->cma_id->qp;
        iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
                 ib_conn, ib_conn->cma_id,
                 ib_conn->fmr_pool, ib_conn->cma_id->qp);
        return ret;

out_err:
        iser_err("unable to alloc mem or create resource, err %d\n", ret);
        return ret;
}
/* Issue an RDMA_READ using the local lkey to map the data sink */
static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
                               struct svc_rqst *rqstp,
                               struct svc_rdma_op_ctxt *head,
                               int *page_no,
                               u32 *page_offset,
                               u32 rs_handle,
                               u32 rs_length,
                               u64 rs_offset,
                               int last)
{
        struct ib_send_wr read_wr;
        int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
        struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
        int ret, read, pno;
        u32 pg_off = *page_offset;
        u32 pg_no = *page_no;

        ctxt->direction = DMA_FROM_DEVICE;
        ctxt->read_hdr = head;
        pages_needed = min_t(int, pages_needed,
                             rdma_read_max_sge(xprt, pages_needed));
        read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);

        for (pno = 0; pno < pages_needed; pno++) {
                int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

                head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
                head->arg.page_len += len;
                head->arg.len += len;
                if (!pg_off)
                        head->count++;
                rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
                rqstp->rq_next_page = rqstp->rq_respages + 1;
#endif
                ctxt->sge[pno].addr =
                        ib_dma_map_page(xprt->sc_cm_id->device,
                                        head->arg.pages[pg_no], pg_off,
                                        PAGE_SIZE - pg_off,
                                        DMA_FROM_DEVICE);
                ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
                                           ctxt->sge[pno].addr);
                if (ret)
                        goto err;
                atomic_inc(&xprt->sc_dma_used);

                /* The lkey here is either a local dma lkey or a dma_mr lkey */
                ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
                ctxt->sge[pno].length = len;
                ctxt->count++;

                /* adjust offset and wrap to next page if needed */
                pg_off += len;
                if (pg_off == PAGE_SIZE) {
                        pg_off = 0;
                        pg_no++;
                }
                rs_length -= len;
        }

        if (last && rs_length == 0)
                set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
        else
                clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

        memset(&read_wr, 0, sizeof(read_wr));
        read_wr.wr_id = (unsigned long)ctxt;
        read_wr.opcode = IB_WR_RDMA_READ;
        ctxt->wr_op = read_wr.opcode;
        read_wr.send_flags = IB_SEND_SIGNALED;
        read_wr.wr.rdma.rkey = rs_handle;
        read_wr.wr.rdma.remote_addr = rs_offset;
        read_wr.sg_list = ctxt->sge;
        read_wr.num_sge = pages_needed;

        ret = svc_rdma_send(xprt, &read_wr);
        if (ret) {
                pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
                set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
                goto err;
        }

        /* return current location in page array */
        *page_no = pg_no;
        *page_offset = pg_off;
        ret = read;
        atomic_inc(&rdma_stat_read);
        return ret;
 err:
        svc_rdma_unmap_dma(ctxt);
        svc_rdma_put_context(ctxt, 0);
        return ret;
}
void sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb)
{
        struct sdp_buf *tx_req;
        struct sdp_bsdh *h;
        unsigned long mseq;
        struct ib_device *dev;
        struct ib_send_wr *bad_wr;
        struct ib_sge ibsge[SDP_MAX_SEND_SGES];
        struct ib_sge *sge;
        struct ib_send_wr tx_wr = { NULL };
        int i, rc;
        u64 addr;

        if (!ssk->qp_active) {
                m_freem(mb);
                return;
        }

        mseq = ring_head(ssk->tx_ring);
        h = mtod(mb, struct sdp_bsdh *);

        /* Take the stats after h is initialized; reading h->mid before the
         * mtod() above would dereference an uninitialized pointer. */
        SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
        SDPSTATS_HIST(send_size, mb->len);

        ssk->tx_packets++;
        ssk->tx_bytes += mb->m_pkthdr.len;

#ifdef SDP_ZCOPY
        if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
                struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(mb);
                if (ssk->tx_sa != tx_sa) {
                        sdp_dbg_data(ssk->socket, "SrcAvail cancelled "
                            "before being sent!\n");
                        WARN_ON(1);
                        m_freem(mb);
                        return;
                }
                TX_SRCAVAIL_STATE(mb)->mseq = mseq;
        }
#endif

        if (unlikely(mb->m_flags & M_URG))
                h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
        else
                h->flags = 0;

        mb->m_flags |= M_RDONLY; /* Don't allow compression once sent. */
        h->bufs = htons(rx_ring_posted(ssk));
        h->len = htonl(mb->m_pkthdr.len);
        h->mseq = htonl(mseq);
        h->mseq_ack = htonl(mseq_ack(ssk));

        sdp_prf1(ssk->socket, mb, "TX: %s bufs: %d mseq:%ld ack:%d",
            mid2str(h->mid), rx_ring_posted(ssk), mseq,
            ntohl(h->mseq_ack));

        SDP_DUMP_PACKET(ssk->socket, "TX", mb, h);

        tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
        tx_req->mb = mb;
        dev = ssk->ib_device;
        sge = &ibsge[0];
        for (i = 0; mb != NULL; i++, mb = mb->m_next, sge++) {
                addr = ib_dma_map_single(dev, mb->m_data, mb->m_len,
                    DMA_TO_DEVICE);
                /* TODO: proper error handling */
                BUG_ON(ib_dma_mapping_error(dev, addr));
                BUG_ON(i >= SDP_MAX_SEND_SGES);
                tx_req->mapping[i] = addr;
                sge->addr = addr;
                sge->length = mb->m_len;
                sge->lkey = ssk->sdp_dev->mr->lkey;
        }
        tx_wr.next = NULL;
        tx_wr.wr_id = mseq | SDP_OP_SEND;
        tx_wr.sg_list = ibsge;
        tx_wr.num_sge = i;
        tx_wr.opcode = IB_WR_SEND;
        tx_wr.send_flags = IB_SEND_SIGNALED;
        if (unlikely(tx_req->mb->m_flags & M_URG))
                tx_wr.send_flags |= IB_SEND_SOLICITED;

        rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
        if (unlikely(rc)) {
                sdp_dbg(ssk->socket,
                    "ib_post_send failed with status %d.\n", rc);

                sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE);

                sdp_notify(ssk, ECONNRESET);
                m_freem(tx_req->mb);
                return;
        }

        atomic_inc(&ssk->tx_ring.head);
        atomic_dec(&ssk->tx_ring.credits);
        atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
        return;
}
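/*
 * A hedged sketch of the "proper error handling" the TODO above asks for:
 * on a mapping failure, unwind the fragments already mapped for this tx_req
 * instead of BUG_ON. The helper name, its signature, and its use are
 * assumptions for illustration, not part of the SDP driver.
 */
static void sdp_unmap_partial_tx(struct ib_device *dev, struct sdp_buf *tx_req,
                                 struct mbuf *mb, int mapped)
{
        struct mbuf *m = mb;
        int i;

        /* Unmap only the fragments that were successfully mapped, walking
         * the mbuf chain in the same order the send loop mapped it. */
        for (i = 0; i < mapped && m != NULL; i++, m = m->m_next)
                ib_dma_unmap_single(dev, tx_req->mapping[i], m->m_len,
                                    DMA_TO_DEVICE);
}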
/* Issue an RDMA_READ using an FRMR to map the data sink */
static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
                                struct svc_rqst *rqstp,
                                struct svc_rdma_op_ctxt *head,
                                int *page_no,
                                u32 *page_offset,
                                u32 rs_handle,
                                u32 rs_length,
                                u64 rs_offset,
                                int last)
{
        struct ib_send_wr read_wr;
        struct ib_send_wr inv_wr;
        struct ib_send_wr fastreg_wr;
        u8 key;
        int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
        struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
        struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
        int ret, read, pno;
        u32 pg_off = *page_offset;
        u32 pg_no = *page_no;

        if (IS_ERR(frmr))
                return -ENOMEM;

        ctxt->direction = DMA_FROM_DEVICE;
        ctxt->frmr = frmr;
        pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
        read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);

        frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
        frmr->direction = DMA_FROM_DEVICE;
        frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
        frmr->map_len = pages_needed << PAGE_SHIFT;
        frmr->page_list_len = pages_needed;

        for (pno = 0; pno < pages_needed; pno++) {
                int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

                head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
                head->arg.page_len += len;
                head->arg.len += len;
                if (!pg_off)
                        head->count++;
                rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
                rqstp->rq_next_page = rqstp->rq_respages + 1;
#endif
                frmr->page_list->page_list[pno] =
                        ib_dma_map_page(xprt->sc_cm_id->device,
                                        head->arg.pages[pg_no], 0,
                                        PAGE_SIZE, DMA_FROM_DEVICE);
                ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
                                           frmr->page_list->page_list[pno]);
                if (ret)
                        goto err;
                atomic_inc(&xprt->sc_dma_used);

                /* adjust offset and wrap to next page if needed */
                pg_off += len;
                if (pg_off == PAGE_SIZE) {
                        pg_off = 0;
                        pg_no++;
                }
                rs_length -= len;
        }

        if (last && rs_length == 0)
                set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
        else
                clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

        /* Bump the key */
        key = (u8)(frmr->mr->lkey & 0x000000FF);
        ib_update_fast_reg_key(frmr->mr, ++key);

        ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
        ctxt->sge[0].lkey = frmr->mr->lkey;
        ctxt->sge[0].length = read;
        ctxt->count = 1;
        ctxt->read_hdr = head;

        /* Prepare FASTREG WR */
        memset(&fastreg_wr, 0, sizeof(fastreg_wr));
        fastreg_wr.opcode = IB_WR_FAST_REG_MR;
        fastreg_wr.send_flags = IB_SEND_SIGNALED;
        fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
        fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
        fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
        fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
        fastreg_wr.wr.fast_reg.length = frmr->map_len;
        fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
        fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
        fastreg_wr.next = &read_wr;

        /* Prepare RDMA_READ */
        memset(&read_wr, 0, sizeof(read_wr));
        read_wr.send_flags = IB_SEND_SIGNALED;
        read_wr.wr.rdma.rkey = rs_handle;
        read_wr.wr.rdma.remote_addr = rs_offset;
        read_wr.sg_list = ctxt->sge;
        read_wr.num_sge = 1;
        if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
                read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
                read_wr.wr_id = (unsigned long)ctxt;
                read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
        } else {
                read_wr.opcode = IB_WR_RDMA_READ;
                read_wr.next = &inv_wr;
                /* Prepare invalidate */
                memset(&inv_wr, 0, sizeof(inv_wr));
                inv_wr.wr_id = (unsigned long)ctxt;
                inv_wr.opcode = IB_WR_LOCAL_INV;
                inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
                inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
        }
        ctxt->wr_op = read_wr.opcode;

        /* Post the chain */
        ret = svc_rdma_send(xprt, &fastreg_wr);
        if (ret) {
                pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
                set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
                goto err;
        }

        /* return current location in page array */
        *page_no = pg_no;
        *page_offset = pg_off;
        ret = read;
        atomic_inc(&rdma_stat_read);
        return ret;
 err:
        svc_rdma_unmap_dma(ctxt);
        svc_rdma_put_context(ctxt, 0);
        svc_rdma_put_frmr(xprt, frmr);
        return ret;
}