/* Send a backwards direction RPC call. * * Caller holds the connection's mutex and has already marshaled * the RPC/RDMA request. * * This is similar to svc_rdma_reply, but takes an rpc_rqst * instead, does not support chunks, and avoids blocking memory * allocation. * * XXX: There is still an opportunity to block in svc_rdma_send() * if there are no SQ entries to post the Send. This may occur if * the adapter has a small maximum SQ depth. */ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) { struct xdr_buf *sndbuf = &rqst->rq_snd_buf; struct svc_rdma_op_ctxt *ctxt; struct svc_rdma_req_map *vec; struct ib_send_wr send_wr; int ret; vec = svc_rdma_get_req_map(rdma); ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false); if (ret) goto out_err; ret = svc_rdma_repost_recv(rdma, GFP_NOIO); if (ret) goto out_err; ctxt = svc_rdma_get_context(rdma); ctxt->pages[0] = virt_to_page(rqst->rq_buffer); ctxt->count = 1; ctxt->direction = DMA_TO_DEVICE; ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; ctxt->sge[0].length = sndbuf->len; ctxt->sge[0].addr = ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0, sndbuf->len, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) { ret = -EIO; goto out_unmap; } svc_rdma_count_mappings(rdma, ctxt); memset(&send_wr, 0, sizeof(send_wr)); ctxt->cqe.done = svc_rdma_wc_send; send_wr.wr_cqe = &ctxt->cqe; send_wr.sg_list = ctxt->sge; send_wr.num_sge = 1; send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; ret = svc_rdma_send(rdma, &send_wr); if (ret) { ret = -EIO; goto out_unmap; } out_err: svc_rdma_put_req_map(rdma, vec); dprintk("svcrdma: %s returns %d\n", __func__, ret); return ret; out_unmap: svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); goto out_err; }
/*
 * DMA-map one page and record it in the umem's on-demand paging tables.
 *
 * @umem:        the umem receiving the page.
 * @page_index:  slot in the umem's dma_list/page_list to fill.
 * @page:        the page struct to map and store.
 * @access_mask: access permission bits OR-ed into the stored DMA address.
 * @current_seq: sequence number used to detect a racing invalidation;
 *               taken from umem->odp_data->notifiers_seq.
 * @flags:       selects which statistics counter is bumped when a new
 *               page is stored (prefetch vs. page fault).
 *
 * Returns -EFAULT when the DMA mapping fails and -EAGAIN when a
 * concurrent invalidation prevents the update; otherwise 0. Note that 0
 * is also returned when the slot already holds a different page (only
 * an error is logged in that case).
 *
 * The page is released with put_page() whenever it was not stored in
 * the umem, and also when the context has an invalidate_range callback
 * (on-demand paging does not keep the page pinned).
 */
static int ib_umem_odp_map_dma_single_page(
		struct ib_umem *umem,
		int page_index,
		struct page *page,
		u64 access_mask,
		unsigned long current_seq,
		enum ib_odp_dma_map_flags flags)
{
	struct ib_device *dev = umem->context->device;
	dma_addr_t mapped;
	int kept = 0;
	int ret = 0;

	mutex_lock(&umem->odp_data->umem_mutex);

	/*
	 * Do not write anything if the sequence number moved since the
	 * caller sampled it: a notifier ran, or is running, in parallel.
	 */
	if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
		ret = -EAGAIN;
		goto out;
	}

	/* Slot already populated: widen the permissions or complain. */
	if (umem->odp_data->dma_list[page_index]) {
		if (umem->odp_data->page_list[page_index] != page) {
			pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
			       umem->odp_data->page_list[page_index], page);
		} else {
			umem->odp_data->dma_list[page_index] |= access_mask;
		}
		goto out;
	}

	/* Empty slot: create the mapping and remember the page. */
	mapped = ib_dma_map_page(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(dev, mapped)) {
		ret = -EFAULT;
		goto out;
	}

	umem->odp_data->dma_list[page_index] = mapped | access_mask;
	umem->odp_data->page_list[page_index] = page;

	if (flags & IB_ODP_DMA_MAP_FOR_PREFETCH)
		atomic_inc(&dev->odp_statistics.num_prefetch_pages);
	else
		atomic_inc(&dev->odp_statistics.num_page_fault_pages);

	kept = 1;

out:
	mutex_unlock(&umem->odp_data->umem_mutex);

	/* On Demand Paging - avoid pinning the page */
	if (umem->context->invalidate_range || !kept)
		put_page(page);

	return ret;
}
static int svc_rdma_post_recv(struct svcxprt_rdma *xprt) { struct ib_recv_wr recv_wr, *bad_recv_wr; struct svc_rdma_op_ctxt *ctxt; struct page *page; dma_addr_t pa; int sge_no; int buflen; int ret; ctxt = svc_rdma_get_context(xprt); buflen = 0; ctxt->direction = DMA_FROM_DEVICE; ctxt->cqe.done = svc_rdma_wc_receive; for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { if (sge_no >= xprt->sc_max_sge) { pr_err("svcrdma: Too many sges (%d)\n", sge_no); goto err_put_ctxt; } page = alloc_page(GFP_KERNEL); if (!page) goto err_put_ctxt; ctxt->pages[sge_no] = page; pa = ib_dma_map_page(xprt->sc_cm_id->device, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) goto err_put_ctxt; svc_rdma_count_mappings(xprt, ctxt); ctxt->sge[sge_no].addr = pa; ctxt->sge[sge_no].length = PAGE_SIZE; ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; ctxt->count = sge_no + 1; buflen += PAGE_SIZE; } recv_wr.next = NULL; recv_wr.sg_list = &ctxt->sge[0]; recv_wr.num_sge = ctxt->count; recv_wr.wr_cqe = &ctxt->cqe; svc_xprt_get(&xprt->sc_xprt); ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); if (ret) { svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); svc_xprt_put(&xprt->sc_xprt); } return ret; err_put_ctxt: svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); return -ENOMEM; }
/*
 * ipoib_cm_alloc_rx_skb - allocate and DMA-map a connected-mode RX skb
 * @dev:     net device the buffer belongs to
 * @rx_ring: receive ring; slot @id is set to the new skb on success
 * @id:      ring slot index
 * @frags:   number of page fragments to attach after the linear head
 * @mapping: out array; [0] receives the DMA address of the linear head,
 *           [1..frags] the DMA addresses of the fragment pages
 *
 * Returns the new skb, or NULL if any allocation or DMA mapping fails.
 * On failure every mapping made so far is undone and the skb is freed.
 */
static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
					     struct ipoib_cm_rx_buf *rx_ring,
					     int id, int frags,
					     u64 mapping[IPOIB_CM_RX_SG])
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct sk_buff *skb;
	int i;

	skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
	if (unlikely(!skb))
		return NULL;

	/*
	 * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
	 * IP header to a multiple of 16.
	 */
	skb_reserve(skb, 12);

	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
				       DMA_FROM_DEVICE);
	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
		dev_kfree_skb_any(skb);
		return NULL;
	}

	for (i = 0; i < frags; i++) {
		struct page *page = alloc_page(GFP_ATOMIC);

		if (!page)
			goto partial_error;
		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);

		mapping[i + 1] = ib_dma_map_page(priv->ca,
						 skb_shinfo(skb)->frags[i].page,
						 0, PAGE_SIZE, DMA_FROM_DEVICE);
		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
			goto partial_error;
	}

	rx_ring[id].skb = skb;
	return skb;

partial_error:
	/* The linear head was mapped with ib_dma_map_single(). */
	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE,
			    DMA_FROM_DEVICE);

	/*
	 * At this point mapping[1..i] hold valid fragment mappings (the
	 * failing iteration did not produce one). They were created with
	 * ib_dma_map_page(), so they must be released with the matching
	 * page unmap rather than the single-buffer unmap.
	 */
	for (; i > 0; --i)
		ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE,
				  DMA_FROM_DEVICE);

	dev_kfree_skb_any(skb);
	return NULL;
}
int svc_rdma_post_recv(struct svcxprt_rdma *xprt) { struct ib_recv_wr recv_wr, *bad_recv_wr; struct svc_rdma_op_ctxt *ctxt; struct page *page; dma_addr_t pa; int sge_no; int buflen; int ret; ctxt = svc_rdma_get_context(xprt); buflen = 0; ctxt->direction = DMA_FROM_DEVICE; for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { BUG_ON(sge_no >= xprt->sc_max_sge); page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; pa = ib_dma_map_page(xprt->sc_cm_id->device, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) goto err_put_ctxt; atomic_inc(&xprt->sc_dma_used); ctxt->sge[sge_no].addr = pa; ctxt->sge[sge_no].length = PAGE_SIZE; ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; ctxt->count = sge_no + 1; buflen += PAGE_SIZE; } recv_wr.next = NULL; recv_wr.sg_list = &ctxt->sge[0]; recv_wr.num_sge = ctxt->count; recv_wr.wr_id = (u64)(unsigned long)ctxt; svc_xprt_get(&xprt->sc_xprt); ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); if (ret) { svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); svc_xprt_put(&xprt->sc_xprt); } return ret; err_put_ctxt: svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); return -ENOMEM; }
/*
 * rdma_set_ctxt_sge - fill a context's SGE array from a kvec array
 * @xprt:       transport supplying the device and the DMA lkey
 * @ctxt:       context whose sge[] entries are written
 * @frmr:       fast-register MR, or NULL to DMA-map each element
 * @vec:        source kvec array
 * @sgl_offset: running byte total; advanced by each element's length
 * @count:      number of elements in @vec
 *
 * With an FRMR the SGE address is the element's virtual address and
 * the lkey is the FRMR's. Without one, each element is mapped for
 * DMA_FROM_DEVICE and the transport's DMA lkey is used.
 *
 * Returns 0 on success or -EINVAL if a DMA mapping fails. The failing
 * SGE is left with length 0.
 */
static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
			     struct svc_rdma_op_ctxt *ctxt,
			     struct svc_rdma_fastreg_mr *frmr,
			     struct kvec *vec,
			     u64 *sgl_offset,
			     int count)
{
	int n;

	ctxt->count = count;
	ctxt->direction = DMA_FROM_DEVICE;

	for (n = 0; n < count; n++) {
		struct kvec *iov = &vec[n];
		struct ib_sge *sge = &ctxt->sge[n];

		sge->length = 0; /* in case map fails */

		if (frmr) {
			sge->addr = (unsigned long)iov->iov_base;
			sge->lkey = frmr->mr->lkey;
		} else {
			unsigned long in_page;

			BUG_ON(!virt_to_page(iov->iov_base));
			in_page = (unsigned long)iov->iov_base & ~PAGE_MASK;
			sge->addr = ib_dma_map_page(xprt->sc_cm_id->device,
						    virt_to_page(iov->iov_base),
						    in_page,
						    iov->iov_len,
						    DMA_FROM_DEVICE);
			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
						 sge->addr))
				return -EINVAL;
			sge->lkey = xprt->sc_dma_lkey;
			atomic_inc(&xprt->sc_dma_used);
		}

		sge->length = iov->iov_len;
		*sgl_offset += iov->iov_len;
	}

	return 0;
}
/* Map a read-chunk-list to an XDR and fast register the page-list. * * Assumptions: * - chunk[0] position points to pages[0] at an offset of 0 * - pages[] will be made physically contiguous by creating a one-off memory * region using the fastreg verb. * - byte_count is # of bytes in read-chunk-list * - ch_count is # of chunks in read-chunk-list * * Output: * - sge array pointing into pages[] array. * - chunk_sge array specifying sge index and count for each * chunk in the read list */ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, struct rpcrdma_msg *rmsgp, struct svc_rdma_req_map *rpl_map, struct svc_rdma_req_map *chl_map, int ch_count, int byte_count) { int page_no; int ch_no; u32 offset; struct rpcrdma_read_chunk *ch; struct svc_rdma_fastreg_mr *frmr; int ret = 0; frmr = svc_rdma_get_frmr(xprt); if (IS_ERR(frmr)) return -ENOMEM; head->frmr = frmr; head->arg.head[0] = rqstp->rq_arg.head[0]; head->arg.tail[0] = rqstp->rq_arg.tail[0]; head->arg.pages = &head->pages[head->count]; head->hdr_count = head->count; /* save count of hdr pages */ head->arg.page_base = 0; head->arg.page_len = byte_count; head->arg.len = rqstp->rq_arg.len + byte_count; head->arg.buflen = rqstp->rq_arg.buflen + byte_count; /* Fast register the page list */ frmr->kva = page_address(rqstp->rq_arg.pages[0]); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); frmr->map_len = byte_count; frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; for (page_no = 0; page_no < frmr->page_list_len; page_no++) { frmr->page_list->page_list[page_no] = ib_dma_map_page(xprt->sc_cm_id->device, rqstp->rq_arg.pages[page_no], 0, PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; atomic_inc(&xprt->sc_dma_used); head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; } head->count += page_no; /* rq_respages 
points one past arg pages */ rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; /* Create the reply and chunk maps */ offset = 0; ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; for (ch_no = 0; ch_no < ch_count; ch_no++) { rpl_map->sge[ch_no].iov_base = frmr->kva + offset; rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length; chl_map->ch[ch_no].count = 1; chl_map->ch[ch_no].start = ch_no; offset += ch->rc_target.rs_length; ch++; } ret = svc_rdma_fastreg(xprt, frmr); if (ret) goto fatal_err; return ch_no; fatal_err: printk("svcrdma: error fast registering xdr for xprt %p", xprt); svc_rdma_put_frmr(xprt, frmr); return -EIO; }
/* Issue an RDMA_READ using an FRMR to map the data sink */ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, int *page_no, u32 *page_offset, u32 rs_handle, u32 rs_length, u64 rs_offset, int last) { struct ib_send_wr read_wr; struct ib_send_wr inv_wr; struct ib_send_wr fastreg_wr; u8 key; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); int ret, read, pno; u32 pg_off = *page_offset; u32 pg_no = *page_no; if (IS_ERR(frmr)) return -ENOMEM; ctxt->direction = DMA_FROM_DEVICE; ctxt->frmr = frmr; pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); frmr->map_len = pages_needed << PAGE_SHIFT; frmr->page_list_len = pages_needed; for (pno = 0; pno < pages_needed; pno++) { int len = min_t(int, rs_length, PAGE_SIZE - pg_off); head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; head->arg.page_len += len; head->arg.len += len; if (!pg_off) head->count++; rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)) rqstp->rq_next_page = rqstp->rq_respages + 1; #endif frmr->page_list->page_list[pno] = ib_dma_map_page(xprt->sc_cm_id->device, head->arg.pages[pg_no], 0, PAGE_SIZE, DMA_FROM_DEVICE); ret = ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[pno]); if (ret) goto err; atomic_inc(&xprt->sc_dma_used); /* adjust offset and wrap to next page if needed */ pg_off += len; if (pg_off == PAGE_SIZE) { pg_off = 0; pg_no++; } rs_length -= len; } if (last && rs_length == 0) set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); else clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); /* Bump the key */ key = 
(u8)(frmr->mr->lkey & 0x000000FF); ib_update_fast_reg_key(frmr->mr, ++key); ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; ctxt->sge[0].lkey = frmr->mr->lkey; ctxt->sge[0].length = read; ctxt->count = 1; ctxt->read_hdr = head; /* Prepare FASTREG WR */ memset(&fastreg_wr, 0, sizeof(fastreg_wr)); fastreg_wr.opcode = IB_WR_FAST_REG_MR; fastreg_wr.send_flags = IB_SEND_SIGNALED; fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; fastreg_wr.wr.fast_reg.page_list = frmr->page_list; fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; fastreg_wr.wr.fast_reg.length = frmr->map_len; fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; fastreg_wr.next = &read_wr; /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); read_wr.send_flags = IB_SEND_SIGNALED; read_wr.wr.rdma.rkey = rs_handle; read_wr.wr.rdma.remote_addr = rs_offset; read_wr.sg_list = ctxt->sge; read_wr.num_sge = 1; if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; read_wr.wr_id = (unsigned long)ctxt; read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; } else { read_wr.opcode = IB_WR_RDMA_READ; read_wr.next = &inv_wr; /* Prepare invalidate */ memset(&inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = (unsigned long)ctxt; inv_wr.opcode = IB_WR_LOCAL_INV; inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; inv_wr.ex.invalidate_rkey = frmr->mr->lkey; } ctxt->wr_op = read_wr.opcode; /* Post the chain */ ret = svc_rdma_send(xprt, &fastreg_wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); goto err; } /* return current location in page array */ *page_no = pg_no; *page_offset = pg_off; ret = read; atomic_inc(&rdma_stat_read); return ret; err: svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; }
/* Issue an RDMA_READ using the local lkey to map the data sink */ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, int *page_no, u32 *page_offset, u32 rs_handle, u32 rs_length, u64 rs_offset, int last) { struct ib_send_wr read_wr; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); int ret, read, pno; u32 pg_off = *page_offset; u32 pg_no = *page_no; ctxt->direction = DMA_FROM_DEVICE; ctxt->read_hdr = head; pages_needed = min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed)); read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); for (pno = 0; pno < pages_needed; pno++) { int len = min_t(int, rs_length, PAGE_SIZE - pg_off); head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; head->arg.page_len += len; head->arg.len += len; if (!pg_off) head->count++; rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)) rqstp->rq_next_page = rqstp->rq_respages + 1; #endif ctxt->sge[pno].addr = ib_dma_map_page(xprt->sc_cm_id->device, head->arg.pages[pg_no], pg_off, PAGE_SIZE - pg_off, DMA_FROM_DEVICE); ret = ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[pno].addr); if (ret) goto err; atomic_inc(&xprt->sc_dma_used); /* The lkey here is either a local dma lkey or a dma_mr lkey */ ctxt->sge[pno].lkey = xprt->sc_dma_lkey; ctxt->sge[pno].length = len; ctxt->count++; /* adjust offset and wrap to next page if needed */ pg_off += len; if (pg_off == PAGE_SIZE) { pg_off = 0; pg_no++; } rs_length -= len; } if (last && rs_length == 0) set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); else clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); memset(&read_wr, 0, sizeof(read_wr)); read_wr.wr_id = (unsigned long)ctxt; read_wr.opcode = IB_WR_RDMA_READ; ctxt->wr_op = read_wr.opcode; read_wr.send_flags = IB_SEND_SIGNALED; read_wr.wr.rdma.rkey = rs_handle; read_wr.wr.rdma.remote_addr = rs_offset; 
read_wr.sg_list = ctxt->sge; read_wr.num_sge = pages_needed; ret = svc_rdma_send(xprt, &read_wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); goto err; } /* return current location in page array */ *page_no = pg_no; *page_offset = pg_off; ret = read; atomic_inc(&rdma_stat_read); return ret; err: svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 0); return ret; }
/*
 * sdp_post_send - fill in the SDP header of @skb and post it as an IB send
 * @ssk: SDP socket to send on
 * @skb: buffer to send; its transport header is treated as the BSDH
 *
 * The skb is stored in the TX ring slot selected by the current ring
 * head. Small linear skbs are sent inline without DMA mapping; all
 * others have their linear part and every page fragment DMA-mapped.
 * On success the ring head is advanced and one TX credit is consumed.
 *
 * If the QP is not active or ib_post_send() fails, the skb is freed
 * with sk_wmem_free_skb(). A DMA mapping failure currently hits
 * BUG_ON().
 */
void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h = (struct sdp_bsdh *)skb_transport_header(skb);
	unsigned long mseq = ring_head(ssk->tx_ring);
	int i, rc, frags;
	u64 addr;
	struct ib_device *dev;
	struct ib_send_wr *bad_wr;
	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge = ibsge;
	struct ib_send_wr tx_wr = { NULL };
	u32 send_flags = IB_SEND_SIGNALED;

	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	SDPSTATS_HIST(send_size, skb->len);

	if (!ssk->qp_active)
		goto err;

	ssk->tx_packets++;

	/*
	 * Charge the skb to the socket's write memory here for every
	 * message type except SrcAvail, Data and SrcAvailCancel.
	 * NOTE(review): presumably those three are charged elsewhere
	 * before reaching this function - confirm.
	 */
	if (h->mid != SDP_MID_SRCAVAIL &&
	    h->mid != SDP_MID_DATA &&
	    h->mid != SDP_MID_SRCAVAIL_CANCEL) {
		struct sock *sk = sk_ssk(ssk);

		sk->sk_wmem_queued += skb->truesize;
		sk_mem_charge(sk, skb->truesize);
	}

	/*
	 * A SrcAvail is only sent if it is still the socket's current
	 * tx_sa; otherwise it is dropped here. When sent, the sequence
	 * number it goes out with is recorded in its state.
	 */
	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(skb);
		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(sk_ssk(ssk), "SrcAvail cancelled "
					"before being sent!\n");
			SDP_WARN_ON(1);
			sk_wmem_free_skb(sk_ssk(ssk), skb);
			return;
		}
		TX_SRCAVAIL_STATE(skb)->mseq = mseq;
	}

	/* Urgent data is flagged as out-of-band in the header. */
	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	/* Remaining header fields, converted to network byte order. */
	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(skb->len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf(sk_ssk(ssk), skb, "TX: %s bufs: %d mseq:%ld ack:%d c: %d",
			mid2str(h->mid), rx_ring_posted(ssk), mseq,
			ntohl(h->mseq_ack), tx_credits(ssk));

	SDP_DUMP_PACKET(sk_ssk(ssk), "TX", skb, h);

	/* The ring slot is the sequence number masked by SDP_TX_SIZE - 1. */
	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->skb = skb;
	dev = ssk->ib_device;

	if (skb->len <= ssk->inline_thresh && !skb_shinfo(skb)->nr_frags) {
		/*
		 * Inline send: the SGE carries the kernel virtual address
		 * of the data with lkey 0, and nothing is DMA-mapped.
		 */
		SDPSTATS_COUNTER_INC(inline_sends);
		sge->addr = (u64) skb->data;
		sge->length = skb->len;
		sge->lkey = 0;
		frags = 0;
		tx_req->mapping[0] = 0; /* Nothing to be cleaned up by sdp_cleanup_sdp_buf() */
		send_flags |= IB_SEND_INLINE;
	} else {
		/* SGE 0: the linear part of the skb (len minus paged data). */
		addr = ib_dma_map_single(dev, skb->data, skb->len - skb->data_len,
				DMA_TO_DEVICE);
		tx_req->mapping[0] = addr;

		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));

		sge->addr = addr;
		sge->length = skb->len - skb->data_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
		frags = skb_shinfo(skb)->nr_frags;
		/* SGE 1..frags: one per page fragment, stored at mapping[i + 1]. */
		for (i = 0; i < frags; ++i) {
			++sge;
			addr = ib_dma_map_page(dev, skb_shinfo(skb)->frags[i].page.p,
					skb_shinfo(skb)->frags[i].page_offset,
					skb_shinfo(skb)->frags[i].size,
					DMA_TO_DEVICE);
			BUG_ON(ib_dma_mapping_error(dev, addr));
			tx_req->mapping[i + 1] = addr;
			sge->addr = addr;
			sge->length = skb_shinfo(skb)->frags[i].size;
			sge->lkey = ssk->sdp_dev->mr->lkey;
		}
	}

	tx_wr.next = NULL;
	/*
	 * NOTE(review): wr_id re-reads ring_head() instead of reusing mseq;
	 * presumably the two are identical here - confirm.
	 */
	tx_wr.wr_id = ring_head(ssk->tx_ring) | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = frags + 1;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = send_flags;
	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(sk_ssk(ssk),
				"ib_post_send failed with status %d.\n", rc);

		sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len, DMA_TO_DEVICE);

		sdp_set_error(sk_ssk(ssk), -ECONNRESET);

		goto err;
	}

	/* Posted: advance the ring, spend a credit, record advertised bufs. */
	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));

	return;

err:
	sk_wmem_free_skb(sk_ssk(ssk), skb);
}
/* Encode an XDR as an array of IB SGE * * Assumptions: * - head[0] is physically contiguous. * - tail[0] is physically contiguous. * - pages[] is not physically or virtually contigous and consists of * PAGE_SIZE elements. * * Output: * SGE[0] reserved for RCPRDMA header * SGE[1] data from xdr->head[] * SGE[2..sge_count-2] data from xdr->pages[] * SGE[sge_count-1] data from xdr->tail. * * The max SGE we need is the length of the XDR / pagesize + one for * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES * reserves a page for both the request and the reply header, and this * array is only concerned with the reply we are assured that we have * on extra page for the RPCRMDA header. */ static int fast_reg_xdr(struct svcxprt_rdma *xprt, struct xdr_buf *xdr, struct svc_rdma_req_map *vec) { int sge_no; u32 sge_bytes; u32 page_bytes; u32 page_off; int page_no = 0; u8 *frva; struct svc_rdma_fastreg_mr *frmr; frmr = svc_rdma_get_frmr(xprt); if (IS_ERR(frmr)) return -ENOMEM; vec->frmr = frmr; /* Skip the RPCRDMA header */ sge_no = 1; /* Map the head. 
*/ frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK); vec->sge[sge_no].iov_base = xdr->head[0].iov_base; vec->sge[sge_no].iov_len = xdr->head[0].iov_len; vec->count = 2; sge_no++; /* Build the FRMR */ frmr->kva = frva; frmr->direction = DMA_TO_DEVICE; frmr->access_flags = 0; frmr->map_len = PAGE_SIZE; frmr->page_list_len = 1; frmr->page_list->page_list[page_no] = ib_dma_map_single(xprt->sc_cm_id->device, (void *)xdr->head[0].iov_base, PAGE_SIZE, DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; atomic_inc(&xprt->sc_dma_used); page_off = xdr->page_base; page_bytes = xdr->page_len + page_off; if (!page_bytes) goto encode_tail; /* Map the pages */ vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; vec->sge[sge_no].iov_len = page_bytes; sge_no++; while (page_bytes) { struct page *page; page = xdr->pages[page_no++]; sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); page_bytes -= sge_bytes; frmr->page_list->page_list[page_no] = ib_dma_map_page(xprt->sc_cm_id->device, page, 0, PAGE_SIZE, DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; atomic_inc(&xprt->sc_dma_used); page_off = 0; /* reset for next time through loop */ frmr->map_len += PAGE_SIZE; frmr->page_list_len++; } vec->count++; encode_tail: /* Map tail */ if (0 == xdr->tail[0].iov_len) goto done; vec->count++; vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) == ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) { /* * If head and tail use the same page, we don't need * to map it again. 
*/ vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; } else { void *va; /* Map another page for the tail */ page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK); vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; frmr->page_list->page_list[page_no] = ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; atomic_inc(&xprt->sc_dma_used); frmr->map_len += PAGE_SIZE; frmr->page_list_len++; } done: if (svc_rdma_fastreg(xprt, frmr)) goto fatal_err; return 0; fatal_err: printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); svc_rdma_put_frmr(xprt, frmr); return -EIO; }
/* This function prepares the portion of the RPCRDMA message to be * sent in the RDMA_SEND. This function is called after data sent via * RDMA has already been transmitted. There are three cases: * - The RPCRDMA header, RPC header, and payload are all sent in a * single RDMA_SEND. This is the "inline" case. * - The RPCRDMA header and some portion of the RPC header and data * are sent via this RDMA_SEND and another portion of the data is * sent via RDMA. * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC * header and data are all transmitted via RDMA. * In all three cases, this function prepares the RPCRDMA header in * sge[0], the 'type' parameter indicates the type to place in the * RPCRDMA header, and the 'byte_count' field indicates how much of * the XDR to include in this RDMA_SEND. */ static int send_reply(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct page *page, struct rpcrdma_msg *rdma_resp, struct svc_rdma_op_ctxt *ctxt, struct svc_rdma_req_map *vec, int byte_count) { struct ib_send_wr send_wr; struct ib_send_wr inv_wr; int sge_no; int sge_bytes; int page_no; int ret; /* Post a recv buffer to handle another request. */ ret = svc_rdma_post_recv(rdma); if (ret) { printk(KERN_INFO "svcrdma: could not post a receive buffer, err=%d." 
"Closing transport %p.\n", ret, rdma); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } /* Prepare the context */ ctxt->pages[0] = page; ctxt->count = 1; ctxt->frmr = vec->frmr; if (vec->frmr) set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); else clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare the SGE for the RPCRDMA Header */ ctxt->sge[0].addr = ib_dma_map_page(rdma->sc_cm_id->device, page, 0, PAGE_SIZE, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) goto err; atomic_inc(&rdma->sc_dma_used); ctxt->direction = DMA_TO_DEVICE; ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); ctxt->sge[0].lkey = rdma->sc_dma_lkey; /* Determine how many of our SGE are to be transmitted */ for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; if (!vec->frmr) { ctxt->sge[sge_no].addr = ib_dma_map_single(rdma->sc_cm_id->device, vec->sge[sge_no].iov_base, sge_bytes, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[sge_no].addr)) goto err; atomic_inc(&rdma->sc_dma_used); ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; } else { ctxt->sge[sge_no].addr = (unsigned long) vec->sge[sge_no].iov_base; ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey; } ctxt->sge[sge_no].length = sge_bytes; } BUG_ON(byte_count != 0); /* Save all respages in the ctxt and remove them from the * respages array. They are our pages until the I/O * completes. */ for (page_no = 0; page_no < rqstp->rq_resused; page_no++) { ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; ctxt->count++; rqstp->rq_respages[page_no] = NULL; /* * If there are more pages than SGE, terminate SGE * list so that svc_rdma_unmap_dma doesn't attempt to * unmap garbage. 
*/ if (page_no+1 >= sge_no) ctxt->sge[page_no+1].length = 0; } BUG_ON(sge_no > rdma->sc_max_sge); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; send_wr.wr_id = (unsigned long)ctxt; send_wr.sg_list = ctxt->sge; send_wr.num_sge = sge_no; send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; if (vec->frmr) { /* Prepare INVALIDATE WR */ memset(&inv_wr, 0, sizeof inv_wr); inv_wr.opcode = IB_WR_LOCAL_INV; inv_wr.send_flags = IB_SEND_SIGNALED; inv_wr.ex.invalidate_rkey = vec->frmr->mr->lkey; send_wr.next = &inv_wr; } ret = svc_rdma_send(rdma, &send_wr); if (ret) goto err; return 0; err: svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 1); return -EIO; }