/* Send a backwards direction RPC call.
 *
 * Caller holds the connection's mutex and has already marshaled
 * the RPC/RDMA request.
 *
 * This is similar to svc_rdma_reply, but takes an rpc_rqst
 * instead, does not support chunks, and avoids blocking memory
 * allocation.
 *
 * XXX: There is still an opportunity to block in svc_rdma_send()
 * if there are no SQ entries to post the Send. This may occur if
 * the adapter has a small maximum SQ depth.
 */
static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
			      struct rpc_rqst *rqst)
{
	struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
	struct svc_rdma_op_ctxt *ctxt;
	struct svc_rdma_req_map *vec;
	struct ib_send_wr send_wr;
	int ret;

	vec = svc_rdma_get_req_map(rdma);
	ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false);
	if (ret)
		goto out_err;

	ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
	if (ret)
		goto out_err;

	ctxt = svc_rdma_get_context(rdma);
	ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
	ctxt->count = 1;

	ctxt->direction = DMA_TO_DEVICE;
	ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
	ctxt->sge[0].length = sndbuf->len;
	ctxt->sge[0].addr =
	    ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
			    sndbuf->len, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
		ret = -EIO;
		goto out_unmap;
	}
	svc_rdma_count_mappings(rdma, ctxt);

	memset(&send_wr, 0, sizeof(send_wr));
	ctxt->cqe.done = svc_rdma_wc_send;
	send_wr.wr_cqe = &ctxt->cqe;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = 1;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	ret = svc_rdma_send(rdma, &send_wr);
	if (ret) {
		ret = -EIO;
		goto out_unmap;
	}

out_err:
	svc_rdma_put_req_map(rdma, vec);
	dprintk("svcrdma: %s returns %d\n", __func__, ret);
	return ret;

out_unmap:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	goto out_err;
}
Esempio n. 2
0
/*
 * Map for DMA and insert a single page into the on-demand paging page tables.
 *
 * @umem: the umem to insert the page to.
 * @page_index: index in the umem to add the page to.
 * @page: the page struct to map and add.
 * @access_mask: access permissions needed for this page.
 * @current_seq: sequence number for synchronization with invalidations.
 *               the sequence number is taken from
 *               umem->odp_data->notifiers_seq.
 *
 * The function returns -EFAULT if the DMA mapping operation fails. It returns
 * -EAGAIN if a concurrent invalidation prevents us from updating the page.
 *
 * The page is released via put_page even if the operation failed. For
 * on-demand pinning, the page is released whenever it isn't stored in the
 * umem.
 */
static int ib_umem_odp_map_dma_single_page(
		struct ib_umem *umem,
		int page_index,
		struct page *page,
		u64 access_mask,
		unsigned long current_seq,
		enum ib_odp_dma_map_flags flags)
{
	struct ib_device *dev = umem->context->device;
	dma_addr_t dma_addr;
	int stored_page = 0;
	int ret = 0;
	mutex_lock(&umem->odp_data->umem_mutex);
	/*
	 * Note: we avoid writing if seq is different from the initial seq, to
	 * handle case of a racing notifier. This check also allows us to bail
	 * early if we have a notifier running in parallel with us.
	 */
	if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
		ret = -EAGAIN;
		goto out;
	}
	if (!(umem->odp_data->dma_list[page_index])) {
		dma_addr = ib_dma_map_page(dev,
					   page,
					   0, PAGE_SIZE,
					   DMA_BIDIRECTIONAL);
		if (ib_dma_mapping_error(dev, dma_addr)) {
			ret = -EFAULT;
			goto out;
		}
		umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
		umem->odp_data->page_list[page_index] = page;
		if (flags & IB_ODP_DMA_MAP_FOR_PREFETCH)
			atomic_inc(&dev->odp_statistics.num_prefetch_pages);
		else
			atomic_inc(&dev->odp_statistics.num_page_fault_pages);

		stored_page = 1;
	} else if (umem->odp_data->page_list[page_index] == page) {
		umem->odp_data->dma_list[page_index] |= access_mask;
	} else {
		pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n"
		       , umem->odp_data->page_list[page_index], page);
	}

out:
	mutex_unlock(&umem->odp_data->umem_mutex);

	/* On Demand Paging - avoid pinning the page */
	if (umem->context->invalidate_range || !stored_page)
		put_page(page);

	return ret;
}
static int
svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
	struct ib_recv_wr recv_wr, *bad_recv_wr;
	struct svc_rdma_op_ctxt *ctxt;
	struct page *page;
	dma_addr_t pa;
	int sge_no;
	int buflen;
	int ret;

	ctxt = svc_rdma_get_context(xprt);
	buflen = 0;
	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->cqe.done = svc_rdma_wc_receive;
	for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
		if (sge_no >= xprt->sc_max_sge) {
			pr_err("svcrdma: Too many sges (%d)\n", sge_no);
			goto err_put_ctxt;
		}
		page = alloc_page(GFP_KERNEL);
		if (!page)
			goto err_put_ctxt;
		ctxt->pages[sge_no] = page;
		pa = ib_dma_map_page(xprt->sc_cm_id->device,
				     page, 0, PAGE_SIZE,
				     DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
			goto err_put_ctxt;
		svc_rdma_count_mappings(xprt, ctxt);
		ctxt->sge[sge_no].addr = pa;
		ctxt->sge[sge_no].length = PAGE_SIZE;
		ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
		ctxt->count = sge_no + 1;
		buflen += PAGE_SIZE;
	}
	recv_wr.next = NULL;
	recv_wr.sg_list = &ctxt->sge[0];
	recv_wr.num_sge = ctxt->count;
	recv_wr.wr_cqe = &ctxt->cqe;

	svc_xprt_get(&xprt->sc_xprt);
	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
	if (ret) {
		svc_rdma_unmap_dma(ctxt);
		svc_rdma_put_context(ctxt, 1);
		svc_xprt_put(&xprt->sc_xprt);
	}
	return ret;

 err_put_ctxt:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	return -ENOMEM;
}
static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
					     struct ipoib_cm_rx_buf *rx_ring,
					     int id, int frags,
					     u64 mapping[IPOIB_CM_RX_SG])
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct sk_buff *skb;
	int i;

	skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
	if (unlikely(!skb))
		return NULL;

	/*
	 * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
	 * IP header to a multiple of 16.
	 */
	skb_reserve(skb, 12);

	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
				       DMA_FROM_DEVICE);
	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
		dev_kfree_skb_any(skb);
		return NULL;
	}

	for (i = 0; i < frags; i++) {
		struct page *page = alloc_page(GFP_ATOMIC);

		if (!page)
			goto partial_error;
		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);

		mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
						 0, PAGE_SIZE, DMA_FROM_DEVICE);
		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
			goto partial_error;
	}

	rx_ring[id].skb = skb;
	return skb;

partial_error:

	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);

	for (; i > 0; --i)
		ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);

	dev_kfree_skb_any(skb);
	return NULL;
}
int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
	struct ib_recv_wr recv_wr, *bad_recv_wr;
	struct svc_rdma_op_ctxt *ctxt;
	struct page *page;
	dma_addr_t pa;
	int sge_no;
	int buflen;
	int ret;

	ctxt = svc_rdma_get_context(xprt);
	buflen = 0;
	ctxt->direction = DMA_FROM_DEVICE;
	for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
		BUG_ON(sge_no >= xprt->sc_max_sge);
		page = svc_rdma_get_page();
		ctxt->pages[sge_no] = page;
		pa = ib_dma_map_page(xprt->sc_cm_id->device,
				     page, 0, PAGE_SIZE,
				     DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
			goto err_put_ctxt;
		atomic_inc(&xprt->sc_dma_used);
		ctxt->sge[sge_no].addr = pa;
		ctxt->sge[sge_no].length = PAGE_SIZE;
		ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
		ctxt->count = sge_no + 1;
		buflen += PAGE_SIZE;
	}
	recv_wr.next = NULL;
	recv_wr.sg_list = &ctxt->sge[0];
	recv_wr.num_sge = ctxt->count;
	recv_wr.wr_id = (u64)(unsigned long)ctxt;

	svc_xprt_get(&xprt->sc_xprt);
	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
	if (ret) {
		svc_rdma_unmap_dma(ctxt);
		svc_rdma_put_context(ctxt, 1);
		svc_xprt_put(&xprt->sc_xprt);
	}
	return ret;

 err_put_ctxt:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	return -ENOMEM;
}
Esempio n. 6
0
static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
			     struct svc_rdma_op_ctxt *ctxt,
			     struct svc_rdma_fastreg_mr *frmr,
			     struct kvec *vec,
			     u64 *sgl_offset,
			     int count)
{
	int i;
	unsigned long off;

	ctxt->count = count;
	ctxt->direction = DMA_FROM_DEVICE;
	for (i = 0; i < count; i++) {
		ctxt->sge[i].length = 0; /* in case map fails */
		if (!frmr) {
			BUG_ON(0 == virt_to_page(vec[i].iov_base));
			off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
			ctxt->sge[i].addr =
				ib_dma_map_page(xprt->sc_cm_id->device,
						virt_to_page(vec[i].iov_base),
						off,
						vec[i].iov_len,
						DMA_FROM_DEVICE);
			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
						 ctxt->sge[i].addr))
				return -EINVAL;
			ctxt->sge[i].lkey = xprt->sc_dma_lkey;
			atomic_inc(&xprt->sc_dma_used);
		} else {
			ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
			ctxt->sge[i].lkey = frmr->mr->lkey;
		}
		ctxt->sge[i].length = vec[i].iov_len;
		*sgl_offset = *sgl_offset + vec[i].iov_len;
	}
	return 0;
}
Esempio n. 7
0
/* Map a read-chunk-list to an XDR and fast register the page-list.
 *
 * Assumptions:
 * - chunk[0]	position points to pages[0] at an offset of 0
 * - pages[]	will be made physically contiguous by creating a one-off memory
 *		region using the fastreg verb.
 * - byte_count is # of bytes in read-chunk-list
 * - ch_count	is # of chunks in read-chunk-list
 *
 * Output:
 * - sge array pointing into pages[] array.
 * - chunk_sge array specifying sge index and count for each
 *   chunk in the read list
 */
static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
				struct svc_rqst *rqstp,
				struct svc_rdma_op_ctxt *head,
				struct rpcrdma_msg *rmsgp,
				struct svc_rdma_req_map *rpl_map,
				struct svc_rdma_req_map *chl_map,
				int ch_count,
				int byte_count)
{
	int page_no;
	int ch_no;
	u32 offset;
	struct rpcrdma_read_chunk *ch;
	struct svc_rdma_fastreg_mr *frmr;
	int ret = 0;

	frmr = svc_rdma_get_frmr(xprt);
	if (IS_ERR(frmr))
		return -ENOMEM;

	head->frmr = frmr;
	head->arg.head[0] = rqstp->rq_arg.head[0];
	head->arg.tail[0] = rqstp->rq_arg.tail[0];
	head->arg.pages = &head->pages[head->count];
	head->hdr_count = head->count; /* save count of hdr pages */
	head->arg.page_base = 0;
	head->arg.page_len = byte_count;
	head->arg.len = rqstp->rq_arg.len + byte_count;
	head->arg.buflen = rqstp->rq_arg.buflen + byte_count;

	/* Fast register the page list */
	frmr->kva = page_address(rqstp->rq_arg.pages[0]);
	frmr->direction = DMA_FROM_DEVICE;
	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
	frmr->map_len = byte_count;
	frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
		frmr->page_list->page_list[page_no] =
			ib_dma_map_page(xprt->sc_cm_id->device,
					rqstp->rq_arg.pages[page_no], 0,
					PAGE_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 frmr->page_list->page_list[page_no]))
			goto fatal_err;
		atomic_inc(&xprt->sc_dma_used);
		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
	}
	head->count += page_no;

	/* rq_respages points one past arg pages */
	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];

	/* Create the reply and chunk maps */
	offset = 0;
	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	for (ch_no = 0; ch_no < ch_count; ch_no++) {
		rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
		rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
		chl_map->ch[ch_no].count = 1;
		chl_map->ch[ch_no].start = ch_no;
		offset += ch->rc_target.rs_length;
		ch++;
	}

	ret = svc_rdma_fastreg(xprt, frmr);
	if (ret)
		goto fatal_err;

	return ch_no;

 fatal_err:
	printk("svcrdma: error fast registering xdr for xprt %p", xprt);
	svc_rdma_put_frmr(xprt, frmr);
	return -EIO;
}
/* Issue an RDMA_READ using an FRMR to map the data sink */
static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
				struct svc_rqst *rqstp,
				struct svc_rdma_op_ctxt *head,
				int *page_no,
				u32 *page_offset,
				u32 rs_handle,
				u32 rs_length,
				u64 rs_offset,
				int last)
{
	struct ib_send_wr read_wr;
	struct ib_send_wr inv_wr;
	struct ib_send_wr fastreg_wr;
	u8 key;
	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
	int ret, read, pno;
	u32 pg_off = *page_offset;
	u32 pg_no = *page_no;

	if (IS_ERR(frmr))
		return -ENOMEM;

	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->frmr = frmr;
	pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
	read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);

	frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
	frmr->direction = DMA_FROM_DEVICE;
	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
	frmr->map_len = pages_needed << PAGE_SHIFT;
	frmr->page_list_len = pages_needed;

	for (pno = 0; pno < pages_needed; pno++) {
		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
		head->arg.page_len += len;
		head->arg.len += len;
		if (!pg_off)
			head->count++;
		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		rqstp->rq_next_page = rqstp->rq_respages + 1;
#endif
		frmr->page_list->page_list[pno] =
			ib_dma_map_page(xprt->sc_cm_id->device,
					head->arg.pages[pg_no], 0,
					PAGE_SIZE, DMA_FROM_DEVICE);
		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
					   frmr->page_list->page_list[pno]);
		if (ret)
			goto err;
		atomic_inc(&xprt->sc_dma_used);

		/* adjust offset and wrap to next page if needed */
		pg_off += len;
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			pg_no++;
		}
		rs_length -= len;
	}

	if (last && rs_length == 0)
		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

	/* Bump the key */
	key = (u8)(frmr->mr->lkey & 0x000000FF);
	ib_update_fast_reg_key(frmr->mr, ++key);

	ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
	ctxt->sge[0].lkey = frmr->mr->lkey;
	ctxt->sge[0].length = read;
	ctxt->count = 1;
	ctxt->read_hdr = head;

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.send_flags = IB_SEND_SIGNALED;
	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	fastreg_wr.wr.fast_reg.length = frmr->map_len;
	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
	fastreg_wr.next = &read_wr;

	/* Prepare RDMA_READ */
	memset(&read_wr, 0, sizeof(read_wr));
	read_wr.send_flags = IB_SEND_SIGNALED;
	read_wr.wr.rdma.rkey = rs_handle;
	read_wr.wr.rdma.remote_addr = rs_offset;
	read_wr.sg_list = ctxt->sge;
	read_wr.num_sge = 1;
	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
		read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
		read_wr.wr_id = (unsigned long)ctxt;
		read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
	} else {
		read_wr.opcode = IB_WR_RDMA_READ;
		read_wr.next = &inv_wr;
		/* Prepare invalidate */
		memset(&inv_wr, 0, sizeof(inv_wr));
		inv_wr.wr_id = (unsigned long)ctxt;
		inv_wr.opcode = IB_WR_LOCAL_INV;
		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
	}
	ctxt->wr_op = read_wr.opcode;

	/* Post the chain */
	ret = svc_rdma_send(xprt, &fastreg_wr);
	if (ret) {
		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		goto err;
	}

	/* return current location in page array */
	*page_no = pg_no;
	*page_offset = pg_off;
	ret = read;
	atomic_inc(&rdma_stat_read);
	return ret;
 err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	svc_rdma_put_frmr(xprt, frmr);
	return ret;
}
/* Issue an RDMA_READ using the local lkey to map the data sink */
static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
			       struct svc_rqst *rqstp,
			       struct svc_rdma_op_ctxt *head,
			       int *page_no,
			       u32 *page_offset,
			       u32 rs_handle,
			       u32 rs_length,
			       u64 rs_offset,
			       int last)
{
	struct ib_send_wr read_wr;
	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
	int ret, read, pno;
	u32 pg_off = *page_offset;
	u32 pg_no = *page_no;

	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->read_hdr = head;
	pages_needed =
		min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
	read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);

	for (pno = 0; pno < pages_needed; pno++) {
		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
		head->arg.page_len += len;
		head->arg.len += len;
		if (!pg_off)
			head->count++;
		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		rqstp->rq_next_page = rqstp->rq_respages + 1;
#endif
		ctxt->sge[pno].addr =
			ib_dma_map_page(xprt->sc_cm_id->device,
					head->arg.pages[pg_no], pg_off,
					PAGE_SIZE - pg_off,
					DMA_FROM_DEVICE);
		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
					   ctxt->sge[pno].addr);
		if (ret)
			goto err;
		atomic_inc(&xprt->sc_dma_used);

		/* The lkey here is either a local dma lkey or a dma_mr lkey */
		ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
		ctxt->sge[pno].length = len;
		ctxt->count++;

		/* adjust offset and wrap to next page if needed */
		pg_off += len;
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			pg_no++;
		}
		rs_length -= len;
	}

	if (last && rs_length == 0)
		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

	memset(&read_wr, 0, sizeof(read_wr));
	read_wr.wr_id = (unsigned long)ctxt;
	read_wr.opcode = IB_WR_RDMA_READ;
	ctxt->wr_op = read_wr.opcode;
	read_wr.send_flags = IB_SEND_SIGNALED;
	read_wr.wr.rdma.rkey = rs_handle;
	read_wr.wr.rdma.remote_addr = rs_offset;
	read_wr.sg_list = ctxt->sge;
	read_wr.num_sge = pages_needed;

	ret = svc_rdma_send(xprt, &read_wr);
	if (ret) {
		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		goto err;
	}

	/* return current location in page array */
	*page_no = pg_no;
	*page_offset = pg_off;
	ret = read;
	atomic_inc(&rdma_stat_read);
	return ret;
 err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	return ret;
}
Esempio n. 10
0
void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h = (struct sdp_bsdh *)skb_transport_header(skb);
	unsigned long mseq = ring_head(ssk->tx_ring);
	int i, rc, frags;
	u64 addr;
	struct ib_device *dev;
	struct ib_send_wr *bad_wr;

	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge = ibsge;
	struct ib_send_wr tx_wr = { NULL };
	u32 send_flags = IB_SEND_SIGNALED;

	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	SDPSTATS_HIST(send_size, skb->len);

	if (!ssk->qp_active)
		goto err;

	ssk->tx_packets++;

	if (h->mid != SDP_MID_SRCAVAIL &&
			h->mid != SDP_MID_DATA &&
			h->mid != SDP_MID_SRCAVAIL_CANCEL) {
		struct sock *sk = sk_ssk(ssk);

		sk->sk_wmem_queued += skb->truesize;
		sk_mem_charge(sk, skb->truesize);
	}

	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(skb);
		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(sk_ssk(ssk), "SrcAvail cancelled "
					"before being sent!\n");
			SDP_WARN_ON(1);
			sk_wmem_free_skb(sk_ssk(ssk), skb);
			return;
		}
		TX_SRCAVAIL_STATE(skb)->mseq = mseq;
	}

	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(skb->len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf(sk_ssk(ssk), skb, "TX: %s bufs: %d mseq:%ld ack:%d c: %d",
			mid2str(h->mid), rx_ring_posted(ssk), mseq,
			ntohl(h->mseq_ack), tx_credits(ssk));

	SDP_DUMP_PACKET(sk_ssk(ssk), "TX", skb, h);

	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->skb = skb;
	dev = ssk->ib_device;

	if (skb->len <= ssk->inline_thresh && !skb_shinfo(skb)->nr_frags) {
		SDPSTATS_COUNTER_INC(inline_sends);
		sge->addr = (u64) skb->data;
		sge->length = skb->len;
		sge->lkey = 0;
		frags = 0;
		tx_req->mapping[0] = 0; /* Nothing to be cleaned up by sdp_cleanup_sdp_buf() */
		send_flags |= IB_SEND_INLINE;
	} else {
		addr = ib_dma_map_single(dev, skb->data, skb->len - skb->data_len,
				DMA_TO_DEVICE);
		tx_req->mapping[0] = addr;

		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));

		sge->addr = addr;
		sge->length = skb->len - skb->data_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
		frags = skb_shinfo(skb)->nr_frags;
		for (i = 0; i < frags; ++i) {
			++sge;
			addr = ib_dma_map_page(dev,
					skb_shinfo(skb)->frags[i].page.p,
					skb_shinfo(skb)->frags[i].page_offset,
					skb_shinfo(skb)->frags[i].size,
					DMA_TO_DEVICE);
			BUG_ON(ib_dma_mapping_error(dev, addr));
			tx_req->mapping[i + 1] = addr;
			sge->addr = addr;
			sge->length = skb_shinfo(skb)->frags[i].size;
			sge->lkey = ssk->sdp_dev->mr->lkey;
		}
	}

	tx_wr.next = NULL;
	tx_wr.wr_id = ring_head(ssk->tx_ring) | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = frags + 1;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = send_flags;
	if (unlikely(SDP_SKB_CB(skb)->flags & TCPHDR_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(sk_ssk(ssk),
				"ib_post_send failed with status %d.\n", rc);

		sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len, DMA_TO_DEVICE);

		sdp_set_error(sk_ssk(ssk), -ECONNRESET);

		goto err;
	}

	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));

	return;

err:
	sk_wmem_free_skb(sk_ssk(ssk), skb);
}
Esempio n. 11
0
/* Encode an XDR as an array of IB SGE
 *
 * Assumptions:
 * - head[0] is physically contiguous.
 * - tail[0] is physically contiguous.
 * - pages[] is not physically or virtually contigous and consists of
 *   PAGE_SIZE elements.
 *
 * Output:
 * SGE[0]              reserved for RCPRDMA header
 * SGE[1]              data from xdr->head[]
 * SGE[2..sge_count-2] data from xdr->pages[]
 * SGE[sge_count-1]    data from xdr->tail.
 *
 * The max SGE we need is the length of the XDR / pagesize + one for
 * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
 * reserves a page for both the request and the reply header, and this
 * array is only concerned with the reply we are assured that we have
 * on extra page for the RPCRMDA header.
 */
static int fast_reg_xdr(struct svcxprt_rdma *xprt,
		 struct xdr_buf *xdr,
		 struct svc_rdma_req_map *vec)
{
	int sge_no;
	u32 sge_bytes;
	u32 page_bytes;
	u32 page_off;
	int page_no = 0;
	u8 *frva;
	struct svc_rdma_fastreg_mr *frmr;

	frmr = svc_rdma_get_frmr(xprt);
	if (IS_ERR(frmr))
		return -ENOMEM;
	vec->frmr = frmr;

	/* Skip the RPCRDMA header */
	sge_no = 1;

	/* Map the head. */
	frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
	vec->count = 2;
	sge_no++;

	/* Build the FRMR */
	frmr->kva = frva;
	frmr->direction = DMA_TO_DEVICE;
	frmr->access_flags = 0;
	frmr->map_len = PAGE_SIZE;
	frmr->page_list_len = 1;
	frmr->page_list->page_list[page_no] =
		ib_dma_map_single(xprt->sc_cm_id->device,
				  (void *)xdr->head[0].iov_base,
				  PAGE_SIZE, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(xprt->sc_cm_id->device,
				 frmr->page_list->page_list[page_no]))
		goto fatal_err;
	atomic_inc(&xprt->sc_dma_used);

	page_off = xdr->page_base;
	page_bytes = xdr->page_len + page_off;
	if (!page_bytes)
		goto encode_tail;

	/* Map the pages */
	vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
	vec->sge[sge_no].iov_len = page_bytes;
	sge_no++;
	while (page_bytes) {
		struct page *page;

		page = xdr->pages[page_no++];
		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
		page_bytes -= sge_bytes;

		frmr->page_list->page_list[page_no] =
			ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
					  PAGE_SIZE, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 frmr->page_list->page_list[page_no]))
			goto fatal_err;

		atomic_inc(&xprt->sc_dma_used);
		page_off = 0; /* reset for next time through loop */
		frmr->map_len += PAGE_SIZE;
		frmr->page_list_len++;
	}
	vec->count++;

 encode_tail:
	/* Map tail */
	if (0 == xdr->tail[0].iov_len)
		goto done;

	vec->count++;
	vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;

	if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
	    ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
		/*
		 * If head and tail use the same page, we don't need
		 * to map it again.
		 */
		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
	} else {
		void *va;

		/* Map another page for the tail */
		page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
		va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
		vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;

		frmr->page_list->page_list[page_no] =
			ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
					  DMA_TO_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 frmr->page_list->page_list[page_no]))
			goto fatal_err;
		atomic_inc(&xprt->sc_dma_used);
		frmr->map_len += PAGE_SIZE;
		frmr->page_list_len++;
	}

 done:
	if (svc_rdma_fastreg(xprt, frmr))
		goto fatal_err;

	return 0;

 fatal_err:
	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
	svc_rdma_put_frmr(xprt, frmr);
	return -EIO;
}
Esempio n. 12
0
/* This function prepares the portion of the RPCRDMA message to be
 * sent in the RDMA_SEND. This function is called after data sent via
 * RDMA has already been transmitted. There are three cases:
 * - The RPCRDMA header, RPC header, and payload are all sent in a
 *   single RDMA_SEND. This is the "inline" case.
 * - The RPCRDMA header and some portion of the RPC header and data
 *   are sent via this RDMA_SEND and another portion of the data is
 *   sent via RDMA.
 * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
 *   header and data are all transmitted via RDMA.
 * In all three cases, this function prepares the RPCRDMA header in
 * sge[0], the 'type' parameter indicates the type to place in the
 * RPCRDMA header, and the 'byte_count' field indicates how much of
 * the XDR to include in this RDMA_SEND.
 */
static int send_reply(struct svcxprt_rdma *rdma,
		      struct svc_rqst *rqstp,
		      struct page *page,
		      struct rpcrdma_msg *rdma_resp,
		      struct svc_rdma_op_ctxt *ctxt,
		      struct svc_rdma_req_map *vec,
		      int byte_count)
{
	struct ib_send_wr send_wr;
	struct ib_send_wr inv_wr;
	int sge_no;
	int sge_bytes;
	int page_no;
	int ret;

	/* Post a recv buffer to handle another request. */
	ret = svc_rdma_post_recv(rdma);
	if (ret) {
		printk(KERN_INFO
		       "svcrdma: could not post a receive buffer, err=%d."
		       "Closing transport %p.\n", ret, rdma);
		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
		svc_rdma_put_context(ctxt, 0);
		return -ENOTCONN;
	}

	/* Prepare the context */
	ctxt->pages[0] = page;
	ctxt->count = 1;
	ctxt->frmr = vec->frmr;
	if (vec->frmr)
		set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

	/* Prepare the SGE for the RPCRDMA Header */
	ctxt->sge[0].addr =
		ib_dma_map_page(rdma->sc_cm_id->device,
				page, 0, PAGE_SIZE, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
		goto err;
	atomic_inc(&rdma->sc_dma_used);

	ctxt->direction = DMA_TO_DEVICE;

	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
	ctxt->sge[0].lkey = rdma->sc_dma_lkey;

	/* Determine how many of our SGE are to be transmitted */
	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
		byte_count -= sge_bytes;
		if (!vec->frmr) {
			ctxt->sge[sge_no].addr =
				ib_dma_map_single(rdma->sc_cm_id->device,
						  vec->sge[sge_no].iov_base,
						  sge_bytes, DMA_TO_DEVICE);
			if (ib_dma_mapping_error(rdma->sc_cm_id->device,
						 ctxt->sge[sge_no].addr))
				goto err;
			atomic_inc(&rdma->sc_dma_used);
			ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
		} else {
			ctxt->sge[sge_no].addr = (unsigned long)
				vec->sge[sge_no].iov_base;
			ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
		}
		ctxt->sge[sge_no].length = sge_bytes;
	}
	BUG_ON(byte_count != 0);

	/* Save all respages in the ctxt and remove them from the
	 * respages array. They are our pages until the I/O
	 * completes.
	 */
	for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
		ctxt->count++;
		rqstp->rq_respages[page_no] = NULL;
		/*
		 * If there are more pages than SGE, terminate SGE
		 * list so that svc_rdma_unmap_dma doesn't attempt to
		 * unmap garbage.
		 */
		if (page_no+1 >= sge_no)
			ctxt->sge[page_no+1].length = 0;
	}
	BUG_ON(sge_no > rdma->sc_max_sge);
	memset(&send_wr, 0, sizeof send_wr);
	ctxt->wr_op = IB_WR_SEND;
	send_wr.wr_id = (unsigned long)ctxt;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = sge_no;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags =  IB_SEND_SIGNALED;
	if (vec->frmr) {
		/* Prepare INVALIDATE WR */
		memset(&inv_wr, 0, sizeof inv_wr);
		inv_wr.opcode = IB_WR_LOCAL_INV;
		inv_wr.send_flags = IB_SEND_SIGNALED;
		inv_wr.ex.invalidate_rkey =
			vec->frmr->mr->lkey;
		send_wr.next = &inv_wr;
	}

	ret = svc_rdma_send(rdma, &send_wr);
	if (ret)
		goto err;

	return 0;

 err:
	svc_rdma_put_frmr(rdma, vec->frmr);
	svc_rdma_put_context(ctxt, 1);
	return -EIO;
}