/*
 * Allocate one connected-mode receive skb and DMA-map it for the HCA.
 *
 * @dev:     net device whose private data supplies the IB device (priv->ca)
 * @rx_ring: receive ring; slot @id is set to the new skb on success
 * @id:      ring slot index
 * @frags:   number of page fragments to attach after the linear head
 * @mapping: output array; [0] receives the head mapping, [1..frags] the
 *           page mappings
 *
 * Returns the skb on success, or NULL on failure with every mapping made
 * so far released and the skb freed.
 */
static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
					     struct ipoib_cm_rx_buf *rx_ring,
					     int id, int frags,
					     u64 mapping[IPOIB_CM_RX_SG])
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct sk_buff *skb;
	int i;

	skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
	if (unlikely(!skb))
		return NULL;

	/*
	 * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
	 * IP header to a multiple of 16.
	 */
	skb_reserve(skb, 12);

	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
				       DMA_FROM_DEVICE);
	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
		dev_kfree_skb_any(skb);
		return NULL;
	}

	for (i = 0; i < frags; i++) {
		struct page *page = alloc_page(GFP_ATOMIC);

		if (!page)
			goto partial_error;
		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);

		mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
						 0, PAGE_SIZE, DMA_FROM_DEVICE);
		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
			goto partial_error;
	}

	rx_ring[id].skb = skb;
	return skb;

partial_error:

	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);

	/*
	 * When we get here, fragments 0..i-1 are mapped in mapping[1..i].
	 * They were created with ib_dma_map_page(), so they must be released
	 * with the matching ib_dma_unmap_page().
	 */
	for (; i > 0; --i)
		ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);

	dev_kfree_skb_any(skb);
	return NULL;
}
/*
 * Size the receive queue from the session, allocate the RDMA registration
 * resources and the login buffer, then allocate and DMA-map one rx
 * descriptor per receive slot.
 *
 * Returns 0 on success or -ENOMEM after undoing everything set up so far.
 */
int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
			      struct iscsi_session *session)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct iser_rx_desc *desc;
	struct ib_sge *sge;
	u64 bus_addr;
	int nmapped, k;

	iser_conn->qp_max_recv_dtos = session->cmds_max;
	iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
	iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;

	if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
		goto create_rdma_reg_res_failed;

	if (iser_alloc_login_buf(iser_conn))
		goto alloc_login_buf_fail;

	iser_conn->num_rx_descs = session->cmds_max;
	iser_conn->rx_descs = kmalloc(iser_conn->num_rx_descs *
				sizeof(struct iser_rx_desc), GFP_KERNEL);
	if (!iser_conn->rx_descs)
		goto rx_desc_alloc_fail;

	/* Map every descriptor and prime its scatter/gather entry. */
	desc = iser_conn->rx_descs;
	for (nmapped = 0; nmapped < iser_conn->qp_max_recv_dtos; nmapped++) {
		bus_addr = ib_dma_map_single(device->ib_device, (void *)desc,
					     ISER_RX_PAYLOAD_SIZE,
					     DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, bus_addr))
			goto rx_desc_dma_map_failed;

		desc->dma_addr = bus_addr;

		sge = &desc->rx_sg;
		sge->addr   = desc->dma_addr;
		sge->length = ISER_RX_PAYLOAD_SIZE;
		sge->lkey   = device->mr->lkey;

		desc++;
	}

	iser_conn->rx_desc_head = 0;
	return 0;

rx_desc_dma_map_failed:
	/* Only the first nmapped descriptors hold a valid mapping. */
	desc = iser_conn->rx_descs;
	for (k = 0; k < nmapped; k++) {
		ib_dma_unmap_single(device->ib_device, desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		desc++;
	}
	kfree(iser_conn->rx_descs);
	iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_free_login_buf(iser_conn);
alloc_login_buf_fail:
	device->iser_free_rdma_reg_res(ib_conn);
create_rdma_reg_res_failed:
	iser_err("failed allocating rx descriptors / data buffers\n");
	return -ENOMEM;
}
/*
 * Fill the first @count scatter/gather entries of @ctxt from @vec.
 *
 * Without an FRMR each kvec is DMA-mapped and tagged with the transport's
 * DMA lkey; with an FRMR the kvec's virtual address is used directly with
 * the FRMR's lkey. *@sgl_offset is advanced by the length of every entry
 * that was set up.
 *
 * Returns 0 on success or -EINVAL if a DMA mapping fails.
 */
static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
			     struct svc_rdma_op_ctxt *ctxt,
			     struct svc_rdma_fastreg_mr *frmr,
			     struct kvec *vec,
			     u64 *sgl_offset,
			     int count)
{
	int idx = 0;

	ctxt->count = count;
	ctxt->direction = DMA_FROM_DEVICE;

	while (idx < count) {
		struct ib_sge *sge = &ctxt->sge[idx];
		struct kvec *kv = &vec[idx];

		sge->length = 0; /* in case map fails */

		if (frmr) {
			sge->addr = (unsigned long)kv->iov_base;
			sge->lkey = frmr->mr->lkey;
		} else {
			sge->addr = ib_dma_map_single(xprt->sc_cm_id->device,
						      kv->iov_base,
						      kv->iov_len,
						      DMA_FROM_DEVICE);
			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
						 sge->addr))
				return -EINVAL;
			sge->lkey = xprt->sc_dma_lkey;
			atomic_inc(&xprt->sc_dma_used);
		}

		sge->length = kv->iov_len;
		*sgl_offset += kv->iov_len;
		idx++;
	}

	return 0;
}
/*
 * Allocate a linear receive skb for ring slot @buf_ind and DMA-map its
 * data area. On success the skb and its bus address are recorded in
 * ring->rx_info[buf_ind] and the skb is returned; on failure NULL is
 * returned and nothing is left allocated.
 */
struct sk_buff *vnic_alloc_rx_skb(struct vnic_rx_ring *ring, int buf_ind,
				  gfp_t gfp_flag)
{
	struct ib_device *ca = ring->port->dev->ca;
	int len = VNIC_BUF_SIZE(ring->port);
	struct sk_buff *new_skb;
	u64 bus_addr;

	new_skb = alloc_skb(len, gfp_flag);
	if (!new_skb) {
		vnic_dbg_data(ring->port->name,
			      "alloc_skb for size %d failed\n", len);
		return NULL;
	}

	bus_addr = ib_dma_map_single(ca, new_skb->data, len, DMA_FROM_DEVICE);
	if (unlikely(ib_dma_mapping_error(ca, bus_addr))) {
		vnic_dbg_data(ring->port->name,
			      "ib_dma_map_single len %d failed\n", len);
		dev_kfree_skb_any(new_skb);
		return NULL;
	}

	ring->rx_info[buf_ind].skb = new_skb;
	ring->rx_info[buf_ind].dma_addr[0] = bus_addr;

	return new_skb;
}
Beispiel #5
0
/*
 * DMA-map the reply buffer of @c and post it as a receive work request.
 *
 * Returns 0 on success, -EIO if the buffer cannot be mapped, or the error
 * from ib_post_recv(). On any failure no DMA mapping is left behind.
 */
static int
post_recv(struct p9_client *client, struct p9_rdma_context *c)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_recv_wr wr;
	struct ib_sge sge;
	int ret;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->rc.sdata, client->msize,
				    DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
		goto error;

	c->cqe.done = recv_done;

	sge.addr = c->busa;
	sge.length = client->msize;
	sge.lkey = rdma->pd->local_dma_lkey;

	wr.next = NULL;
	wr.wr_cqe = &c->cqe;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	ret = ib_post_recv(rdma->qp, &wr, NULL);
	if (ret) {
		/*
		 * The work request was not queued, so no completion will
		 * ever unmap the buffer; release the mapping here.
		 */
		ib_dma_unmap_single(rdma->cm_id->device, c->busa,
				    client->msize, DMA_FROM_DEVICE);
	}
	return ret;

 error:
	p9_debug(P9_DEBUG_ERROR, "EIO\n");
	return -EIO;
}
Beispiel #6
0
/*
 * DMA-map the reply buffer of @c and post it as a receive work request
 * whose wr_id carries the context pointer.
 *
 * Returns 0 on success, -EIO if the buffer cannot be mapped, or the error
 * from ib_post_recv(). On any failure no DMA mapping is left behind.
 */
static int
post_recv(struct p9_client *client, struct p9_rdma_context *c)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_recv_wr wr, *bad_wr;
	struct ib_sge sge;
	int ret;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->rc->sdata, client->msize,
				    DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
		goto error;

	sge.addr = c->busa;
	sge.length = client->msize;
	sge.lkey = rdma->lkey;

	wr.next = NULL;
	c->wc_op = IB_WC_RECV;
	wr.wr_id = (unsigned long) c;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	ret = ib_post_recv(rdma->qp, &wr, &bad_wr);
	if (ret) {
		/*
		 * The work request was not queued, so no completion will
		 * ever unmap the buffer; release the mapping here.
		 */
		ib_dma_unmap_single(rdma->cm_id->device, c->busa,
				    client->msize, DMA_FROM_DEVICE);
	}
	return ret;

 error:
	p9_debug(P9_DEBUG_ERROR, "EIO\n");
	return -EIO;
}
/*
 * Allocate one contiguous login buffer, split it into a request part
 * followed by a response part, and DMA-map each part in its own direction.
 *
 * Returns 0 on success. On failure returns -ENOMEM; if the buffer had been
 * allocated, any mapping that failed is recorded as 0 before the buffer is
 * handed to iser_free_login_buf().
 */
static int iser_alloc_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;
	int req_failed;
	int resp_failed;

	BUG_ON(device == NULL);

	iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
				     ISER_RX_LOGIN_SIZE, GFP_KERNEL);
	if (!iser_conn->login_buf)
		goto out_err;

	/* Request area first, response area right behind it. */
	iser_conn->login_req_buf  = iser_conn->login_buf;
	iser_conn->login_resp_buf = iser_conn->login_buf +
						ISCSI_DEF_MAX_RECV_SEG_LEN;

	iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
						     iser_conn->login_req_buf,
						     ISCSI_DEF_MAX_RECV_SEG_LEN,
						     DMA_TO_DEVICE);

	iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
						      iser_conn->login_resp_buf,
						      ISER_RX_LOGIN_SIZE,
						      DMA_FROM_DEVICE);

	req_failed  = ib_dma_mapping_error(device->ib_device,
					   iser_conn->login_req_dma);
	resp_failed = ib_dma_mapping_error(device->ib_device,
					   iser_conn->login_resp_dma);

	if (!req_failed && !resp_failed)
		return 0;

	/* Clear whichever handle is invalid before tearing down. */
	if (req_failed)
		iser_conn->login_req_dma = 0;
	if (resp_failed)
		iser_conn->login_resp_dma = 0;

	iser_free_login_buf(iser_conn);

out_err:
	iser_err("unable to alloc or map login buf\n");
	return -ENOMEM;
}
/* Send a backwards direction RPC call.
 *
 * Caller holds the connection's mutex and has already marshaled
 * the RPC/RDMA request.
 *
 * This is similar to svc_rdma_reply, but takes an rpc_rqst
 * instead, does not support chunks, and avoids blocking memory
 * allocation.
 *
 * XXX: There is still an opportunity to block in svc_rdma_send()
 * if there are no SQ entries to post the Send. This may occur if
 * the adapter has a small maximum SQ depth.
 *
 * Returns 0 on success, the error from svc_rdma_map_xdr() or
 * svc_rdma_repost_recv() if either of those fails, or -EIO if the
 * send buffer cannot be DMA-mapped or the Send cannot be posted.
 */
static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
			      struct rpc_rqst *rqst)
{
	struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
	struct svc_rdma_op_ctxt *ctxt;
	struct svc_rdma_req_map *vec;
	struct ib_send_wr send_wr;
	int ret;

	/* The request map is released at out_err on every path. */
	vec = svc_rdma_get_req_map(rdma);
	ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false);
	if (ret)
		goto out_err;

	ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
	if (ret)
		goto out_err;

	/* Single-SGE send: the whole message is sent from rq_buffer's page. */
	ctxt = svc_rdma_get_context(rdma);
	ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
	ctxt->count = 1;

	ctxt->direction = DMA_TO_DEVICE;
	ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
	ctxt->sge[0].length = sndbuf->len;
	ctxt->sge[0].addr =
	    ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
			    sndbuf->len, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
		ret = -EIO;
		goto out_unmap;
	}
	svc_rdma_count_mappings(rdma, ctxt);

	memset(&send_wr, 0, sizeof(send_wr));
	ctxt->cqe.done = svc_rdma_wc_send;
	send_wr.wr_cqe = &ctxt->cqe;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = 1;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	ret = svc_rdma_send(rdma, &send_wr);
	if (ret) {
		/* Any send failure is reported to the caller as -EIO. */
		ret = -EIO;
		goto out_unmap;
	}

	/* Success falls through: out_err is the common exit, not error-only. */
out_err:
	svc_rdma_put_req_map(rdma, vec);
	dprintk("svcrdma: %s returns %d\n", __func__, ret);
	return ret;

out_unmap:
	/* Release the context, then rejoin the common exit above. */
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	goto out_err;
}
/*
 * allocate a single fragment on a single ring entry and map it
 * to HW address.
 *
 * @ring:       rx ring that owns the per-fragment page allocators
 * @frags_data: ring entry whose fragment @i and dma_addr[@i] are filled in
 * @i:          fragment index
 *
 * Returns 0 on success (or immediately when vnic_rx_linear is set), and
 * -ENOMEM if a page cannot be allocated or the DMA mapping fails. On
 * failure neither *frags_data nor the page allocator state is modified.
 */
static int vnic_alloc_frag(struct vnic_rx_ring *ring,
			   struct vnic_frag_data *frags_data, int i)
{
	struct vnic_frag_info *frag_info = &ring->frag_info[i];
	struct vnic_rx_alloc *page_alloc = &ring->page_alloc[i];
	struct skb_frag_struct *skb_frags = &frags_data->frags[i];
	/* Work on a local copy; it is committed only after mapping succeeds. */
	struct skb_frag_struct skbf = *skb_frags;
	struct page *page;	
	struct ib_device *ib_device = ring->port->dev->ca;
	u64 dma;
	/* 0: a fresh page was allocated; 1: the current page is shared. */
	int decision;

	if (vnic_rx_linear)
		return 0;

	if (page_alloc->offset >= frag_info->last_offset) {
		decision = 0;
		/* Allocate new page */
		page = alloc_pages(GFP_ATOMIC | __GFP_COMP, VNIC_ALLOC_ORDER);
		if (!page) {
			/*frags_data->dma_addr[i] = NULL;
			   ring->rx_info[wr_id].info = VNIC_FRAG_ALLOC_FAIL;
			   ring->need_refill = 1; */
			return -ENOMEM;
		}
		/*
		 * This fragment still comes from the *current* page at its
		 * current offset; the newly allocated page only becomes the
		 * allocator's page further down, once mapping has succeeded.
		 * NOTE(review): presumably the allocator's existing reference
		 * on the old page is handed over to this fragment -- confirm.
		 */
		skbf.page.p = page_alloc->page;
		skbf.page_offset = page_alloc->offset;
	} else {
		decision = 1;
		/* Share the current page: take an extra reference for this fragment. */
		page = page_alloc->page;
		get_page(page);
		skbf.page.p = page;
		skbf.page_offset = page_alloc->offset;
	}

	skbf.size = frag_info->frag_size;
	dma = ib_dma_map_single(ib_device, page_address(skbf.page.p) +
			     skbf.page_offset, frag_info->frag_size,
			     PCI_DMA_FROMDEVICE);
	if (unlikely(ib_dma_mapping_error(ib_device, dma))) {
		vnic_dbg_data(ring->port->name,
			      "ib_dma_map_single len %d failed\n",
			      frag_info->frag_size);
		/* Undo the alloc_pages()/get_page() done above. */
		put_page(page);
		return -ENOMEM;
	}

	/* Mapping succeeded: advance the allocator state. */
	if (!decision) {
		page_alloc->page = page;
		page_alloc->offset = frag_info->frag_align;
	} else
		page_alloc->offset += frag_info->frag_stride;

	*skb_frags = skbf;
	frags_data->dma_addr[i] = dma;

	return 0;
}
Beispiel #10
0
/*
 * Map for DMA and insert a single page into the on-demand paging page tables.
 *
 * @umem: the umem to insert the page to.
 * @page_index: index in the umem to add the page to.
 * @page: the page struct to map and add.
 * @access_mask: access permissions needed for this page.
 * @current_seq: sequence number for synchronization with invalidations.
 *               the sequence number is taken from
 *               umem->odp_data->notifiers_seq.
 * @flags: when IB_ODP_DMA_MAP_FOR_PREFETCH is set, a newly mapped page is
 *         counted in the prefetch statistic instead of the page-fault one.
 *
 * The function returns -EFAULT if the DMA mapping operation fails. It returns
 * -EAGAIN if a concurrent invalidation prevents us from updating the page.
 * It returns 0 otherwise, including when a different page is already stored
 * at @page_index (that case is only logged).
 *
 * The page is released via put_page even if the operation failed. For
 * on-demand pinning, the page is released whenever it isn't stored in the
 * umem.
 */
static int ib_umem_odp_map_dma_single_page(
		struct ib_umem *umem,
		int page_index,
		struct page *page,
		u64 access_mask,
		unsigned long current_seq,
		enum ib_odp_dma_map_flags flags)
{
	struct ib_device *dev = umem->context->device;
	dma_addr_t dma_addr;
	/* Set once this call has stored @page in the umem's page_list. */
	int stored_page = 0;
	int ret = 0;
	mutex_lock(&umem->odp_data->umem_mutex);
	/*
	 * Note: we avoid writing if seq is different from the initial seq, to
	 * handle case of a racing notifier. This check also allows us to bail
	 * early if we have a notifier running in parallel with us.
	 */
	if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
		ret = -EAGAIN;
		goto out;
	}
	if (!(umem->odp_data->dma_list[page_index])) {
		/* Slot is empty: create the mapping and record the page. */
		dma_addr = ib_dma_map_page(dev,
					   page,
					   0, PAGE_SIZE,
					   DMA_BIDIRECTIONAL);
		if (ib_dma_mapping_error(dev, dma_addr)) {
			ret = -EFAULT;
			goto out;
		}
		/* The access bits are OR-ed into the stored DMA address. */
		umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
		umem->odp_data->page_list[page_index] = page;
		if (flags & IB_ODP_DMA_MAP_FOR_PREFETCH)
			atomic_inc(&dev->odp_statistics.num_prefetch_pages);
		else
			atomic_inc(&dev->odp_statistics.num_page_fault_pages);

		stored_page = 1;
	} else if (umem->odp_data->page_list[page_index] == page) {
		/* Same page already mapped: only widen its access rights. */
		umem->odp_data->dma_list[page_index] |= access_mask;
	} else {
		/* A different page is stored here; logged only, ret stays 0. */
		pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n"
		       , umem->odp_data->page_list[page_index], page);
	}

out:
	mutex_unlock(&umem->odp_data->umem_mutex);

	/* On Demand Paging - avoid pinning the page */
	if (umem->context->invalidate_range || !stored_page)
		put_page(page);

	return ret;
}
/*
 * Build and post one receive work request large enough for a maximum-size
 * request: pages are allocated and DMA-mapped one per SGE until at least
 * xprt->sc_max_req_size bytes are covered.
 *
 * Returns 0 on success, -ENOMEM if more than sc_max_sge entries would be
 * needed or a page allocation / DMA mapping fails, or the error from
 * ib_post_recv().
 */
static int
svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
	struct ib_recv_wr recv_wr, *bad_recv_wr;
	struct svc_rdma_op_ctxt *ctxt;
	struct page *page;
	dma_addr_t pa;
	int sge_no;
	int buflen;
	int ret;

	ctxt = svc_rdma_get_context(xprt);
	buflen = 0;
	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->cqe.done = svc_rdma_wc_receive;
	for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
		if (sge_no >= xprt->sc_max_sge) {
			pr_err("svcrdma: Too many sges (%d)\n", sge_no);
			goto err_put_ctxt;
		}
		page = alloc_page(GFP_KERNEL);
		if (!page)
			goto err_put_ctxt;
		/*
		 * NOTE(review): the page is stored before ctxt->count is
		 * raised below; if the mapping fails, confirm that
		 * svc_rdma_put_context() still releases this page.
		 */
		ctxt->pages[sge_no] = page;
		pa = ib_dma_map_page(xprt->sc_cm_id->device,
				     page, 0, PAGE_SIZE,
				     DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
			goto err_put_ctxt;
		svc_rdma_count_mappings(xprt, ctxt);
		ctxt->sge[sge_no].addr = pa;
		ctxt->sge[sge_no].length = PAGE_SIZE;
		ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
		/* Keep count in step so cleanup sees only completed entries. */
		ctxt->count = sge_no + 1;
		buflen += PAGE_SIZE;
	}
	recv_wr.next = NULL;
	recv_wr.sg_list = &ctxt->sge[0];
	recv_wr.num_sge = ctxt->count;
	recv_wr.wr_cqe = &ctxt->cqe;

	/* Take a transport reference for the posted WR; dropped here on failure. */
	svc_xprt_get(&xprt->sc_xprt);
	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
	if (ret) {
		svc_rdma_unmap_dma(ctxt);
		svc_rdma_put_context(ctxt, 1);
		svc_xprt_put(&xprt->sc_xprt);
	}
	return ret;

 err_put_ctxt:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	return -ENOMEM;
}
Beispiel #12
0
/*
 * Allocate and DMA-map the login request buffer (to device) and the login
 * response buffer (from device) of the connection's login descriptor.
 *
 * Returns 0 on success or -ENOMEM with everything acquired so far released.
 */
static int iser_alloc_login_buf(struct iser_conn *iser_conn)
{
	struct iser_login_desc *login = &iser_conn->login_desc;
	struct iser_device *device = iser_conn->ib_conn.device;

	/* Request side. */
	login->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
	if (!login->req)
		return -ENOMEM;

	login->req_dma = ib_dma_map_single(device->ib_device, login->req,
					   ISCSI_DEF_MAX_RECV_SEG_LEN,
					   DMA_TO_DEVICE);
	if (ib_dma_mapping_error(device->ib_device, login->req_dma))
		goto err_free_req;

	/* Response side. */
	login->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
	if (!login->rsp)
		goto err_unmap_req;

	login->rsp_dma = ib_dma_map_single(device->ib_device, login->rsp,
					   ISER_RX_LOGIN_SIZE,
					   DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(device->ib_device, login->rsp_dma))
		goto err_free_rsp;

	return 0;

	/* Unwind in reverse order of acquisition. */
err_free_rsp:
	kfree(login->rsp);
err_unmap_req:
	ib_dma_unmap_single(device->ib_device, login->req_dma,
			    ISCSI_DEF_MAX_RECV_SEG_LEN,
			    DMA_TO_DEVICE);
err_free_req:
	kfree(login->req);

	return -ENOMEM;
}
Beispiel #13
0
/*
 * DMA-map @length bytes at @addr for bidirectional transfers on the global
 * RDMA device. Returns the bus address, or 0 if the mapping failed.
 */
u64 rdma_map_address(void* addr, int length)
{
    u64 bus_addr;

    LOG_KERN(LOG_INFO, ("Mapping addr\n"));

    bus_addr = ib_dma_map_single(rdma_ib_device.dev, addr, length, DMA_BIDIRECTIONAL);
    if (ib_dma_mapping_error(rdma_ib_device.dev, bus_addr) == 0)
        return bus_addr;

    LOG_KERN(LOG_INFO, ("Error mapping myaddr"));
    return 0; /* error */
}
/*
 * Build and post one receive work request large enough for a maximum-size
 * request: one page is obtained and DMA-mapped per SGE until at least
 * xprt->sc_max_req_size bytes are covered. The context pointer is carried
 * in the work request's wr_id.
 *
 * Returns 0 on success, -ENOMEM if a DMA mapping fails, or the error from
 * ib_post_recv(). Needing more than sc_max_sge entries triggers BUG_ON.
 */
int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
	struct ib_recv_wr recv_wr, *bad_recv_wr;
	struct svc_rdma_op_ctxt *ctxt;
	struct page *page;
	dma_addr_t pa;
	int sge_no;
	int buflen;
	int ret;

	ctxt = svc_rdma_get_context(xprt);
	buflen = 0;
	ctxt->direction = DMA_FROM_DEVICE;
	for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
		BUG_ON(sge_no >= xprt->sc_max_sge);
		/* No NULL check here; presumably svc_rdma_get_page() cannot fail -- TODO confirm. */
		page = svc_rdma_get_page();
		/*
		 * NOTE(review): the page is stored before ctxt->count is
		 * raised below; if the mapping fails, confirm that
		 * svc_rdma_put_context() still releases this page.
		 */
		ctxt->pages[sge_no] = page;
		pa = ib_dma_map_page(xprt->sc_cm_id->device,
				     page, 0, PAGE_SIZE,
				     DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
			goto err_put_ctxt;
		/* Track the number of live DMA mappings on this transport. */
		atomic_inc(&xprt->sc_dma_used);
		ctxt->sge[sge_no].addr = pa;
		ctxt->sge[sge_no].length = PAGE_SIZE;
		ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
		/* Keep count in step so cleanup sees only completed entries. */
		ctxt->count = sge_no + 1;
		buflen += PAGE_SIZE;
	}
	recv_wr.next = NULL;
	recv_wr.sg_list = &ctxt->sge[0];
	recv_wr.num_sge = ctxt->count;
	recv_wr.wr_id = (u64)(unsigned long)ctxt;

	/* Take a transport reference for the posted WR; dropped here on failure. */
	svc_xprt_get(&xprt->sc_xprt);
	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
	if (ret) {
		svc_rdma_unmap_dma(ctxt);
		svc_rdma_put_context(ctxt, 1);
		svc_xprt_put(&xprt->sc_xprt);
	}
	return ret;

 err_put_ctxt:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	return -ENOMEM;
}
Beispiel #15
0
/**
 * iser_reg_single - DMA-map a registered buffer and describe the mapping
 * @device:    iSER device providing the IB device and its memory region
 * @regd_buf:  buffer descriptor; virt_addr and data_size are read, the
 *             reg, dma_addr and direction fields are written
 * @direction: DMA direction to map with
 *
 * A mapping failure is fatal (BUG_ON).
 */
void iser_reg_single(struct iser_device *device,
		     struct iser_regd_buf *regd_buf,
		     enum dma_data_direction direction)
{
	u64 bus_addr = ib_dma_map_single(device->ib_device,
					 regd_buf->virt_addr,
					 regd_buf->data_size, direction);

	BUG_ON(ib_dma_mapping_error(device->ib_device, bus_addr));

	/* Remember how the buffer was mapped. */
	regd_buf->dma_addr  = bus_addr;
	regd_buf->direction = direction;

	/* Registration info: plain DMA address under the device MR, no FMR. */
	regd_buf->reg.va   = bus_addr;
	regd_buf->reg.len  = regd_buf->data_size;
	regd_buf->reg.lkey = device->mr->lkey;
	regd_buf->reg.is_fmr = 0;
}
Beispiel #16
0
/*
 * Allocate ISER_QP_MAX_RECV_DTOS receive descriptors for the connection,
 * DMA-map each one and prime its scatter/gather entry.
 *
 * Returns 0 on success or -ENOMEM with all mappings and the descriptor
 * array released.
 */
int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
{
	struct iser_device *device = ib_conn->device;
	struct iser_rx_desc *desc;
	struct ib_sge *sge;
	u64 bus_addr;
	int nmapped, k;

	ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
				sizeof(struct iser_rx_desc), GFP_KERNEL);
	if (!ib_conn->rx_descs)
		goto rx_desc_alloc_fail;

	desc = ib_conn->rx_descs;
	for (nmapped = 0; nmapped < ISER_QP_MAX_RECV_DTOS; nmapped++) {
		bus_addr = ib_dma_map_single(device->ib_device, (void *)desc,
					     ISER_RX_PAYLOAD_SIZE,
					     DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, bus_addr))
			goto rx_desc_dma_map_failed;

		desc->dma_addr = bus_addr;

		sge = &desc->rx_sg;
		sge->addr   = desc->dma_addr;
		sge->length = ISER_RX_PAYLOAD_SIZE;
		sge->lkey   = device->mr->lkey;

		desc++;
	}

	ib_conn->rx_desc_head = 0;
	return 0;

rx_desc_dma_map_failed:
	/* Only the first nmapped descriptors hold a valid mapping. */
	desc = ib_conn->rx_descs;
	for (k = 0; k < nmapped; k++) {
		ib_dma_unmap_single(device->ib_device, desc->dma_addr,
			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		desc++;
	}
	kfree(ib_conn->rx_descs);
	ib_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_err("failed allocating rx descriptors / data buffers\n");
	return -ENOMEM;
}
Beispiel #17
0
/*
 * DMA-map the header area of @tx_desc, describe it in the descriptor's
 * first scatter/gather entry, and bind the task to its connection.
 *
 * Returns 0 on success or -ENOMEM if the mapping fails, in which case
 * neither the descriptor nor the task is modified.
 */
int iser_initialize_task_headers(struct iscsi_task *task,
						struct iser_tx_desc *tx_desc)
{
	struct iser_conn       *ib_conn   = task->conn->dd_data;
	struct iser_device     *device    = ib_conn->device;
	struct iscsi_iser_task *iser_task = task->dd_data;
	u64 hdr_dma = ib_dma_map_single(device->ib_device, (void *)tx_desc,
					ISER_HEADERS_LEN, DMA_TO_DEVICE);

	if (ib_dma_mapping_error(device->ib_device, hdr_dma))
		return -ENOMEM;

	tx_desc->dma_addr = hdr_dma;

	/* First SGE always covers the headers. */
	tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
	tx_desc->tx_sg[0].lkey   = device->mr->lkey;

	iser_task->ib_conn = ib_conn;

	return 0;
}
Beispiel #18
0
/**
 * iser_initialize_task_headers() - Initialize task headers
 * @task:       iscsi task
 * @tx_desc:    iser tx descriptor
 *
 * Maps the descriptor's header area for DMA and fills its first
 * scatter/gather entry.
 *
 * Notes:
 * This routine may race with iser teardown flow for scsi
 * error handling TMFs. So for TMF we should acquire the
 * state mutex to avoid dereferencing the IB device which
 * may have already been terminated.
 *
 * Return: 0 on success, -ENODEV if the connection is not up,
 * -ENOMEM if the DMA mapping fails.
 */
int
iser_initialize_task_headers(struct iscsi_task *task,
			     struct iser_tx_desc *tx_desc)
{
	struct iser_conn *iser_conn = task->conn->dd_data;
	struct iser_device *device = iser_conn->ib_conn.device;
	struct iscsi_iser_task *iser_task = task->dd_data;
	/* Tasks without a SCSI command, outside interrupt context, take the mutex. */
	const bool mgmt_task = !task->sc && !in_interrupt();
	u64 hdr_dma;
	int rc = 0;

	if (unlikely(mgmt_task))
		mutex_lock(&iser_conn->state_mutex);

	if (unlikely(iser_conn->state != ISER_CONN_UP)) {
		rc = -ENODEV;
	} else {
		hdr_dma = ib_dma_map_single(device->ib_device, (void *)tx_desc,
					    ISER_HEADERS_LEN, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, hdr_dma)) {
			rc = -ENOMEM;
		} else {
			tx_desc->wr_idx = 0;
			tx_desc->mapped = true;
			tx_desc->dma_addr = hdr_dma;
			tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
			tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
			tx_desc->tx_sg[0].lkey   = device->pd->local_dma_lkey;

			iser_task->iser_conn = iser_conn;
		}
	}

	if (unlikely(mgmt_task))
		mutex_unlock(&iser_conn->state_mutex);

	return rc;
}
Beispiel #19
0
/*
 * Allocate a receive context together with a receive buffer of
 * sc_max_req_size bytes, DMA-map the buffer, and pre-build the single-SGE
 * receive work request that points at it.
 *
 * Returns the new context, or NULL if an allocation or the mapping fails.
 */
static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_recv_ctxt *rctxt;
	dma_addr_t bus_addr;
	void *recv_buf;

	rctxt = kmalloc(sizeof(*rctxt), GFP_KERNEL);
	if (!rctxt)
		return NULL;

	recv_buf = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
	if (!recv_buf) {
		kfree(rctxt);
		return NULL;
	}

	bus_addr = ib_dma_map_single(rdma->sc_pd->device, recv_buf,
				     rdma->sc_max_req_size, DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(rdma->sc_pd->device, bus_addr)) {
		kfree(recv_buf);
		kfree(rctxt);
		return NULL;
	}

	/* Scatter/gather entry covering the whole receive buffer. */
	rctxt->rc_recv_sge.addr = bus_addr;
	rctxt->rc_recv_sge.length = rdma->sc_max_req_size;
	rctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;

	/* Work request and its completion handler. */
	rctxt->rc_cqe.done = svc_rdma_wc_receive;
	rctxt->rc_recv_wr.next = NULL;
	rctxt->rc_recv_wr.wr_cqe = &rctxt->rc_cqe;
	rctxt->rc_recv_wr.sg_list = &rctxt->rc_recv_sge;
	rctxt->rc_recv_wr.num_sge = 1;

	rctxt->rc_recv_buf = recv_buf;
	rctxt->rc_temp = false;

	return rctxt;
}
Beispiel #20
0
/*
 * Prepare a context for RDMA: allocate the receive buffer, obtain a DMA
 * memory region with local-write and remote read/write access, publish its
 * rkey, DMA-map the receive buffer, and finally call modify_qp().
 *
 * Returns 0 on success.
 * NOTE(review): CHECK_MSG_RET is defined elsewhere; presumably it logs the
 * message and returns its last argument (-1) when the condition is false --
 * confirm. If so, the later checks return without freeing rdma_recv_buffer
 * or releasing ctx->mr.
 */
static int rdma_setup(rdma_ctx_t ctx)
{
    // create receive buffer
    ctx->rdma_recv_buffer = kmalloc(RDMA_BUFFER_SIZE, GFP_KERNEL);
    CHECK_MSG_RET(ctx->rdma_recv_buffer != 0, "Error kmalloc", -1);

    // create memory region
    // NOTE(review): ib_get_dma_mr() reports failure with an ERR_PTR value
    // in mainline kernels, which the "!= 0" test below would not catch --
    // verify against the kernel version this module targets.
    ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_REMOTE_READ | 
                                     IB_ACCESS_REMOTE_WRITE | 
                                     IB_ACCESS_LOCAL_WRITE);
    CHECK_MSG_RET(ctx->mr != 0, "Error creating MR", -1);

    ctx->rkey = ctx->mr->rkey;

    // get dma_addr
    ctx->dma_addr = ib_dma_map_single(rdma_ib_device.dev, ctx->rdma_recv_buffer, 
            RDMA_BUFFER_SIZE, DMA_BIDIRECTIONAL);
    CHECK_MSG_RET(ib_dma_mapping_error(rdma_ib_device.dev, ctx->dma_addr) == 0,
            "Error ib_dma_map_single", -1);

    // modify QP until RTS
    // (the result of modify_qp(), if it has one, is not checked)
    modify_qp(ctx);
    return 0;
}
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_send_wr wr, *bad_wr;
	struct ib_sge sge;
	int err = 0;
	unsigned long flags;
	struct p9_rdma_context *c = NULL;
	struct p9_rdma_context *rpl_context = NULL;

	/* Allocate an fcall for the reply */
	rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL);
	if (!rpl_context)
		goto err_close;

	/*
	 * If the request has a buffer, steal it, otherwise
	 * allocate a new one.  Typically, requests should already
	 * have receive buffers allocated and just swap them around
	 */
	if (!req->rc) {
		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
								GFP_KERNEL);
		if (req->rc) {
			req->rc->sdata = (char *) req->rc +
						sizeof(struct p9_fcall);
			req->rc->capacity = client->msize;
		}
	}
	rpl_context->rc = req->rc;
	if (!rpl_context->rc) {
		kfree(rpl_context);
		goto err_close;
	}

	/*
	 * Post a receive buffer for this request. We need to ensure
	 * there is a reply buffer available for every outstanding
	 * request. A flushed request can result in no reply for an
	 * outstanding request, so we must keep a count to avoid
	 * overflowing the RQ.
	 */
	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
		err = post_recv(client, rpl_context);
		if (err) {
			kfree(rpl_context->rc);
			kfree(rpl_context);
			goto err_close;
		}
	} else
		atomic_dec(&rdma->rq_count);

	/* remove posted receive buffer from request structure */
	req->rc = NULL;

	/* Post the request */
	c = kmalloc(sizeof *c, GFP_KERNEL);
	if (!c)
		goto err_close;
	c->req = req;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->req->tc->sdata, c->req->tc->size,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
		goto error;

	sge.addr = c->busa;
	sge.length = c->req->tc->size;
	sge.lkey = rdma->lkey;

	wr.next = NULL;
	c->wc_op = IB_WC_SEND;
	wr.wr_id = (unsigned long) c;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	if (down_interruptible(&rdma->sq_sem))
		goto error;

	return ib_post_send(rdma->qp, &wr, &bad_wr);

 error:
	P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
	return -EIO;

 err_close:
	spin_lock_irqsave(&rdma->req_lock, flags);
	if (rdma->state < P9_RDMA_CLOSING) {
		rdma->state = P9_RDMA_CLOSING;
		spin_unlock_irqrestore(&rdma->req_lock, flags);
		rdma_disconnect(rdma->cm_id);
	} else
		spin_unlock_irqrestore(&rdma->req_lock, flags);
	return err;
}
/*
 * Post the RPC/RDMA reply as a single signaled Send.
 *
 * @rdma:       transport to send on
 * @rqstp:      request whose response pages are handed over to @ctxt
 * @page:       page holding the RPC/RDMA header; becomes ctxt->pages[0]
 * @rdma_resp:  header used to compute the length of SGE 0
 * @ctxt:       operation context that carries the SGEs and pages
 * @vec:        request map; entries from index 1 describe the reply body
 * @byte_count: number of body bytes to transmit
 *
 * A fresh receive buffer is posted before the reply is sent. Returns 0 on
 * success, -ENOTCONN if that receive cannot be posted (the transport is
 * flagged XPT_CLOSE), or -EIO if a DMA mapping or the send itself fails.
 */
static int send_reply(struct svcxprt_rdma *rdma,
		      struct svc_rqst *rqstp,
		      struct page *page,
		      struct rpcrdma_msg *rdma_resp,
		      struct svc_rdma_op_ctxt *ctxt,
		      struct svc_rdma_req_map *vec,
		      int byte_count)
{
	struct ib_send_wr send_wr;
	struct ib_send_wr inv_wr;
	int sge_no;
	int sge_bytes;
	int page_no;
	int ret;

	/* Post a recv buffer to handle another request. */
	ret = svc_rdma_post_recv(rdma);
	if (ret) {
		printk(KERN_INFO
		       "svcrdma: could not post a receive buffer, err=%d."
		       "Closing transport %p.\n", ret, rdma);
		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
		svc_rdma_put_frmr(rdma, vec->frmr);
		svc_rdma_put_context(ctxt, 0);
		return -ENOTCONN;
	}

	/* Prepare the context */
	ctxt->pages[0] = page;
	ctxt->count = 1;
	ctxt->frmr = vec->frmr;
	if (vec->frmr)
		set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

	/* Prepare the SGE for the RPCRDMA Header */
	ctxt->sge[0].lkey = rdma->sc_dma_lkey;
	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
	ctxt->sge[0].addr =
		ib_dma_map_single(rdma->sc_cm_id->device, page_address(page),
				  ctxt->sge[0].length, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
		goto err;
	atomic_inc(&rdma->sc_dma_used);

	ctxt->direction = DMA_TO_DEVICE;

	/* Determine how many of our SGE are to be transmitted */
	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
		byte_count -= sge_bytes;
		if (!vec->frmr) {
			/* No FRMR: map each piece individually under the DMA lkey. */
			ctxt->sge[sge_no].addr =
				ib_dma_map_single(rdma->sc_cm_id->device,
						  vec->sge[sge_no].iov_base,
						  sge_bytes, DMA_TO_DEVICE);
			if (ib_dma_mapping_error(rdma->sc_cm_id->device,
						 ctxt->sge[sge_no].addr))
				goto err;
			atomic_inc(&rdma->sc_dma_used);
			ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
		} else {
			/* FRMR: address through the fast-registered region's lkey. */
			ctxt->sge[sge_no].addr = (unsigned long)
				vec->sge[sge_no].iov_base;
			ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
		}
		ctxt->sge[sge_no].length = sge_bytes;
	}
	/* Every requested byte must have been placed in an SGE. */
	BUG_ON(byte_count != 0);

	/* Save all respages in the ctxt and remove them from the
	 * respages array. They are our pages until the I/O
	 * completes.
	 */
	for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
		ctxt->count++;
		rqstp->rq_respages[page_no] = NULL;
		/*
		 * If there are more pages than SGE, terminate SGE
		 * list so that svc_rdma_unmap_dma doesn't attempt to
		 * unmap garbage.
		 */
		if (page_no+1 >= sge_no)
			ctxt->sge[page_no+1].length = 0;
	}
	BUG_ON(sge_no > rdma->sc_max_sge);
	memset(&send_wr, 0, sizeof send_wr);
	ctxt->wr_op = IB_WR_SEND;
	send_wr.wr_id = (unsigned long)ctxt;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = sge_no;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags =  IB_SEND_SIGNALED;
	if (vec->frmr) {
		/* Prepare INVALIDATE WR */
		memset(&inv_wr, 0, sizeof inv_wr);
		inv_wr.opcode = IB_WR_LOCAL_INV;
		inv_wr.send_flags = IB_SEND_SIGNALED;
		inv_wr.ex.invalidate_rkey =
			vec->frmr->mr->lkey;
		/* Chain the invalidate directly after the send. */
		send_wr.next = &inv_wr;
	}

	ret = svc_rdma_send(rdma, &send_wr);
	if (ret)
		goto err;

	return 0;

 err:
	/* Undo mappings, return the FRMR, and release the context. */
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_frmr(rdma, vec->frmr);
	svc_rdma_put_context(ctxt, 1);
	return -EIO;
}
/*
 * Post one RDMA_WRITE that pushes @write_len bytes of the reply, starting
 * @xdr_off bytes into the body described by @vec, to the remote buffer
 * identified by @rmr / @to.
 *
 * Without an FRMR each piece is DMA-mapped individually; with an FRMR the
 * pieces are addressed through the FRMR's lkey.
 *
 * Returns 0 on success or -EIO if a DMA mapping or the post fails; on
 * failure the mappings created here are released before the context is
 * dropped.
 */
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
		      u32 rmr, u64 to,
		      u32 xdr_off, int write_len,
		      struct svc_rdma_req_map *vec)
{
	struct ib_send_wr write_wr;
	struct ib_sge *sge;
	int xdr_sge_no;
	int sge_no;
	int sge_bytes;
	int sge_off;
	int bc;
	struct svc_rdma_op_ctxt *ctxt;

	BUG_ON(vec->count > RPCSVC_MAXPAGES);
	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
		"write_len=%d, vec->sge=%p, vec->count=%lu\n",
		rmr, (unsigned long long)to, xdr_off,
		write_len, vec->sge, vec->count);

	ctxt = svc_rdma_get_context(xprt);
	ctxt->direction = DMA_TO_DEVICE;
	sge = ctxt->sge;

	/* Find the SGE associated with xdr_off */
	for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
	     xdr_sge_no++) {
		if (vec->sge[xdr_sge_no].iov_len > bc)
			break;
		bc -= vec->sge[xdr_sge_no].iov_len;
	}

	/* bc is now the offset inside the first SGE to be written. */
	sge_off = bc;
	bc = write_len;
	sge_no = 0;

	/* Copy the remaining SGE */
	while (bc != 0) {
		sge_bytes = min_t(size_t,
			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
		sge[sge_no].length = sge_bytes;
		if (!vec->frmr) {
			sge[sge_no].addr =
				ib_dma_map_single(xprt->sc_cm_id->device,
						  (void *)
						  vec->sge[xdr_sge_no].iov_base + sge_off,
						  sge_bytes, DMA_TO_DEVICE);
			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
						 sge[sge_no].addr))
				goto err;
			atomic_inc(&xprt->sc_dma_used);
			sge[sge_no].lkey = xprt->sc_dma_lkey;
		} else {
			sge[sge_no].addr = (unsigned long)
				vec->sge[xdr_sge_no].iov_base + sge_off;
			sge[sge_no].lkey = vec->frmr->mr->lkey;
		}
		/* count only covers entries that were set up successfully. */
		ctxt->count++;
		ctxt->frmr = vec->frmr;
		sge_off = 0;
		sge_no++;
		xdr_sge_no++;
		BUG_ON(xdr_sge_no > vec->count);
		bc -= sge_bytes;
	}

	/* Prepare WRITE WR */
	memset(&write_wr, 0, sizeof write_wr);
	ctxt->wr_op = IB_WR_RDMA_WRITE;
	write_wr.wr_id = (unsigned long)ctxt;
	write_wr.sg_list = &sge[0];
	write_wr.num_sge = sge_no;
	write_wr.opcode = IB_WR_RDMA_WRITE;
	write_wr.send_flags = IB_SEND_SIGNALED;
	write_wr.wr.rdma.rkey = rmr;
	write_wr.wr.rdma.remote_addr = to;

	/* Post It */
	atomic_inc(&rdma_stat_write);
	if (svc_rdma_send(xprt, &write_wr))
		goto err;
	return 0;
 err:
	/*
	 * Release the SGEs mapped above before dropping the context;
	 * otherwise the mappings (and the sc_dma_used count) leak.
	 * NOTE(review): vec->frmr is not returned here; confirm that the
	 * caller releases it on failure.
	 */
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	/* Fatal error, close transport */
	return -EIO;
}
/*
 * Describe an XDR buffer through a single fast-register memory region.
 *
 * An FRMR is obtained for @xprt and recorded in @vec->frmr. The head of @xdr,
 * each page of its page list and, when it does not share the head's page,
 * the page holding the tail are DMA-mapped into consecutive slots of the
 * FRMR page list. @vec->sge[1..] are filled with the head, page-list and
 * tail segments; @vec->sge[0] is skipped for the RPCRDMA header.
 *
 * Returns 0 on success, -ENOMEM when no FRMR could be obtained, or -EIO when
 * a DMA mapping or svc_rdma_fastreg() fails. On -EIO the FRMR is handed back
 * with svc_rdma_put_frmr() and @vec->frmr is reset to NULL.
 */
static int fast_reg_xdr(struct svcxprt_rdma *xprt,
		 struct xdr_buf *xdr,
		 struct svc_rdma_req_map *vec)
{
	int sge_no;
	u32 sge_bytes;
	u32 page_bytes;
	u32 page_off;
	int page_no = 0;
	u8 *frva;
	struct svc_rdma_fastreg_mr *frmr;

	frmr = svc_rdma_get_frmr(xprt);
	if (IS_ERR(frmr))
		return -ENOMEM;
	vec->frmr = frmr;

	/* Skip the RPCRDMA header */
	sge_no = 1;

	/* Map the head. frva is the start of the page that holds it. */
	frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
	vec->count = 2;
	sge_no++;

	/* Build the FRMR; slot 0 of the page list belongs to the head */
	frmr->kva = frva;
	frmr->direction = DMA_TO_DEVICE;
	frmr->access_flags = 0;
	frmr->map_len = PAGE_SIZE;
	frmr->page_list_len = 1;
	frmr->page_list->page_list[page_no] =
		ib_dma_map_single(xprt->sc_cm_id->device,
				  (void *)xdr->head[0].iov_base,
				  PAGE_SIZE, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(xprt->sc_cm_id->device,
				 frmr->page_list->page_list[page_no]))
		goto fatal_err;
	atomic_inc(&xprt->sc_dma_used);

	page_off = xdr->page_base;
	page_bytes = xdr->page_len + page_off;
	if (!page_bytes)
		goto encode_tail;

	/* Map the pages: one SGE for the whole page list ... */
	vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
	vec->sge[sge_no].iov_len = page_bytes;
	sge_no++;
	/* ... and one FRMR page-list slot per page, starting at slot 1 */
	while (page_bytes) {
		struct page *page;

		/* page_no is bumped first so pages[n] lands in slot n + 1 */
		page = xdr->pages[page_no++];
		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
		page_bytes -= sge_bytes;

		frmr->page_list->page_list[page_no] =
			ib_dma_map_single(xprt->sc_cm_id->device,
					  page_address(page),
					  PAGE_SIZE, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 frmr->page_list->page_list[page_no]))
			goto fatal_err;

		atomic_inc(&xprt->sc_dma_used);
		page_off = 0; /* reset for next time through loop */
		frmr->map_len += PAGE_SIZE;
		frmr->page_list_len++;
	}
	vec->count++;

 encode_tail:
	/* Map tail */
	if (0 == xdr->tail[0].iov_len)
		goto done;

	vec->count++;
	vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;

	if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
	    ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
		/*
		 * If head and tail use the same page, we don't need
		 * to map it again.
		 */
		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
	} else {
		void *va;

		/* Map another page for the tail */
		page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
		va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
		vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;

		/*
		 * Slots 0 .. page_list_len - 1 already hold the head and the
		 * page-list pages, so the tail takes the next free slot. This
		 * matches the address computed from map_len just above;
		 * indexing with page_no would overwrite the last slot used.
		 */
		frmr->page_list->page_list[frmr->page_list_len] =
			ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
					  DMA_TO_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 frmr->page_list->page_list[frmr->page_list_len]))
			goto fatal_err;
		atomic_inc(&xprt->sc_dma_used);
		frmr->map_len += PAGE_SIZE;
		frmr->page_list_len++;
	}

 done:
	if (svc_rdma_fastreg(xprt, frmr))
		goto fatal_err;

	return 0;

 fatal_err:
	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
	/* Drop the reference in vec before the FRMR is handed back */
	vec->frmr = NULL;
	svc_rdma_put_frmr(xprt, frmr);
	return -EIO;
}
Beispiel #25
0
/* Map a read-chunk-list to an XDR and fast register the page-list.
 *
 * Assumptions:
 * - chunk[0]	position points to pages[0] at an offset of 0
 * - pages[]	will be made physically contiguous by creating a one-off memory
 *		region using the fastreg verb.
 * - byte_count is # of bytes in read-chunk-list
 * - ch_count	is # of chunks in read-chunk-list
 *
 * Output:
 * - sge array pointing into pages[] array.
 * - chunk_sge array specifying sge index and count for each
 *   chunk in the read list
 *
 * Returns the number of chunks placed in the maps on success, -ENOMEM when
 * no FRMR could be obtained, or -EIO when a DMA mapping or
 * svc_rdma_fastreg() fails. On -EIO the FRMR is handed back with
 * svc_rdma_put_frmr() and head->frmr is reset to NULL.
 */
static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
				struct svc_rqst *rqstp,
				struct svc_rdma_op_ctxt *head,
				struct rpcrdma_msg *rmsgp,
				struct svc_rdma_req_map *rpl_map,
				struct svc_rdma_req_map *chl_map,
				int ch_count,
				int byte_count)
{
	int page_no;
	int ch_no;
	u32 offset;
	struct rpcrdma_read_chunk *ch;
	struct svc_rdma_fastreg_mr *frmr;

	frmr = svc_rdma_get_frmr(xprt);
	if (IS_ERR(frmr))
		return -ENOMEM;

	/* Seed the head context's XDR from the request, with room for the data */
	head->frmr = frmr;
	head->arg.head[0] = rqstp->rq_arg.head[0];
	head->arg.tail[0] = rqstp->rq_arg.tail[0];
	head->arg.pages = &head->pages[head->count];
	head->hdr_count = head->count; /* save count of hdr pages */
	head->arg.page_base = 0;
	head->arg.page_len = byte_count;
	head->arg.len = rqstp->rq_arg.len + byte_count;
	head->arg.buflen = rqstp->rq_arg.buflen + byte_count;

	/* Fast register the page list */
	frmr->kva = page_address(rqstp->rq_arg.pages[0]);
	frmr->direction = DMA_FROM_DEVICE;
	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
	frmr->map_len = byte_count;
	frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
		frmr->page_list->page_list[page_no] =
			ib_dma_map_page(xprt->sc_cm_id->device,
					rqstp->rq_arg.pages[page_no], 0,
					PAGE_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 frmr->page_list->page_list[page_no]))
			goto fatal_err;
		atomic_inc(&xprt->sc_dma_used);
		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
	}
	head->count += page_no;

	/* rq_respages points one past arg pages */
	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];

	/*
	 * Create the reply and chunk maps: chunk n gets exactly one SGE
	 * (index n) that starts where the previous chunk ended in the region.
	 */
	offset = 0;
	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	for (ch_no = 0; ch_no < ch_count; ch_no++) {
		rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
		rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
		chl_map->ch[ch_no].count = 1;
		chl_map->ch[ch_no].start = ch_no;
		offset += ch->rc_target.rs_length;
		ch++;
	}

	if (svc_rdma_fastreg(xprt, frmr))
		goto fatal_err;

	return ch_no;

 fatal_err:
	printk("svcrdma: error fast registering xdr for xprt %p\n", xprt);
	/*
	 * Do not leave head pointing at an FRMR that is about to be handed
	 * back; fast_reg_xdr() clears its reference the same way.
	 */
	head->frmr = NULL;
	svc_rdma_put_frmr(xprt, frmr);
	return -EIO;
}
Beispiel #26
0
/*
 * Post a 9P request on the RDMA transport.
 *
 * Unless an excess receive left over from an earlier failure can be
 * absorbed, a receive context wrapping req->rc is posted first. The request
 * buffer (req->tc) is then DMA-mapped and posted as a signaled SEND.
 *
 * Returns 0 on success or a negative errno:
 *   -ENOMEM  a context allocation failed
 *   -EINTR   waiting on rq_sem or sq_sem was interrupted
 *   -EIO     the request buffer could not be DMA-mapped
 *   other    whatever post_recv() or ib_post_send() returned
 *
 * A failure while preparing the receive moves the transport to
 * P9_RDMA_CLOSING and disconnects it. A failure while preparing the send
 * marks the request REQ_STATUS_ERROR and records one excess receive.
 */
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_send_wr wr, *bad_wr;
	struct ib_sge sge;
	int err = 0;
	unsigned long flags;
	struct p9_rdma_context *c = NULL;
	struct p9_rdma_context *rpl_context = NULL;

	/* When an error occurs between posting the recv and the send,
	 * there will be a receive context posted without a pending request.
	 * Since there is no way to "un-post" it, we remember it and skip
	 * post_recv() for the next request.
	 * So here,
	 * see if we are this `next request' and need to absorb an excess rc.
	 * If yes, then drop and free our own, and do not recv_post().
	 **/
	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
			/* Got one ! */
			kfree(req->rc);
			req->rc = NULL;
			goto dont_need_post_recv;
		} else {
			/* We raced and lost. */
			atomic_inc(&rdma->excess_rc);
		}
	}

	/* Allocate an fcall for the reply */
	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
	if (!rpl_context) {
		err = -ENOMEM;
		goto recv_error;
	}
	rpl_context->rc = req->rc;

	/*
	 * Post a receive buffer for this request. We need to ensure
	 * there is a reply buffer available for every outstanding
	 * request. A flushed request can result in no reply for an
	 * outstanding request, so we must keep a count to avoid
	 * overflowing the RQ.
	 */
	if (down_interruptible(&rdma->rq_sem)) {
		err = -EINTR;
		goto recv_error;
	}

	err = post_recv(client, rpl_context);
	if (err) {
		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
		goto recv_error;
	}
	/* remove posted receive buffer from request structure */
	req->rc = NULL;

dont_need_post_recv:
	/* Post the request */
	c = kmalloc(sizeof *c, GFP_NOFS);
	if (!c) {
		err = -ENOMEM;
		goto send_error;
	}
	c->req = req;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->req->tc->sdata, c->req->tc->size,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
		err = -EIO;
		goto send_error;
	}

	sge.addr = c->busa;
	sge.length = c->req->tc->size;
	sge.lkey = rdma->lkey;

	wr.next = NULL;
	c->wc_op = IB_WC_SEND;
	wr.wr_id = (unsigned long) c;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	/* From here on the send buffer is mapped and must be unmapped on error */
	if (down_interruptible(&rdma->sq_sem)) {
		err = -EINTR;
		goto dma_unmap;
	}

	/* Mark request as `sent' *before* we actually send it,
	 * because doing if after could erase the REQ_STATUS_RCVD
	 * status in case of a very fast reply.
	 */
	req->status = REQ_STATUS_SENT;
	err = ib_post_send(rdma->qp, &wr, &bad_wr);
	if (err) {
		/*
		 * NOTE(review): the sq_sem slot taken above is not released
		 * here; whether something else accounts for it cannot be
		 * told from this function - confirm.
		 */
		goto dma_unmap;
	}

	/* Success */
	return 0;

 /* The send was never posted, so nothing else will undo the mapping. */
 dma_unmap:
	ib_dma_unmap_single(rdma->cm_id->device, c->busa,
			    c->req->tc->size, DMA_TO_DEVICE);

 /* Handle errors that happened during or while preparing the send: */
 send_error:
	req->status = REQ_STATUS_ERROR;
	kfree(c);
	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);

	/* Ach.
	 *  We did recv_post(), but not send. We have one recv_post in excess.
	 */
	atomic_inc(&rdma->excess_rc);
	return err;

 /* Handle errors that happened during or while preparing post_recv(): */
 recv_error:
	kfree(rpl_context);
	spin_lock_irqsave(&rdma->req_lock, flags);
	if (rdma->state < P9_RDMA_CLOSING) {
		rdma->state = P9_RDMA_CLOSING;
		/* Drop the lock before disconnecting */
		spin_unlock_irqrestore(&rdma->req_lock, flags);
		rdma_disconnect(rdma->cm_id);
	} else
		spin_unlock_irqrestore(&rdma->req_lock, flags);
	return err;
}
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
	struct iser_device	*device;
	struct ib_qp_init_attr	init_attr;
	int			req_err, resp_err, ret = -ENOMEM;
	struct ib_fmr_pool_param params;

	BUG_ON(ib_conn->device == NULL);

	device = ib_conn->device;

	ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
					ISER_RX_LOGIN_SIZE, GFP_KERNEL);
	if (!ib_conn->login_buf)
		goto out_err;

	ib_conn->login_req_buf  = ib_conn->login_buf;
	ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;

	ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
				(void *)ib_conn->login_req_buf,
				ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

	ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
				(void *)ib_conn->login_resp_buf,
				ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

	req_err  = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);

	if (req_err || resp_err) {
		if (req_err)
			ib_conn->login_req_dma = 0;
		if (resp_err)
			ib_conn->login_resp_dma = 0;
		goto out_err;
	}

	ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
				    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
				    GFP_KERNEL);
	if (!ib_conn->page_vec)
		goto out_err;

	ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);

	params.page_shift        = SHIFT_4K;
	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
	params.pool_size	 = ISCSI_DEF_XMIT_CMDS_MAX * 2;
	params.dirty_watermark	 = ISCSI_DEF_XMIT_CMDS_MAX;
	params.cache		 = 0;
	params.flush_function	 = NULL;
	params.access		 = (IB_ACCESS_LOCAL_WRITE  |
				    IB_ACCESS_REMOTE_WRITE |
				    IB_ACCESS_REMOTE_READ);

	ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
	if (IS_ERR(ib_conn->fmr_pool)) {
		ret = PTR_ERR(ib_conn->fmr_pool);
		ib_conn->fmr_pool = NULL;
		goto out_err;
	}

	memset(&init_attr, 0, sizeof init_attr);

	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context	= (void *)ib_conn;
	init_attr.send_cq	= device->tx_cq;
	init_attr.recv_cq	= device->rx_cq;
	init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type	= IB_SIGNAL_REQ_WR;
	init_attr.qp_type	= IB_QPT_RC;

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
		 ib_conn, ib_conn->cma_id,
		 ib_conn->fmr_pool, ib_conn->cma_id->qp);
	return ret;

out_err:
	iser_err("unable to alloc mem or create resource, err %d\n", ret);
	return ret;
}
/*
 * Issue an RDMA_READ using the local lkey to map the data sink.
 *
 * Starting at (*page_no, *page_offset) in rqstp->rq_arg.pages, each sink page
 * is DMA-mapped into its own SGE of a fresh op context and a single signaled
 * RDMA_READ of the remote segment (rs_handle, rs_offset) is posted. The
 * number of pages, and therefore SGEs, is limited by rdma_read_max_sge(), so
 * one call may cover only part of rs_length.
 *
 * @head accumulates the page pointers, page_len/len and page count of the
 * data read so far. The context is flagged RDMACTXT_F_LAST_CTXT only when
 * @last is set and this READ consumes all of rs_length.
 *
 * Returns the number of bytes covered by the posted READ and advances
 * *page_no / *page_offset past them. On failure the context is unmapped and
 * released, *page_no / *page_offset are left untouched, and the non-zero
 * result of ib_dma_mapping_error() or svc_rdma_send() is returned; a send
 * failure also sets XPT_CLOSE on the transport.
 * NOTE(review): the code does not show whether ib_dma_mapping_error() can
 * return a positive value; callers that test "ret < 0" should confirm.
 */
static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
			       struct svc_rqst *rqstp,
			       struct svc_rdma_op_ctxt *head,
			       int *page_no,
			       u32 *page_offset,
			       u32 rs_handle,
			       u32 rs_length,
			       u64 rs_offset,
			       int last)
{
	struct ib_send_wr read_wr;
	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
	int ret, read, pno;
	u32 pg_off = *page_offset;
	u32 pg_no = *page_no;

	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->read_hdr = head;
	pages_needed =
		min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
	/*
	 * The first page only contributes the bytes behind *page_offset, so
	 * the page span has to be reduced by that offset. Otherwise a clamped
	 * read that starts mid-page reports more bytes than the SGEs built
	 * below actually cover.
	 */
	read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
		     rs_length);

	for (pno = 0; pno < pages_needed; pno++) {
		/* bytes that fit in what is left of the current page */
		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
		head->arg.page_len += len;
		head->arg.len += len;
		/* count a page only when it is first started */
		if (!pg_off)
			head->count++;
		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		rqstp->rq_next_page = rqstp->rq_respages + 1;
#endif
		ctxt->sge[pno].addr =
			ib_dma_map_page(xprt->sc_cm_id->device,
					head->arg.pages[pg_no], pg_off,
					PAGE_SIZE - pg_off,
					DMA_FROM_DEVICE);
		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
					   ctxt->sge[pno].addr);
		if (ret)
			goto err;
		atomic_inc(&xprt->sc_dma_used);

		/* The lkey here is either a local dma lkey or a dma_mr lkey */
		ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
		ctxt->sge[pno].length = len;
		ctxt->count++;

		/* adjust offset and wrap to next page if needed */
		pg_off += len;
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			pg_no++;
		}
		rs_length -= len;
	}

	/* Only the READ that finishes the final segment is the last context */
	if (last && rs_length == 0)
		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

	memset(&read_wr, 0, sizeof(read_wr));
	read_wr.wr_id = (unsigned long)ctxt;
	read_wr.opcode = IB_WR_RDMA_READ;
	ctxt->wr_op = read_wr.opcode;
	read_wr.send_flags = IB_SEND_SIGNALED;
	read_wr.wr.rdma.rkey = rs_handle;
	read_wr.wr.rdma.remote_addr = rs_offset;
	read_wr.sg_list = ctxt->sge;
	read_wr.num_sge = pages_needed;

	ret = svc_rdma_send(xprt, &read_wr);
	if (ret) {
		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		goto err;
	}

	/* return current location in page array */
	*page_no = pg_no;
	*page_offset = pg_off;
	ret = read;
	atomic_inc(&rdma_stat_read);
	return ret;
 err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	return ret;
}
Beispiel #29
0
/*
 * Post one mbuf chain as an SDP send on the socket's queue pair.
 *
 * The BSDH header at the front of @mb is filled in (flags, buffer count,
 * length, sequence number and ack), every mbuf of the chain is DMA-mapped
 * into its own SGE, and a single signaled IB_WR_SEND is posted. M_URG data
 * is additionally sent with IB_SEND_SOLICITED.
 *
 * The function consumes @mb: it is freed when the QP is not active, when a
 * SrcAvail was cancelled (SDP_ZCOPY) and when the post fails; otherwise it
 * is stored in the TX ring slot for this sequence number. A failed post also
 * reports ECONNRESET through sdp_notify(). Only a successful post advances
 * the ring head and consumes a credit.
 */
void
sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h;
	unsigned long mseq;
	struct ib_device *dev;
	struct ib_send_wr *bad_wr;
	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge;
	struct ib_send_wr tx_wr = { NULL };
	int i, rc;
	u64 addr;

	/*
	 * Locate the header before the statistics macros run: they take
	 * h->mid, and h used to be read here while still uninitialized.
	 */
	h = mtod(mb, struct sdp_bsdh *);

	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	/*
	 * Use the packet length that the rest of this function reads.
	 * NOTE(review): the original passed mb->len, a field name used
	 * nowhere else in this function - confirm against the macro.
	 */
	SDPSTATS_HIST(send_size, mb->m_pkthdr.len);

	if (!ssk->qp_active) {
		m_freem(mb);
		return;
	}

	mseq = ring_head(ssk->tx_ring);
	ssk->tx_packets++;
	ssk->tx_bytes += mb->m_pkthdr.len;

#ifdef SDP_ZCOPY
	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(mb);
		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(ssk->socket, "SrcAvail cancelled "
					"before being sent!\n");
			WARN_ON(1);
			m_freem(mb);
			return;
		}
		TX_SRCAVAIL_STATE(mb)->mseq = mseq;
	}
#endif

	if (unlikely(mb->m_flags & M_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	mb->m_flags |= M_RDONLY; /* Don't allow compression once sent. */
	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(mb->m_pkthdr.len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf1(ssk->socket, mb, "TX: %s bufs: %d mseq:%ld ack:%d",
			mid2str(h->mid), rx_ring_posted(ssk), mseq,
			ntohl(h->mseq_ack));

	SDP_DUMP_PACKET(ssk->socket, "TX", mb, h);

	/* The ring slot keeps the chain head; mb itself walks the chain below */
	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->mb = mb;
	dev = ssk->ib_device;
	sge = &ibsge[0];
	for (i = 0;  mb != NULL; i++, mb = mb->m_next, sge++) {
		/* Check the SGE bound before mapping anything for this mbuf */
		BUG_ON(i >= SDP_MAX_SEND_SGES);
		addr = ib_dma_map_single(dev, mb->m_data, mb->m_len,
		    DMA_TO_DEVICE);
		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));
		tx_req->mapping[i] = addr;
		sge->addr = addr;
		sge->length = mb->m_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
	}
	tx_wr.next = NULL;
	tx_wr.wr_id = mseq | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = i;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = IB_SEND_SIGNALED;
	/* mb is NULL after the loop, so test the flags via the ring slot */
	if (unlikely(tx_req->mb->m_flags & M_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(ssk->socket,
				"ib_post_send failed with status %d.\n", rc);

		sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE);

		sdp_notify(ssk, ECONNRESET);
		m_freem(tx_req->mb);
		return;
	}

	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
}
/*
 * Issue an RDMA_READ using an FRMR to map the data sink.
 *
 * Starting at (*page_no, *page_offset) in rqstp->rq_arg.pages, the sink pages
 * are DMA-mapped into the page list of a freshly obtained FRMR. A chain of
 * work requests is then posted: FAST_REG_MR, the RDMA_READ into a single SGE
 * covering the region, and an invalidate. The invalidate is folded into the
 * READ (IB_WR_RDMA_READ_WITH_INV) when the device advertises
 * SVCRDMA_DEVCAP_READ_W_INV, and is a separate fenced LOCAL_INV otherwise.
 * The number of pages is limited by xprt->sc_frmr_pg_list_len, so one call
 * may cover only part of rs_length.
 *
 * @head accumulates the page pointers, page_len/len and page count of the
 * data read so far. The context is flagged RDMACTXT_F_LAST_CTXT only when
 * @last is set and this READ consumes all of rs_length.
 *
 * Returns the number of bytes covered by the posted READ and advances
 * *page_no / *page_offset past them. Returns -ENOMEM when no FRMR is
 * available. On a mapping or send failure the context and the FRMR are
 * released, *page_no / *page_offset are left untouched, and the non-zero
 * result of ib_dma_mapping_error() or svc_rdma_send() is returned; a send
 * failure also sets XPT_CLOSE on the transport.
 */
static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
				struct svc_rqst *rqstp,
				struct svc_rdma_op_ctxt *head,
				int *page_no,
				u32 *page_offset,
				u32 rs_handle,
				u32 rs_length,
				u64 rs_offset,
				int last)
{
	struct ib_send_wr read_wr;
	struct ib_send_wr inv_wr;
	struct ib_send_wr fastreg_wr;
	u8 key;
	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
	struct svc_rdma_op_ctxt *ctxt;
	struct svc_rdma_fastreg_mr *frmr;
	int ret, read, pno;
	u32 pg_off = *page_offset;
	u32 pg_no = *page_no;

	/*
	 * Obtain the FRMR before the op context, so that failing to get one
	 * does not leave a context that is never put back.
	 */
	frmr = svc_rdma_get_frmr(xprt);
	if (IS_ERR(frmr))
		return -ENOMEM;
	ctxt = svc_rdma_get_context(xprt);

	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->frmr = frmr;
	pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
	/*
	 * The first page only contributes the bytes behind *page_offset, so
	 * the page span has to be reduced by that offset. Otherwise a clamped
	 * read that starts mid-page would run past the end of the region.
	 */
	read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
		     rs_length);

	frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
	frmr->direction = DMA_FROM_DEVICE;
	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
	frmr->map_len = pages_needed << PAGE_SHIFT;
	frmr->page_list_len = pages_needed;

	for (pno = 0; pno < pages_needed; pno++) {
		/* bytes that fit in what is left of the current page */
		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);

		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
		head->arg.page_len += len;
		head->arg.len += len;
		/* count a page only when it is first started */
		if (!pg_off)
			head->count++;
		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		rqstp->rq_next_page = rqstp->rq_respages + 1;
#endif
		/* Whole pages are mapped; the offset is applied via the SGE */
		frmr->page_list->page_list[pno] =
			ib_dma_map_page(xprt->sc_cm_id->device,
					head->arg.pages[pg_no], 0,
					PAGE_SIZE, DMA_FROM_DEVICE);
		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
					   frmr->page_list->page_list[pno]);
		if (ret)
			goto err;
		atomic_inc(&xprt->sc_dma_used);

		/* adjust offset and wrap to next page if needed */
		pg_off += len;
		if (pg_off == PAGE_SIZE) {
			pg_off = 0;
			pg_no++;
		}
		rs_length -= len;
	}

	/* Only the READ that finishes the final segment is the last context */
	if (last && rs_length == 0)
		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
	else
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);

	/* Bump the key */
	key = (u8)(frmr->mr->lkey & 0x000000FF);
	ib_update_fast_reg_key(frmr->mr, ++key);

	/* One SGE spans the registered region from the starting offset */
	ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
	ctxt->sge[0].lkey = frmr->mr->lkey;
	ctxt->sge[0].length = read;
	ctxt->count = 1;
	ctxt->read_hdr = head;

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.send_flags = IB_SEND_SIGNALED;
	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	fastreg_wr.wr.fast_reg.length = frmr->map_len;
	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
	fastreg_wr.next = &read_wr;

	/* Prepare RDMA_READ */
	memset(&read_wr, 0, sizeof(read_wr));
	read_wr.send_flags = IB_SEND_SIGNALED;
	read_wr.wr.rdma.rkey = rs_handle;
	read_wr.wr.rdma.remote_addr = rs_offset;
	read_wr.sg_list = ctxt->sge;
	read_wr.num_sge = 1;
	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
		/* The READ invalidates the FRMR itself and carries the ctxt */
		read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
		read_wr.wr_id = (unsigned long)ctxt;
		read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
	} else {
		read_wr.opcode = IB_WR_RDMA_READ;
		read_wr.next = &inv_wr;
		/* Prepare invalidate */
		memset(&inv_wr, 0, sizeof(inv_wr));
		inv_wr.wr_id = (unsigned long)ctxt;
		inv_wr.opcode = IB_WR_LOCAL_INV;
		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
	}
	ctxt->wr_op = read_wr.opcode;

	/* Post the chain */
	ret = svc_rdma_send(xprt, &fastreg_wr);
	if (ret) {
		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		goto err;
	}

	/* return current location in page array */
	*page_no = pg_no;
	*page_offset = pg_off;
	ret = read;
	atomic_inc(&rdma_stat_read);
	return ret;
 err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	svc_rdma_put_frmr(xprt, frmr);
	return ret;
}