Example #1
static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
			   const struct blk_mq_queue_data *bd)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct request *req = bd->rq;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
	unsigned long flags;
	unsigned int num;
	int qid = hctx->queue_num;
	int err;
	bool notify = false;

	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

	vbr->req = req;
	if (req->cmd_flags & REQ_FLUSH) {
		vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_FLUSH);
		vbr->out_hdr.sector = 0;
		vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
	} else {
		switch (req->cmd_type) {
		case REQ_TYPE_FS:
			vbr->out_hdr.type = 0;
			vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, blk_rq_pos(vbr->req));
			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
			break;
		case REQ_TYPE_BLOCK_PC:
			vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_SCSI_CMD);
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
			break;
		case REQ_TYPE_DRV_PRIV:
			vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
			break;
		default:
			/* We don't put anything else in the queue. */
			BUG();
		}
	}

	blk_mq_start_request(req);

	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
	if (num) {
		if (rq_data_dir(vbr->req) == WRITE)
			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
		else
			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
	}

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
	if (err) {
		virtqueue_kick(vblk->vqs[qid].vq);
		blk_mq_stop_hw_queue(hctx);
		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
		/* Out of mem doesn't actually happen, since we fall back
		 * to direct descriptors */
		if (err == -ENOMEM || err == -ENOSPC)
			return BLK_MQ_RQ_QUEUE_BUSY;
		return BLK_MQ_RQ_QUEUE_ERROR;
	}

	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
		notify = true;
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);

	if (notify)
		virtqueue_notify(vblk->vqs[qid].vq);
	return BLK_MQ_RQ_QUEUE_OK;
}
Example #2
static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
			   const struct blk_mq_queue_data *bd)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct request *req = bd->rq;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
	unsigned long flags;
	unsigned int num;
	int qid = hctx->queue_num;
	int err;
	bool notify = false;
	u32 type;

	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

	switch (req_op(req)) {
	case REQ_OP_READ:
	case REQ_OP_WRITE:
		type = 0;
		break;
	case REQ_OP_FLUSH:
		type = VIRTIO_BLK_T_FLUSH;
		break;
	case REQ_OP_SCSI_IN:
	case REQ_OP_SCSI_OUT:
		type = VIRTIO_BLK_T_SCSI_CMD;
		break;
	case REQ_OP_DRV_IN:
		type = VIRTIO_BLK_T_GET_ID;
		break;
	default:
		WARN_ON_ONCE(1);
		return BLK_MQ_RQ_QUEUE_ERROR;
	}

	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
	vbr->out_hdr.sector = type ?
		0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
	vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));

	blk_mq_start_request(req);

	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
	if (num) {
		if (rq_data_dir(req) == WRITE)
			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
		else
			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
	}

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	if (req_op(req) == REQ_OP_SCSI_IN || req_op(req) == REQ_OP_SCSI_OUT)
		err = virtblk_add_req_scsi(vblk->vqs[qid].vq, vbr, vbr->sg, num);
	else
		err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
	if (err) {
		virtqueue_kick(vblk->vqs[qid].vq);
		blk_mq_stop_hw_queue(hctx);
		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
		/* Out of mem doesn't actually happen, since we fall back
		 * to direct descriptors */
		if (err == -ENOMEM || err == -ENOSPC)
			return BLK_MQ_RQ_QUEUE_BUSY;
		return BLK_MQ_RQ_QUEUE_ERROR;
	}

	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
		notify = true;
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);

	if (notify)
		virtqueue_notify(vblk->vqs[qid].vq);
	return BLK_MQ_RQ_QUEUE_OK;
}
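Both kernel-side versions above differ only in how the request type is derived (cmd_type/cmd_flags in the first, req_op() in the second); the submission path itself is identical. Below is a minimal sketch, not taken from the driver, that isolates that shared pattern: add the request under the per-queue lock, only prepare the kick while the lock is held, and issue the actual notify after the lock is dropped so the potentially expensive VM exit happens outside the critical section. The helper name is made up; the struct fields and calls are reused from example #1 under the assumption that vblk->vqs[qid] carries a spinlock and a virtqueue pointer.

/*
 * Sketch of the shared submission pattern (assumed field names: a per-queue
 * spinlock "lock" and a virtqueue pointer "vq", as used in example #1).
 */
static int virtblk_submit_sketch(struct virtio_blk *vblk, int qid,
				 struct virtblk_req *vbr,
				 unsigned int num, bool last)
{
	unsigned long flags;
	bool notify = false;
	int err;

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
	if (err) {
		/* flush what is already queued before reporting busy/error */
		virtqueue_kick(vblk->vqs[qid].vq);
		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
		return err;
	}

	/* only prepare the kick while the lock is held ... */
	if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
		notify = true;
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);

	/* ... and notify (possibly a VM exit) outside of it */
	if (notify)
		virtqueue_notify(vblk->vqs[qid].vq);
	return 0;
}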
BOOLEAN
RhelDoFlush(
    PVOID DeviceExtension,
    PSRB_TYPE Srb,
    BOOLEAN resend,
    BOOLEAN bIsr
    )
{
    PADAPTER_EXTENSION  adaptExt = (PADAPTER_EXTENSION)DeviceExtension;
    PSRB_EXTENSION      srbExt   = SRB_EXTENSION(Srb);
    ULONG               fragLen = 0UL;
    PVOID               va = NULL;
    ULONGLONG           pa = 0ULL;

    ULONG               QueueNumber = 0;
    ULONG               OldIrql = 0;
    ULONG               MessageId = 0;
    BOOLEAN             result = FALSE;
    bool                notify = FALSE;
    STOR_LOCK_HANDLE    LockHandle = { 0 };
    ULONG               status = STOR_STATUS_SUCCESS;
    struct virtqueue    *vq = NULL;

    SET_VA_PA();

    if (resend) {
        MessageId = srbExt->MessageID;
        QueueNumber = MessageId - 1;
    } else if (adaptExt->num_queues > 1) {
        STARTIO_PERFORMANCE_PARAMETERS param;
        param.Size = sizeof(STARTIO_PERFORMANCE_PARAMETERS);
        status = StorPortGetStartIoPerfParams(DeviceExtension, (PSCSI_REQUEST_BLOCK)Srb, &param);
        if (status == STOR_STATUS_SUCCESS && param.MessageNumber != 0) {
           MessageId = param.MessageNumber;
           QueueNumber = MessageId - 1;
           RhelDbgPrint(TRACE_LEVEL_INFORMATION, ("%s srb %p, cpu %d :: QueueNumber %lu, MessageNumber %lu, ChannelNumber %lu.\n",  __FUNCTION__, Srb, srbExt->cpu, QueueNumber, param.MessageNumber, param.ChannelNumber));
        }
        else {
           RhelDbgPrint(TRACE_LEVEL_ERROR, ("%s StorPortGetStartIoPerfParams failed. srb %p cpu %d status 0x%x.\n",__FUNCTION__, Srb, srbExt->cpu, status));
           QueueNumber = 0;
           MessageId = 1;
        }
    } else {
        QueueNumber = 0;
        MessageId = 1;
    }

    srbExt->MessageID = MessageId;
    vq = adaptExt->vq[QueueNumber];

    srbExt->vbr.out_hdr.sector = 0;
    srbExt->vbr.out_hdr.ioprio = 0;
    srbExt->vbr.req            = (struct request *)Srb;
    srbExt->vbr.out_hdr.type   = VIRTIO_BLK_T_FLUSH;
    srbExt->out                = 1;
    srbExt->in                 = 1;

    srbExt->vbr.sg[0].physAddr = StorPortGetPhysicalAddress(DeviceExtension, NULL, &srbExt->vbr.out_hdr, &fragLen);
    srbExt->vbr.sg[0].length   = sizeof(srbExt->vbr.out_hdr);
    srbExt->vbr.sg[1].physAddr = StorPortGetPhysicalAddress(DeviceExtension, NULL, &srbExt->vbr.status, &fragLen);
    srbExt->vbr.sg[1].length   = sizeof(srbExt->vbr.status);

    VioStorVQLock(DeviceExtension, MessageId, &LockHandle, FALSE);
    if (virtqueue_add_buf(vq,
                     &srbExt->vbr.sg[0],
                     srbExt->out, srbExt->in,
                     &srbExt->vbr, va, pa) >= 0) {
        notify = virtqueue_kick_prepare(vq);
        VioStorVQUnlock(DeviceExtension, MessageId, &LockHandle, FALSE);
        result = TRUE;
#ifdef DBG
        InterlockedIncrement((LONG volatile*)&adaptExt->inqueue_cnt);
#endif
    }
    else {
        VioStorVQUnlock(DeviceExtension, MessageId, &LockHandle, FALSE);
        RhelDbgPrint(TRACE_LEVEL_FATAL, ("%s Can not add packet to queue %d.\n", __FUNCTION__, QueueNumber));
        StorPortBusy(DeviceExtension, 2);
    }
    if (notify) {
        virtqueue_notify(vq);
    }

    return result;
}
BOOLEAN
RhelDoReadWrite(PVOID DeviceExtension,
                PSRB_TYPE Srb)
{
    PADAPTER_EXTENSION  adaptExt = (PADAPTER_EXTENSION)DeviceExtension;
    PSRB_EXTENSION      srbExt   = SRB_EXTENSION(Srb);
    PVOID               va = NULL;
    ULONGLONG           pa = 0ULL;

    ULONG               QueueNumber = 0;
    ULONG               OldIrql = 0;
    ULONG               MessageId = 0;
    BOOLEAN             result = FALSE;
    bool                notify = FALSE;
    STOR_LOCK_HANDLE    LockHandle = { 0 };
    ULONG               status = STOR_STATUS_SUCCESS;
    struct virtqueue    *vq = NULL;

    SET_VA_PA();

    if (adaptExt->num_queues > 1) {
        STARTIO_PERFORMANCE_PARAMETERS param;
        param.Size = sizeof(STARTIO_PERFORMANCE_PARAMETERS);
        status = StorPortGetStartIoPerfParams(DeviceExtension, (PSCSI_REQUEST_BLOCK)Srb, &param);
        if (status == STOR_STATUS_SUCCESS && param.MessageNumber != 0) {
           MessageId = param.MessageNumber;
           QueueNumber = MessageId - 1;
           RhelDbgPrint(TRACE_LEVEL_INFORMATION, ("%s srb %p, cpu %d :: QueueNumber %lu, MessageNumber %lu, ChannelNumber %lu.\n", __FUNCTION__, Srb, srbExt->cpu, QueueNumber, param.MessageNumber, param.ChannelNumber));
        }
        else {
           RhelDbgPrint(TRACE_LEVEL_ERROR, ("%s StorPortGetStartIoPerfParams failed srb %p cpu %d status 0x%x.\n", __FUNCTION__, Srb, srbExt->cpu, status));
           QueueNumber = 0;
           MessageId = 1;
        }
    }
    else {
        QueueNumber = 0;
        MessageId = 1;
    }

    srbExt->MessageID = MessageId;
    vq = adaptExt->vq[QueueNumber];
    RhelDbgPrint(TRACE_LEVEL_VERBOSE, ("<--->%s : QueueNumber 0x%x vq = %p\n", __FUNCTION__, QueueNumber, vq));

    VioStorVQLock(DeviceExtension, MessageId, &LockHandle, FALSE);
    if (virtqueue_add_buf(vq,
                     &srbExt->vbr.sg[0],
                     srbExt->out, srbExt->in,
                     &srbExt->vbr, va, pa) >= 0) {
        notify = virtqueue_kick_prepare(vq);
        VioStorVQUnlock(DeviceExtension, MessageId, &LockHandle, FALSE);
#ifdef DBG
        InterlockedIncrement((LONG volatile*)&adaptExt->inqueue_cnt);
#endif
        result = TRUE;
    }
    else {
        VioStorVQUnlock(DeviceExtension, MessageId, &LockHandle, FALSE);
        RhelDbgPrint(TRACE_LEVEL_FATAL, ("%s Can not add packet to queue %d.\n", __FUNCTION__, QueueNumber));
        StorPortBusy(DeviceExtension, 2);
    }
    if (notify) {
        virtqueue_notify(vq);
    }

#if (NTDDI_VERSION > NTDDI_WIN7)
    if (adaptExt->num_queues > 1) {
        if (CHECKFLAG(adaptExt->perfFlags, STOR_PERF_OPTIMIZE_FOR_COMPLETION_DURING_STARTIO)) {
           VioStorCompleteRequest(DeviceExtension, MessageId, FALSE);
        }
    }
#endif
    return result;
}
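The two StorPort routines above repeat the same virtqueue-selection step: with more than one queue, the per-request MSI-X message number reported by StorPortGetStartIoPerfParams picks the queue (QueueNumber = MessageId - 1); otherwise, or on failure, queue 0 with message 1 is used. The following is a hedged sketch of that step factored into a helper; the helper name is hypothetical, while the StorPort calls, types, and fields are the ones the examples already use.

/* Hypothetical helper condensing the queue selection both routines repeat. */
static ULONG RhelSelectQueueSketch(PVOID DeviceExtension, PSRB_TYPE Srb, PULONG MessageId)
{
    PADAPTER_EXTENSION adaptExt = (PADAPTER_EXTENSION)DeviceExtension;
    ULONG QueueNumber = 0;

    *MessageId = 1;
    if (adaptExt->num_queues > 1) {
        STARTIO_PERFORMANCE_PARAMETERS param;
        param.Size = sizeof(STARTIO_PERFORMANCE_PARAMETERS);
        if (StorPortGetStartIoPerfParams(DeviceExtension, (PSCSI_REQUEST_BLOCK)Srb, &param) == STOR_STATUS_SUCCESS &&
            param.MessageNumber != 0) {
            *MessageId  = param.MessageNumber;
            QueueNumber = *MessageId - 1;
        }
        /* on failure the defaults (queue 0, message 1) are kept */
    }
    return QueueNumber;
}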
/* virtio vPMD receive routine; only accepts nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP.
 *
 * This routine is for non-mergeable RX, one desc for each guest buffer.
 * It relies on the RX ring layout optimization: each entry in the avail ring
 * points to the desc with the same index in the desc ring, and this mapping
 * is never changed by the driver.
 *
 * - if nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, no packets are returned
 */
uint16_t
virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
	uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	uint16_t nb_used;
	uint16_t desc_idx;
	struct vring_used_elem *rused;
	struct rte_mbuf **sw_ring;
	struct rte_mbuf **sw_ring_end;
	uint16_t nb_pkts_received;

	uint8x16_t shuf_msk1 = {
		0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
		4, 5, 0xFF, 0xFF,       /* pkt len */
		4, 5,                   /* dat len */
		0xFF, 0xFF,             /* vlan tci */
		0xFF, 0xFF, 0xFF, 0xFF
	};

	uint8x16_t shuf_msk2 = {
		0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
		12, 13, 0xFF, 0xFF,     /* pkt len */
		12, 13,                 /* dat len */
		0xFF, 0xFF,             /* vlan tci */
		0xFF, 0xFF, 0xFF, 0xFF
	};

	/* Subtract the header length.
	 *  In which case do we need the header length in used->len ?
	 */
	uint16x8_t len_adjust = {
		0, 0,
		(uint16_t)vq->hw->vtnet_hdr_size, 0,
		(uint16_t)vq->hw->vtnet_hdr_size,
		0,
		0, 0
	};

	if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
		return 0;

	nb_used = VIRTQUEUE_NUSED(vq);

	rte_rmb();

	if (unlikely(nb_used == 0))
		return 0;

	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
	nb_used = RTE_MIN(nb_used, nb_pkts);

	desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
	rused = &vq->vq_ring.used->ring[desc_idx];
	sw_ring  = &vq->sw_ring[desc_idx];
	sw_ring_end = &vq->sw_ring[vq->vq_nentries];

	rte_prefetch_non_temporal(rused);

	if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
		virtio_rxq_rearm_vec(rxvq);
		if (unlikely(virtqueue_kick_prepare(vq)))
			virtqueue_notify(vq);
	}

	for (nb_pkts_received = 0;
		nb_pkts_received < nb_used;) {
		uint64x2_t desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
		uint64x2_t mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
		uint64x2_t pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];

		mbp[0] = vld1q_u64((uint64_t *)(sw_ring + 0));
		desc[0] = vld1q_u64((uint64_t *)(rused + 0));
		vst1q_u64((uint64_t *)&rx_pkts[0], mbp[0]);

		mbp[1] = vld1q_u64((uint64_t *)(sw_ring + 2));
		desc[1] = vld1q_u64((uint64_t *)(rused + 2));
		vst1q_u64((uint64_t *)&rx_pkts[2], mbp[1]);

		mbp[2] = vld1q_u64((uint64_t *)(sw_ring + 4));
		desc[2] = vld1q_u64((uint64_t *)(rused + 4));
		vst1q_u64((uint64_t *)&rx_pkts[4], mbp[2]);

		mbp[3] = vld1q_u64((uint64_t *)(sw_ring + 6));
		desc[3] = vld1q_u64((uint64_t *)(rused + 6));
		vst1q_u64((uint64_t *)&rx_pkts[6], mbp[3]);

		pkt_mb[1] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[0]), shuf_msk2));
		pkt_mb[0] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[0]), shuf_msk1));
		pkt_mb[1] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[1]), len_adjust));
		pkt_mb[0] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[0]), len_adjust));
		vst1q_u64((void *)&rx_pkts[1]->rx_descriptor_fields1,
			pkt_mb[1]);
		vst1q_u64((void *)&rx_pkts[0]->rx_descriptor_fields1,
			pkt_mb[0]);

		pkt_mb[3] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[1]), shuf_msk2));
		pkt_mb[2] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[1]), shuf_msk1));
		pkt_mb[3] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[3]), len_adjust));
		pkt_mb[2] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[2]), len_adjust));
		vst1q_u64((void *)&rx_pkts[3]->rx_descriptor_fields1,
			pkt_mb[3]);
		vst1q_u64((void *)&rx_pkts[2]->rx_descriptor_fields1,
			pkt_mb[2]);

		pkt_mb[5] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[2]), shuf_msk2));
		pkt_mb[4] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[2]), shuf_msk1));
		pkt_mb[5] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[5]), len_adjust));
		pkt_mb[4] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[4]), len_adjust));
		vst1q_u64((void *)&rx_pkts[5]->rx_descriptor_fields1,
			pkt_mb[5]);
		vst1q_u64((void *)&rx_pkts[4]->rx_descriptor_fields1,
			pkt_mb[4]);

		pkt_mb[7] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[3]), shuf_msk2));
		pkt_mb[6] = vreinterpretq_u64_u8(vqtbl1q_u8(
				vreinterpretq_u8_u64(desc[3]), shuf_msk1));
		pkt_mb[7] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[7]), len_adjust));
		pkt_mb[6] = vreinterpretq_u64_u16(vsubq_u16(
				vreinterpretq_u16_u64(pkt_mb[6]), len_adjust));
		vst1q_u64((void *)&rx_pkts[7]->rx_descriptor_fields1,
			pkt_mb[7]);
		vst1q_u64((void *)&rx_pkts[6]->rx_descriptor_fields1,
			pkt_mb[6]);

		if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
			if (sw_ring + nb_used <= sw_ring_end)
				nb_pkts_received += nb_used;
			else
				nb_pkts_received += sw_ring_end - sw_ring;
			break;
		} else {
			if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
				sw_ring_end)) {
				nb_pkts_received += sw_ring_end - sw_ring;
				break;
			} else {
				nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;

				rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
				sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
				rused   += RTE_VIRTIO_DESC_PER_LOOP;
				nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
			}
		}
	}

	vq->vq_used_cons_idx += nb_pkts_received;
	vq->vq_free_cnt += nb_pkts_received;
	rxvq->stats.packets += nb_pkts_received;
	return nb_pkts_received;
}
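The ring-layout assumption in the header comment is what makes the fixed-stride loads above legal: avail-ring entry i always names descriptor i, so the used ring, the descriptor ring, and sw_ring can all be walked with one shared cursor. A minimal sketch of how that invariant would be established once at setup time follows; the helper name is made up, and the field names follow the ones the example already dereferences.

/* Hypothetical setup-time helper: publish the identity mapping once, so the
 * receive path never rewrites avail-ring entries, only avail->idx. */
static void
virtio_rxq_identity_map_sketch(struct virtqueue *vq)
{
	uint16_t i;

	for (i = 0; i < vq->vq_nentries; i++)
		vq->vq_ring.avail->ring[i] = i;	/* entry i -> descriptor i, forever */
}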
Example #6
static int rpmsg_probe(struct virtio_device *vdev)
{
	vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done };
	static const char * const names[] = { "input", "output" };
	struct virtqueue *vqs[2];
	struct virtproc_info *vrp;
	void *bufs_va;
	int err = 0, i;
	size_t total_buf_space;
	bool notify;

	vrp = kzalloc(sizeof(*vrp), GFP_KERNEL);
	if (!vrp)
		return -ENOMEM;

	vrp->vdev = vdev;

	idr_init(&vrp->endpoints);
	mutex_init(&vrp->endpoints_lock);
	mutex_init(&vrp->tx_lock);
	init_waitqueue_head(&vrp->sendq);

	/* We expect two virtqueues, rx and tx (and in this order) */
	err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names);
	if (err)
		goto free_vrp;

	vrp->rvq = vqs[0];
	vrp->svq = vqs[1];

	/* we expect symmetric tx/rx vrings */
	WARN_ON(virtqueue_get_vring_size(vrp->rvq) !=
		virtqueue_get_vring_size(vrp->svq));

	/* we need fewer buffers if the vrings are small */
	if (virtqueue_get_vring_size(vrp->rvq) < MAX_RPMSG_NUM_BUFS / 2)
		vrp->num_bufs = virtqueue_get_vring_size(vrp->rvq) * 2;
	else
		vrp->num_bufs = MAX_RPMSG_NUM_BUFS;

	total_buf_space = vrp->num_bufs * RPMSG_BUF_SIZE;

	/* allocate coherent memory for the buffers */
	bufs_va = dma_alloc_coherent(vdev->dev.parent->parent,
				     total_buf_space, &vrp->bufs_dma,
				     GFP_KERNEL);
	if (!bufs_va) {
		err = -ENOMEM;
		goto vqs_del;
	}

	dev_dbg(&vdev->dev, "buffers: va %p, dma 0x%llx\n", bufs_va,
					(unsigned long long)vrp->bufs_dma);

	/* half of the buffers is dedicated for RX */
	vrp->rbufs = bufs_va;

	/* and half is dedicated for TX */
	vrp->sbufs = bufs_va + total_buf_space / 2;

	/* set up the receive buffers */
	for (i = 0; i < vrp->num_bufs / 2; i++) {
		struct scatterlist sg;
		void *cpu_addr = vrp->rbufs + i * RPMSG_BUF_SIZE;

		sg_init_one(&sg, cpu_addr, RPMSG_BUF_SIZE);

		err = virtqueue_add_inbuf(vrp->rvq, &sg, 1, cpu_addr,
								GFP_KERNEL);
		WARN_ON(err); /* sanity check; this can't really happen */
	}

	/* suppress "tx-complete" interrupts */
	virtqueue_disable_cb(vrp->svq);

	vdev->priv = vrp;

	/* if supported by the remote processor, enable the name service */
	if (virtio_has_feature(vdev, VIRTIO_RPMSG_F_NS)) {
		/* a dedicated endpoint handles the name service msgs */
		vrp->ns_ept = __rpmsg_create_ept(vrp, NULL, rpmsg_ns_cb,
						vrp, RPMSG_NS_ADDR);
		if (!vrp->ns_ept) {
			dev_err(&vdev->dev, "failed to create the ns ept\n");
			err = -ENOMEM;
			goto free_coherent;
		}
	}

	/*
	 * Prepare to kick but don't notify yet - we can't do this before
	 * device is ready.
	 */
	notify = virtqueue_kick_prepare(vrp->rvq);

	/* From this point on, we can notify and get callbacks. */
	virtio_device_ready(vdev);

	/* tell the remote processor it can start sending messages */
	/*
	 * this might be concurrent with callbacks, but we are only
	 * doing notify, not a full kick here, so that's ok.
	 */
	if (notify)
		virtqueue_notify(vrp->rvq);

	dev_info(&vdev->dev, "rpmsg host is online\n");

	return 0;

free_coherent:
	dma_free_coherent(vdev->dev.parent->parent, total_buf_space,
			  bufs_va, vrp->bufs_dma);
vqs_del:
	vdev->config->del_vqs(vrp->vdev);
free_vrp:
	kfree(vrp);
	return err;
}
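The notify ordering at the end of rpmsg_probe() is worth spelling out: virtqueue_kick_prepare() runs while the device is still quiescent, virtio_device_ready() then flips DRIVER_OK, and only afterwards is virtqueue_notify() issued. A condensed sketch of just that sequence follows; the names are reused from the example, but the helper itself is hypothetical.

/* Hypothetical condensation of the tail of rpmsg_probe(): prepare the kick
 * before DRIVER_OK, notify only after the device has been marked ready. */
static void rpmsg_announce_rx_sketch(struct virtio_device *vdev,
				     struct virtproc_info *vrp)
{
	bool notify = virtqueue_kick_prepare(vrp->rvq);	/* no notification yet */

	virtio_device_ready(vdev);	/* from here on, callbacks may fire */

	if (notify)
		virtqueue_notify(vrp->rvq);	/* safe: device is ready */
}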