Example #1
/**
 * @brief                    Initiate a capture
 *
 * @param devId              Port number
 * @param captureData        A pointer to the capture buffer
 * @param captureDataLength  Size of the capture buffer in bytes
 *
 * @return                   true on success
 */
bool DPDKAdapter::startRx(uint8_t devId, char *captureData, unsigned int captureDataLength)
{
    if(devId >= RTE_MAX_ETHPORTS)
    {
        qCritical("Device ID is out of range");
        return false;
    }

    qDebug("devId %u, allocated a capture buffer of size %u bytes ", devId, captureDataLength);

    memset(captureData, 0, captureDataLength);

    DeviceInfo& devInfo = devices[devId];

    devInfo.captureDataLength = captureDataLength;
    devInfo.captureDataSize = 0;
    devInfo.captureData = captureData;

    // store the number of used descriptors in RX ring
    devInfo.rxQueueCount = rte_eth_rx_queue_count(devId, 0);
    qDebug("RX queue 0 count %d\n", devInfo.rxQueueCount);

    devInfo.rxDevStart = true;

    devInfo.rxTicksStart = rte_get_tsc_cycles();

    rte_mb();

    return true;
}
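
The barrier at the end of startRx() publishes the capture buffer fields before a polling core can observe rxDevStart == true. Below is a minimal sketch of the consumer side under that assumption; the trimmed-down DeviceInfo layout and the poll_capture_once() helper are illustrative only and are not part of the adapter shown above.

/*
 * Hypothetical consumer: one RX poll iteration on a forwarding core.
 * DeviceInfo is a reduced stand-in for the adapter's real per-port state.
 */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include <rte_atomic.h>   /* rte_mb() */
#include <rte_ethdev.h>   /* rte_eth_rx_burst() */
#include <rte_mbuf.h>     /* rte_pktmbuf_*() */

struct DeviceInfo {
    volatile bool rxDevStart;        /* written last by startRx() */
    char *captureData;               /* published before rxDevStart */
    unsigned int captureDataLength;
    unsigned int captureDataSize;
};

static void poll_capture_once(uint8_t devId, struct DeviceInfo *info)
{
    struct rte_mbuf *bufs[32];
    uint16_t nb, i;

    rte_mb();                        /* pairs with the barrier in startRx() */
    if (!info->rxDevStart)
        return;

    nb = rte_eth_rx_burst(devId, 0, bufs, 32);
    for (i = 0; i < nb; i++) {
        uint32_t len = rte_pktmbuf_data_len(bufs[i]);

        /* Append the frame to the capture buffer if it still fits. */
        if (info->captureDataSize + len <= info->captureDataLength) {
            memcpy(info->captureData + info->captureDataSize,
                   rte_pktmbuf_mtod(bufs[i], const char *), len);
            info->captureDataSize += len;
        }
        rte_pktmbuf_free(bufs[i]);
    }
}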
Example #2
static void
check_io(void)
{
	uint64_t end, tsc_complete;
	rte_mb();
#if HAVE_LIBAIO
	if (g_ns->type == ENTRY_TYPE_AIO_FILE) {
		aio_check_io();
	} else
#endif
	{
		spdk_nvme_qpair_process_completions(g_ns->u.nvme.qpair, 0);
	}
	rte_mb();
	end = rte_get_tsc_cycles();
	if (g_ns->current_queue_depth == 1) {
		/*
		 * Account for race condition in AIO case where interrupt occurs
		 *  after checking for queue depth.  If the timestamp capture
		 *  is too big compared to the last capture, assume that an
		 *  interrupt fired, and do not bump the start tsc forward.  This
		 *  will ensure this extra time is accounted for next time through
		 *  when we see current_queue_depth drop to 0.
		 */
		if (g_ns->type == ENTRY_TYPE_NVME_NS || (end - g_complete_tsc_start) < 500) {
			g_complete_tsc_start = end;
		}
	} else {
		tsc_complete = end - g_complete_tsc_start;
		g_tsc_complete += tsc_complete;
		if (tsc_complete < g_tsc_complete_min) {
			g_tsc_complete_min = tsc_complete;
		}
		if (tsc_complete > g_tsc_complete_max) {
			g_tsc_complete_max = tsc_complete;
		}
		g_io_completed++;
		if (!g_ns->is_draining) {
			submit_single_io();
		}
		g_complete_tsc_start = rte_get_tsc_cycles();
	}
}
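
check_io() only accumulates raw TSC deltas; turning them into time requires the TSC frequency. Here is a small sketch of how the counters above might be reported at the end of a run; the print helper and its parameter names are hypothetical, only the cycles-to-time conversion via rte_get_tsc_hz() is DPDK API.

#include <stdio.h>
#include <stdint.h>

#include <rte_cycles.h>   /* rte_get_tsc_hz() */

/* Report sum/min/max completion-path latency collected by check_io(). */
static void print_completion_stats(uint64_t tsc_sum, uint64_t tsc_min,
                                   uint64_t tsc_max, uint64_t io_completed)
{
    /* Cycles -> microseconds using the calibrated TSC frequency. */
    double us_per_cycle = 1000000.0 / (double)rte_get_tsc_hz();

    if (io_completed == 0)
        return;

    printf("completion path: avg %.2f us, min %.2f us, max %.2f us\n",
           (double)tsc_sum / (double)io_completed * us_per_cycle,
           (double)tsc_min * us_per_cycle,
           (double)tsc_max * us_per_cycle);
}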
Example #3
static void
submit_single_io(void)
{
	uint64_t		offset_in_ios;
	uint64_t		start;
	int			rc;
	struct ns_entry		*entry = g_ns;
	uint64_t		tsc_submit;

	offset_in_ios = rand_r(&seed) % entry->size_in_ios;

	start = rte_get_tsc_cycles();
	rte_mb();
#if HAVE_LIBAIO
	if (entry->type == ENTRY_TYPE_AIO_FILE) {
		rc = aio_submit(g_ns->u.aio.ctx, &g_task->iocb, entry->u.aio.fd, IO_CMD_PREAD, g_task->buf,
				g_io_size_bytes, offset_in_ios * g_io_size_bytes, g_task);
	} else
#endif
	{
		rc = spdk_nvme_ns_cmd_read(entry->u.nvme.ns, g_ns->u.nvme.qpair, g_task->buf,
					   offset_in_ios * entry->io_size_blocks,
					   entry->io_size_blocks, io_complete, g_task, 0);
	}

	rte_mb();
	tsc_submit = rte_get_tsc_cycles() - start;
	g_tsc_submit += tsc_submit;
	if (tsc_submit < g_tsc_submit_min) {
		g_tsc_submit_min = tsc_submit;
	}
	if (tsc_submit > g_tsc_submit_max) {
		g_tsc_submit_max = tsc_submit;
	}

	if (rc != 0) {
		fprintf(stderr, "starting I/O failed\n");
	}

	g_ns->current_queue_depth++;
}
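
Both check_io() and submit_single_io() use the same bracket of fence, timestamp, work, fence, timestamp, and then fold the delta into a sum plus min/max pair. A generic sketch of that pattern follows; the tsc_stat struct and helper names are assumptions, not SPDK or DPDK API.

#include <stdint.h>

#include <rte_atomic.h>   /* rte_mb() */
#include <rte_cycles.h>   /* rte_get_tsc_cycles() */

struct tsc_stat {
    uint64_t sum;
    uint64_t min;
    uint64_t max;
    uint64_t count;
};

static void tsc_stat_update(struct tsc_stat *s, uint64_t delta)
{
    s->sum += delta;
    if (s->count == 0 || delta < s->min)
        s->min = delta;
    if (delta > s->max)
        s->max = delta;
    s->count++;
}

/* Time one call to an arbitrary routine between full barriers, mirroring
 * the bracket used around the submit/completion paths above. */
static void tsc_timed_call(struct tsc_stat *s, void (*fn)(void *), void *arg)
{
    uint64_t start;

    rte_mb();
    start = rte_get_tsc_cycles();
    fn(arg);
    rte_mb();
    tsc_stat_update(s, rte_get_tsc_cycles() - start);
}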
Example #4
/**
 * @brief           Check if capture is started on the port
 *
 * @param   devId   Port number
 *
 * @return          true if started
 */
bool DPDKAdapter::isRxStarted(uint8_t devId)
{
    if(devId >= RTE_MAX_ETHPORTS)
    {
        qCritical("Device ID is out of range");
        return false;
    }

    rte_mb();
    DeviceInfo& devInfo = devices[devId];
    return devInfo.rxDevStart;
}
Example #5
/**
 * @brief           Start TX on device
 *
 * @param devId     Port number
 *
 * @return          true on success
 */
bool DPDKAdapter::startTx(uint8_t devId)
{
    if(devId >= RTE_MAX_ETHPORTS)
    {
        qCritical("Device ID is out of range");
        return false;
    }

    qDebug("devId %u", devId);

    DeviceInfo& devInfo = devices[devId];
    devInfo.txDevStart = true;
    rte_mb();

    return true;
}
Example #6
/**
 * @brief                   Stop a capture
 *
 * @param devId             Port number
 * @param pCaptureDataSize  Number of captured bytes
 *
 * @return                  true on success
 */
bool DPDKAdapter::stopRx(uint8_t devId, unsigned int *pCaptureDataSize)
{
    if(devId >= RTE_MAX_ETHPORTS)
    {
        qCritical("Device ID is out of range");
        return false;
    }

    qDebug("devId %u", devId);

    DeviceInfo& devInfo = devices[devId];
    devInfo.rxDevStart = false;
    *pCaptureDataSize = devInfo.captureDataSize;

    rte_mb();

    qDebug("Captured %u bytes", devInfo.captureDataSize);

    return true;
}
Example #7
static int vmbus_read_and_signal(struct vmbus_channel *chan,
				 void *data, size_t dlen, size_t skip)
{
	struct vmbus_br *rbr = &chan->rxbr;
	uint32_t write_sz, pending_sz, bytes_read;
	int error;

	/* Record where host was when we started read (for debug) */
	rbr->windex = rbr->vbr->windex;

	/* Read data and skip packet header */
	error = vmbus_rxbr_read(rbr, data, dlen, skip);
	if (error)
		return error;

	/* No need for signaling on older versions */
	if (!rbr->vbr->feature_bits.feat_pending_send_sz)
		return 0;

	/* Make sure reading of pending happens after new read index */
	rte_mb();

	pending_sz = rbr->vbr->pending_send;
	if (!pending_sz)
		return 0;

	rte_smp_rmb();
	write_sz = vmbus_br_availwrite(rbr, rbr->vbr->windex);
	bytes_read = dlen + skip + sizeof(uint64_t);

	/* If there was space before then host was not blocked */
	if (write_sz - bytes_read > pending_sz)
		return 0;

	/* If pending write will not fit */
	if (write_sz <= pending_sz)
		return 0;

	vmbus_set_event(chan->device, chan);
	return 0;
}
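
The three size checks at the end of vmbus_read_and_signal() decide whether the host is actually blocked waiting for ring space. The same predicate, factored into a standalone sketch with hypothetical parameter names:

#include <stdbool.h>
#include <stdint.h>

/*
 * write_sz   - ring space available after this read advanced the read index
 * bytes_read - payload + skipped header + trailing 64-bit index
 * pending_sz - how much the host reported it is waiting to write (0 = idle)
 */
static bool host_needs_signal(uint32_t write_sz, uint32_t bytes_read,
                              uint32_t pending_sz)
{
    if (pending_sz == 0)
        return false;    /* host is not waiting for space */
    if (write_sz - bytes_read > pending_sz)
        return false;    /* there was already enough room before this read */
    if (write_sz <= pending_sz)
        return false;    /* even now the pending write does not fit */
    return true;         /* this read just freed the space the host needs */
}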
Example #8
static uint32_t
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	struct rte_mbuf *buff;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
	uint64_t buff_addr = 0;
	uint64_t buff_hdr_addr = 0;
	uint32_t head[MAX_PKT_BURST];
	uint32_t head_idx, packet_success = 0;
	uint16_t avail_idx, res_cur_idx;
	uint16_t res_base_idx, res_end_idx;
	uint16_t free_entries;
	uint8_t success = 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
	if (unlikely(queue_id != VIRTIO_RXQ)) {
		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
		return 0;
	}

	vq = dev->virtqueue[VIRTIO_RXQ];
	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

	/*
	 * As many data cores may want access to available buffers,
	 * they need to be reserved.
	 */
	do {
		res_base_idx = vq->last_used_idx_res;
		avail_idx = *((volatile uint16_t *)&vq->avail->idx);

		free_entries = (avail_idx - res_base_idx);
		/*check that we have enough buffers*/
		if (unlikely(count > free_entries))
			count = free_entries;

		if (count == 0)
			return 0;

		res_end_idx = res_base_idx + count;
		/* vq->last_used_idx_res is atomically updated. */
		/* TODO: Allow to disable cmpset if no concurrency in application. */
		success = rte_atomic16_cmpset(&vq->last_used_idx_res,
				res_base_idx, res_end_idx);
	} while (unlikely(success == 0));
	res_cur_idx = res_base_idx;
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
			dev->device_fh, res_cur_idx, res_end_idx);

	/* Prefetch available ring to retrieve indexes. */
	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (head_idx = 0; head_idx < count; head_idx++)
		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
					(vq->size - 1)];

	/*Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[packet_success]]);

	while (res_cur_idx != res_end_idx) {
		uint32_t offset = 0, vb_offset = 0;
		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
		uint8_t hdr = 0, uncompleted_pkt = 0;

		/* Get descriptor from available ring */
		desc = &vq->desc[head[packet_success]];

		buff = pkts[packet_success];

		/* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
		buff_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void *)(uintptr_t)buff_addr);

		/* Copy virtio_hdr to packet and increment buffer address */
		buff_hdr_addr = buff_addr;

		/*
		 * If the descriptors are chained the header and data are
		 * placed in separate buffers.
		 */
		if ((desc->flags & VRING_DESC_F_NEXT) &&
			(desc->len == vq->vhost_hlen)) {
			desc = &vq->desc[desc->next];
			/* Buffer address translation. */
			buff_addr = gpa_to_vva(dev, desc->addr);
		} else {
			vb_offset += vq->vhost_hlen;
			hdr = 1;
		}

		pkt_len = rte_pktmbuf_pkt_len(buff);
		data_len = rte_pktmbuf_data_len(buff);
		len_to_cpy = RTE_MIN(data_len,
			hdr ? desc->len - vq->vhost_hlen : desc->len);
		while (total_copied < pkt_len) {
			/* Copy mbuf data to buffer */
			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
				len_to_cpy);
			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
				len_to_cpy, 0);

			offset += len_to_cpy;
			vb_offset += len_to_cpy;
			total_copied += len_to_cpy;

			/* The whole packet completes */
			if (total_copied == pkt_len)
				break;

			/* The current segment completes */
			if (offset == data_len) {
				buff = buff->next;
				offset = 0;
				data_len = rte_pktmbuf_data_len(buff);
			}

			/* The current vring descriptor done */
			if (vb_offset == desc->len) {
				if (desc->flags & VRING_DESC_F_NEXT) {
					desc = &vq->desc[desc->next];
					buff_addr = gpa_to_vva(dev, desc->addr);
					vb_offset = 0;
				} else {
					/* Room in vring buffer is not enough */
					uncompleted_pkt = 1;
					break;
				}
			}
			len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
		};

		/* Update used ring with desc information */
		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
							head[packet_success];

		/* Drop the packet if it is uncompleted */
		if (unlikely(uncompleted_pkt == 1))
			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
							vq->vhost_hlen;
		else
			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
							pkt_len + vq->vhost_hlen;

		res_cur_idx++;
		packet_success++;

		if (unlikely(uncompleted_pkt == 1))
			continue;

		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
			(const void *)&virtio_hdr, vq->vhost_hlen);

		PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);

		if (res_cur_idx < res_end_idx) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[packet_success]]);
		}
	}

	rte_compiler_barrier();

	/* Wait until it's our turn to add our buffer to the used ring. */
	while (unlikely(vq->last_used_idx != res_base_idx))
		rte_pause();

	*(volatile uint16_t *)&vq->used->idx += count;
	vq->last_used_idx = res_end_idx;

	/* flush used->idx update before we read avail->flags. */
	rte_mb();

	/* Kick the guest if necessary. */
	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write((int)vq->callfd, 1);
	return count;
}
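
The loop at the top of virtio_dev_rx() (and of virtio_dev_merge_rx() below) is a lock-free reservation: several cores race to claim a contiguous range of used-ring slots, and rte_atomic16_cmpset() lets exactly one winner advance the reservation index for each range. A minimal sketch of that pattern; ring_res and reserve_entries() are hypothetical stand-ins for the vhost_virtqueue fields, not vhost API.

#include <stdint.h>

#include <rte_atomic.h>   /* rte_atomic16_cmpset() */

struct ring_res {
    volatile uint16_t last_used_idx_res;  /* next index to reserve */
    volatile uint16_t avail_idx;          /* index published by the guest */
};

/*
 * Claim up to *count entries. Returns the base index of the claimed range;
 * *count is clamped to what was actually available (possibly 0).
 */
static uint16_t reserve_entries(struct ring_res *r, uint32_t *count)
{
    uint16_t base, end, free_entries;

    do {
        base = r->last_used_idx_res;
        free_entries = (uint16_t)(r->avail_idx - base);
        if (*count > free_entries)
            *count = free_entries;
        if (*count == 0)
            return base;              /* nothing to reserve right now */
        end = (uint16_t)(base + *count);
        /* Retry if another core moved the reservation index under us. */
    } while (rte_atomic16_cmpset(&r->last_used_idx_res, base, end) == 0);

    return base;
}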
Example #9
static uint32_t
virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	uint32_t pkt_idx = 0, entry_success = 0;
	uint16_t avail_idx;
	uint16_t res_base_idx, res_cur_idx;
	uint8_t success = 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
		dev->device_fh);
	if (unlikely(queue_id != VIRTIO_RXQ)) {
		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
		return 0;
	}

	vq = dev->virtqueue[VIRTIO_RXQ];
	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);

	if (count == 0)
		return 0;

	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;

		do {
			/*
			 * As many data cores may want access to available
			 * buffers, they need to be reserved.
			 */
			uint32_t secure_len = 0;
			uint32_t vec_idx = 0;

			res_base_idx = vq->last_used_idx_res;
			res_cur_idx = res_base_idx;

			do {
				avail_idx = *((volatile uint16_t *)&vq->avail->idx);
				if (unlikely(res_cur_idx == avail_idx)) {
					LOG_DEBUG(VHOST_DATA,
						"(%"PRIu64") Failed "
						"to get enough desc from "
						"vring\n",
						dev->device_fh);
					return pkt_idx;
				} else {
					update_secure_len(vq, res_cur_idx, &secure_len, &vec_idx);
					res_cur_idx++;
				}
			} while (pkt_len > secure_len);

			/* vq->last_used_idx_res is atomically updated. */
			success = rte_atomic16_cmpset(&vq->last_used_idx_res,
							res_base_idx,
							res_cur_idx);
		} while (success == 0);

		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
			res_cur_idx, pkts[pkt_idx]);

		rte_compiler_barrier();

		/*
		 * Wait until it's our turn to add our buffer
		 * to the used ring.
		 */
		while (unlikely(vq->last_used_idx != res_base_idx))
			rte_pause();

		*(volatile uint16_t *)&vq->used->idx += entry_success;
		vq->last_used_idx = res_cur_idx;

		/* flush used->idx update before we read avail->flags. */
		rte_mb();

		/* Kick the guest if necessary. */
		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
			eventfd_write((int)vq->callfd, 1);
	}

	return count;
}
Example #10
/**
 * Configure secondary process queues from a private data pointer (primary
 * or secondary) and update burst callbacks. Can take place only once.
 *
 * All queues must have been previously created by the primary process to
 * avoid undefined behavior.
 *
 * @param priv
 *   Private data pointer from either primary or secondary process.
 *
 * @return
 *   Private data pointer from secondary process, NULL in case of error.
 */
struct priv *
mlx5_secondary_data_setup(struct priv *priv)
{
	unsigned int port_id = 0;
	struct mlx5_secondary_data *sd;
	void **tx_queues;
	void **rx_queues;
	unsigned int nb_tx_queues;
	unsigned int nb_rx_queues;
	unsigned int i;

	/* priv must be valid at this point. */
	assert(priv != NULL);
	/* priv->dev must also be valid but may point to local memory from
	 * another process, possibly with the same address and must not
	 * be dereferenced yet. */
	assert(priv->dev != NULL);
	/* Determine port ID by finding out where priv comes from. */
	while (1) {
		sd = &mlx5_secondary_data[port_id];
		rte_spinlock_lock(&sd->lock);
		/* Primary process? */
		if (sd->primary_priv == priv)
			break;
		/* Secondary process? */
		if (sd->data.dev_private == priv)
			break;
		rte_spinlock_unlock(&sd->lock);
		if (++port_id == RTE_DIM(mlx5_secondary_data))
			port_id = 0;
	}
	/* Switch to secondary private structure. If private data has already
	 * been updated by another thread, there is nothing else to do. */
	priv = sd->data.dev_private;
	if (priv->dev->data == &sd->data)
		goto end;
	/* Sanity checks. Secondary private structure is supposed to point
	 * to local eth_dev, itself still pointing to the shared device data
	 * structure allocated by the primary process. */
	assert(sd->shared_dev_data != &sd->data);
	assert(sd->data.nb_tx_queues == 0);
	assert(sd->data.tx_queues == NULL);
	assert(sd->data.nb_rx_queues == 0);
	assert(sd->data.rx_queues == NULL);
	assert(priv != sd->primary_priv);
	assert(priv->dev->data == sd->shared_dev_data);
	assert(priv->txqs_n == 0);
	assert(priv->txqs == NULL);
	assert(priv->rxqs_n == 0);
	assert(priv->rxqs == NULL);
	nb_tx_queues = sd->shared_dev_data->nb_tx_queues;
	nb_rx_queues = sd->shared_dev_data->nb_rx_queues;
	/* Allocate local storage for queues. */
	tx_queues = rte_zmalloc("secondary ethdev->tx_queues",
				sizeof(sd->data.tx_queues[0]) * nb_tx_queues,
				RTE_CACHE_LINE_SIZE);
	rx_queues = rte_zmalloc("secondary ethdev->rx_queues",
				sizeof(sd->data.rx_queues[0]) * nb_rx_queues,
				RTE_CACHE_LINE_SIZE);
	if (tx_queues == NULL || rx_queues == NULL)
		goto error;
	/* Lock to prevent control operations during setup. */
	priv_lock(priv);
	/* TX queues. */
	for (i = 0; i != nb_tx_queues; ++i) {
		struct txq *primary_txq = (*sd->primary_priv->txqs)[i];
		struct txq *txq;

		if (primary_txq == NULL)
			continue;
		txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0,
					primary_txq->socket);
		if (txq != NULL) {
			if (txq_setup(priv->dev,
				      txq,
				      primary_txq->elts_n * MLX5_PMD_SGE_WR_N,
				      primary_txq->socket,
				      NULL) == 0) {
				txq->stats.idx = primary_txq->stats.idx;
				tx_queues[i] = txq;
				continue;
			}
			rte_free(txq);
		}
		while (i) {
			txq = tx_queues[--i];
			txq_cleanup(txq);
			rte_free(txq);
		}
		goto error;
	}
	/* RX queues. */
	for (i = 0; i != nb_rx_queues; ++i) {
		struct rxq *primary_rxq = (*sd->primary_priv->rxqs)[i];

		if (primary_rxq == NULL)
			continue;
		/* Not supported yet. */
		rx_queues[i] = NULL;
	}
	/* Update everything. */
	priv->txqs = (void *)tx_queues;
	priv->txqs_n = nb_tx_queues;
	priv->rxqs = (void *)rx_queues;
	priv->rxqs_n = nb_rx_queues;
	sd->data.rx_queues = rx_queues;
	sd->data.tx_queues = tx_queues;
	sd->data.nb_rx_queues = nb_rx_queues;
	sd->data.nb_tx_queues = nb_tx_queues;
	sd->data.dev_link = sd->shared_dev_data->dev_link;
	sd->data.mtu = sd->shared_dev_data->mtu;
	memcpy(sd->data.rx_queue_state, sd->shared_dev_data->rx_queue_state,
	       sizeof(sd->data.rx_queue_state));
	memcpy(sd->data.tx_queue_state, sd->shared_dev_data->tx_queue_state,
	       sizeof(sd->data.tx_queue_state));
	sd->data.dev_flags = sd->shared_dev_data->dev_flags;
	/* Use local data from now on. */
	rte_mb();
	priv->dev->data = &sd->data;
	rte_mb();
	priv->dev->tx_pkt_burst = mlx5_tx_burst;
	priv->dev->rx_pkt_burst = removed_rx_burst;
	priv_unlock(priv);
end:
	/* More sanity checks. */
	assert(priv->dev->tx_pkt_burst == mlx5_tx_burst);
	assert(priv->dev->rx_pkt_burst == removed_rx_burst);
	assert(priv->dev->data == &sd->data);
	rte_spinlock_unlock(&sd->lock);
	return priv;
error:
	priv_unlock(priv);
	rte_free(tx_queues);
	rte_free(rx_queues);
	rte_spinlock_unlock(&sd->lock);
	return NULL;
}
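
The tail of mlx5_secondary_data_setup() is a publish pattern: finish initialising the local copy, fence, switch the data pointer, fence again, then install the burst callbacks. A stripped-down sketch of that ordering with hypothetical types (not the real rte_eth_dev layout):

#include <stdint.h>

#include <rte_atomic.h>   /* rte_mb() */

/* Hypothetical, stripped-down stand-ins for the ethdev structures. */
struct dev_data {
    void **rx_queues;
    void **tx_queues;
    uint16_t nb_rx_queues;
    uint16_t nb_tx_queues;
};

struct eth_dev {
    struct dev_data *data;
    uint16_t (*rx_pkt_burst)(void *, void **, uint16_t);
    uint16_t (*tx_pkt_burst)(void *, void **, uint16_t);
};

static void publish_local_data(struct eth_dev *dev, struct dev_data *local,
                               uint16_t (*rx)(void *, void **, uint16_t),
                               uint16_t (*tx)(void *, void **, uint16_t))
{
    /* The caller has fully initialised *local at this point. */
    rte_mb();                 /* make those writes visible ...           */
    dev->data = local;        /* ... before readers can see the new data */
    rte_mb();                 /* order the pointer switch ...            */
    dev->rx_pkt_burst = rx;   /* ... before enabling the burst callbacks */
    dev->tx_pkt_burst = tx;
}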
Example #11
static uint32_t
dpdk_virtio_dev_to_vm_tx_burst(struct dpdk_virtio_writer *p,
        vr_dpdk_virtioq_t *vq, struct rte_mbuf **pkts, uint32_t count)
{
    struct vring_desc *desc;
    struct rte_mbuf *buff;
    /* The virtio_hdr is initialised to 0. */
    struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
    uint64_t buff_addr = 0;
    uint64_t buff_hdr_addr = 0;
    uint32_t head[VR_DPDK_VIRTIO_TX_BURST_SZ];
    uint32_t head_idx, packet_success = 0;
    uint16_t avail_idx, res_cur_idx;
    uint16_t res_base_idx, res_end_idx;
    uint16_t free_entries;
    uint8_t success = 0;
    vr_uvh_client_t *vru_cl;

    if (unlikely(vq->vdv_ready_state == VQ_NOT_READY))
        return 0;

    vru_cl = vr_dpdk_virtio_get_vif_client(vq->vdv_vif_idx);
    if (unlikely(vru_cl == NULL))
        return 0;

    /*
     * As many data cores may want access to available buffers,
     * they need to be reserved.
     */
    do {
        res_base_idx = vq->vdv_last_used_idx_res;
        avail_idx = *((volatile uint16_t *)&vq->vdv_avail->idx);

        free_entries = (avail_idx - res_base_idx);
        /*check that we have enough buffers*/
        if (unlikely(count > free_entries))
            count = free_entries;

        if (unlikely(count == 0))
            return 0;

        res_end_idx = res_base_idx + count;
        /* vq->vdv_last_used_idx_res is atomically updated. */
        /* TODO: Allow to disable cmpset if no concurrency in application. */
        success = rte_atomic16_cmpset(&vq->vdv_last_used_idx_res,
                res_base_idx, res_end_idx);
    } while (unlikely(success == 0));
    res_cur_idx = res_base_idx;
    RTE_LOG(DEBUG, VROUTER, "%s: Current Index %d| End Index %d\n",
            __func__, res_cur_idx, res_end_idx);

    /* Prefetch available ring to retrieve indexes. */
    rte_prefetch0(&vq->vdv_avail->ring[res_cur_idx & (vq->vdv_size - 1)]);

    /* Retrieve all of the head indexes first to avoid caching issues. */
    for (head_idx = 0; head_idx < count; head_idx++)
        head[head_idx] = vq->vdv_avail->ring[(res_cur_idx + head_idx) &
                    (vq->vdv_size - 1)];

    /* Prefetch descriptor index. */
    rte_prefetch0(&vq->vdv_desc[head[packet_success]]);

    while (res_cur_idx != res_end_idx) {
        uint32_t offset = 0, vb_offset = 0;
        uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
        uint8_t hdr = 0, uncompleted_pkt = 0;

        /* Get descriptor from available ring */
        desc = &vq->vdv_desc[head[packet_success]];

        buff = pkts[packet_success];

        /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
        buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
        /* Prefetch buffer address. */
        rte_prefetch0((void *)(uintptr_t)buff_addr);

        /* Copy virtio_hdr to packet and increment buffer address */
        buff_hdr_addr = buff_addr;

        /*
         * If the descriptors are chained the header and data are
         * placed in separate buffers.
         */
        if (likely(desc->flags & VRING_DESC_F_NEXT)
            && (desc->len == sizeof(struct virtio_net_hdr))) {
            /*
             * TODO: verify that desc->next is sane below.
             */
            desc = &vq->vdv_desc[desc->next];
            /* Buffer address translation. */
            buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
        } else {
            vb_offset += sizeof(struct virtio_net_hdr);
            hdr = 1;
        }

        pkt_len = rte_pktmbuf_pkt_len(buff);
        data_len = rte_pktmbuf_data_len(buff);
        len_to_cpy = RTE_MIN(data_len,
            hdr ? desc->len - sizeof(struct virtio_net_hdr) : desc->len);
        while (total_copied < pkt_len) {
            /* Copy mbuf data to buffer */
            rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
                rte_pktmbuf_mtod_offset(buff, const void *, offset),
                len_to_cpy);

            offset += len_to_cpy;
            vb_offset += len_to_cpy;
            total_copied += len_to_cpy;

            /* The whole packet completes */
            if (likely(total_copied == pkt_len))
                break;

            /* The current segment completes */
            if (offset == data_len) {
                buff = buff->next;
                offset = 0;
                data_len = rte_pktmbuf_data_len(buff);
            }

            /* The current vring descriptor done */
            if (vb_offset == desc->len) {
                if (desc->flags & VRING_DESC_F_NEXT) {
                    desc = &vq->vdv_desc[desc->next];
                    buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
                    vb_offset = 0;
                } else {
                    /* Room in vring buffer is not enough */
                    uncompleted_pkt = 1;
                    break;
                }
            }
            len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
        };

        /* Update used ring with desc information */
        vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].id =
                            head[packet_success];

        /* Drop the packet if it is uncompleted */
        if (unlikely(uncompleted_pkt == 1))
            vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len =
                            sizeof(struct virtio_net_hdr);
        else
            vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len =
                            pkt_len + sizeof(struct virtio_net_hdr);

        res_cur_idx++;
        packet_success++;

        /* TODO: in DPDK 2.1 we do not copy the header
        if (unlikely(uncompleted_pkt == 1))
            continue;
        */
        rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
            (const void *)&virtio_hdr, sizeof(struct virtio_net_hdr));

        if (likely(res_cur_idx < res_end_idx)) {
            /* Prefetch descriptor index. */
            rte_prefetch0(&vq->vdv_desc[head[packet_success]]);
        }
    }

    rte_compiler_barrier();

    /* Wait until it's our turn to add our buffer to the used ring. */
    while (unlikely(vq->vdv_last_used_idx != res_base_idx))
        rte_pause();

    *(volatile uint16_t *)&vq->vdv_used->idx += count;
    vq->vdv_last_used_idx = res_end_idx;
    RTE_LOG(DEBUG, VROUTER, "%s: vif %d vq %p last_used_idx %d used->idx %d\n",
            __func__, vq->vdv_vif_idx, vq, vq->vdv_last_used_idx, vq->vdv_used->idx);

    /* flush used->idx update before we read avail->flags. */
    rte_mb();

    /* Kick the guest if necessary. */
    if (unlikely(!(vq->vdv_avail->flags & VRING_AVAIL_F_NO_INTERRUPT))) {
        p->nb_syscalls++;
        eventfd_write(vq->vdv_callfd, 1);
    }
    return count;
}
Example #12
/**
 * @brief           Check if RX is started
 *
 * @return          true if started
 */
bool DPDKAdapter::isRxStarted()
{
    rte_mb();
    return rxGlobalStart;
}
Example #13
/**
 * @brief           Stop RX
 */
void DPDKAdapter::stopRx()
{
    rxGlobalStart = false;
    rte_mb();
}
Example #14
/**
 * @brief           Start RX
 */
void DPDKAdapter::startRx()
{
    rxGlobalStart = true;
    rte_mb();
}
Example #15
/**
 * @brief           Start TX
 */
void DPDKAdapter::startTx()
{
    txGlobalStart = true;
    rte_mb();
}
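
These setters only flip a global flag and fence; the forwarding cores observe the flag on their side of the barrier. A minimal sketch of one TX worker iteration under that assumption; the flag, helper name, and burst handling are illustrative, not the adapter's actual worker loop.

#include <stdbool.h>
#include <stdint.h>

#include <rte_atomic.h>   /* rte_mb() */
#include <rte_ethdev.h>   /* rte_eth_tx_burst() */

static volatile bool txGlobalStart;   /* stand-in for the adapter's flag */

static uint16_t tx_worker_once(uint8_t devId, struct rte_mbuf **pkts,
                               uint16_t nb)
{
    rte_mb();                         /* pairs with the barrier in startTx() */
    if (!txGlobalStart)
        return 0;

    /* Unsent mbufs remain owned by the caller, which may retry or free them. */
    return rte_eth_tx_burst(devId, 0, pkts, nb);
}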