Пример #1
1
/* test case to time the number of cycles to round-trip a cache line between
 * two cores and back again.
 */
static void
time_cache_line_switch(void)
{
	/* allocate a full cache line for data, we use only first byte of it */
	uint64_t data[RTE_CACHE_LINE_SIZE*3 / sizeof(uint64_t)];

	unsigned i, slaveid = rte_get_next_lcore(rte_lcore_id(), 0, 0);
	volatile uint64_t *pdata = &data[0];
	*pdata = 1;
	rte_eal_remote_launch((lcore_function_t *)flip_bit, &data[0], slaveid);
	while (*pdata)
		rte_pause();

	const uint64_t start_time = rte_rdtsc();
	for (i = 0; i < (1 << ITER_POWER); i++) {
		while (*pdata)
			rte_pause();
		*pdata = 1;
	}
	const uint64_t end_time = rte_rdtsc();

	while (*pdata)
		rte_pause();
	*pdata = 2;
	rte_eal_wait_lcore(slaveid);
	printf("==== Cache line switch test ===\n");
	printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER),
			end_time-start_time);
	printf("Ticks per iteration = %"PRIu64"\n\n",
			(end_time-start_time) >> ITER_POWER);
}
Пример #2
0
static int
order_queue_worker(void *arg)
{
	ORDER_WORKER_INIT;
	struct rte_event ev;

	while (t->err == false) {
		uint16_t event = rte_event_dequeue_burst(dev_id, port,
					&ev, 1, 0);
		if (!event) {
			if (rte_atomic64_read(outstand_pkts) <= 0)
				break;
			rte_pause();
			continue;
		}

		if (ev.queue_id == 0) { /* from ordered queue */
			order_queue_process_stage_0(&ev);
			while (rte_event_enqueue_burst(dev_id, port, &ev, 1)
					!= 1)
				rte_pause();
		} else if (ev.queue_id == 1) { /* from atomic queue */
			order_process_stage_1(t, &ev, nb_flows,
					expected_flow_seq, outstand_pkts);
		} else {
			order_process_stage_invalid(t, &ev);
		}
	}
	return 0;
}
Пример #3
0
static int
perf_atq_worker(void *arg, const int enable_fwd_latency)
{
	PERF_WORKER_INIT;
	struct rte_event ev;

	while (t->done == false) {
		uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);

		if (!event) {
			rte_pause();
			continue;
		}

		if (enable_fwd_latency && !prod_timer_type)
		/* first stage in pipeline, mark ts to compute fwd latency */
			atq_mark_fwd_latency(&ev);

		/* last stage in pipeline */
		if (unlikely((ev.sub_event_type % nb_stages) == laststage)) {
			if (enable_fwd_latency)
				cnt = perf_process_last_stage_latency(pool,
					&ev, w, bufs, sz, cnt);
			else
				cnt = perf_process_last_stage(pool, &ev, w,
					 bufs, sz, cnt);
		} else {
			atq_fwd_event(&ev, sched_type_list, nb_stages);
			while (rte_event_enqueue_burst(dev, port, &ev, 1) != 1)
				rte_pause();
		}
	}
	return 0;
}
Пример #4
0
void
rte_distributor_request_pkt_v1705(struct rte_distributor *d,
		unsigned int worker_id, struct rte_mbuf **oldpkt,
		unsigned int count)
{
	struct rte_distributor_buffer *buf = &(d->bufs[worker_id]);
	unsigned int i;

	volatile int64_t *retptr64;

	if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) {
		rte_distributor_request_pkt_v20(d->d_v20,
			worker_id, oldpkt[0]);
		return;
	}

	retptr64 = &(buf->retptr64[0]);
	/* Spin while handshake bits are set (scheduler clears it) */
	while (unlikely(*retptr64 & RTE_DISTRIB_GET_BUF)) {
		rte_pause();
		uint64_t t = rte_rdtsc()+100;

		while (rte_rdtsc() < t)
			rte_pause();
	}

	/*
	 * OK, if we've got here, then the scheduler has just cleared the
	 * handshake bits. Populate the retptrs with returning packets.
	 */

	for (i = count; i < RTE_DIST_BURST_SIZE; i++)
		buf->retptr64[i] = 0;

	/* Set Return bit for each packet returned */
	for (i = count; i-- > 0; )
		buf->retptr64[i] =
			(((int64_t)(uintptr_t)(oldpkt[i])) <<
			RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_RETURN_BUF;

	/*
	 * Finally, set the GET_BUF  to signal to distributor that cache
	 * line is ready for processing
	 */
	*retptr64 |= RTE_DISTRIB_GET_BUF;
}
void
rte_delay_us_block(unsigned int us)
{
	const uint64_t start = rte_get_timer_cycles();
	const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6;
	while ((rte_get_timer_cycles() - start) < ticks)
		rte_pause();
}
Пример #6
0
/**
 * @brief           Pause for a requested time in ns
 */
void DPDKAdapter::StreamInfo::nPause()
{
    const uint64_t start = rte_get_tsc_cycles();

    while ((rte_get_tsc_cycles() - start) < ticksDelay_)
    {
        rte_pause();
    }
}
Пример #7
0
/* worker thread used for testing the time to do a round-trip of a cache
 * line between two cores and back again
 */
static void
flip_bit(volatile uint64_t *arg)
{
	uint64_t old_val = 0;
	while (old_val != 2) {
		while (!*arg)
			rte_pause();
		old_val = *arg;
		*arg = 0;
	}
}
Пример #8
0
static int
perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
{
	PERF_WORKER_INIT;
	uint16_t i;
	/* +1 to avoid prefetch out of array check */
	struct rte_event ev[BURST_SIZE + 1];

	while (t->done == false) {
		uint16_t const nb_rx = rte_event_dequeue_burst(dev, port, ev,
				BURST_SIZE, 0);

		if (!nb_rx) {
			rte_pause();
			continue;
		}

		for (i = 0; i < nb_rx; i++) {
			if (enable_fwd_latency && !prod_timer_type) {
				rte_prefetch0(ev[i+1].event_ptr);
				/* first stage in pipeline.
				 * mark time stamp to compute fwd latency
				 */
				atq_mark_fwd_latency(&ev[i]);
			}
			/* last stage in pipeline */
			if (unlikely((ev[i].sub_event_type % nb_stages)
						== laststage)) {
				if (enable_fwd_latency)
					cnt = perf_process_last_stage_latency(
						pool, &ev[i], w, bufs, sz, cnt);
				else
					cnt = perf_process_last_stage(pool,
						&ev[i], w, bufs, sz, cnt);

				ev[i].op = RTE_EVENT_OP_RELEASE;
			} else {
				atq_fwd_event(&ev[i], sched_type_list,
						nb_stages);
			}
		}

		uint16_t enq;

		enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
		while (enq < nb_rx) {
			enq += rte_event_enqueue_burst(dev, port,
							ev + enq, nb_rx - enq);
		}
	}
	return 0;
}
Пример #9
0
static int
test_misc(void)
{
	char memdump[] = "memdump_test";
	if (rte_bsf32(129))
		FAIL("rte_bsf32");

	rte_memdump(stdout, "test", memdump, sizeof(memdump));
	rte_hexdump(stdout, "test", memdump, sizeof(memdump));

	rte_pause();

	return 0;
}
Пример #10
0
static inline int
perf_producer(void *arg)
{
	struct prod_data *p  = arg;
	struct test_perf *t = p->t;
	struct evt_options *opt = t->opt;
	const uint8_t dev_id = p->dev_id;
	const uint8_t port = p->port_id;
	struct rte_mempool *pool = t->pool;
	const uint64_t nb_pkts = t->nb_pkts;
	const uint32_t nb_flows = t->nb_flows;
	uint32_t flow_counter = 0;
	uint64_t count = 0;
	struct perf_elt *m;
	struct rte_event ev;

	if (opt->verbose_level > 1)
		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
				rte_lcore_id(), dev_id, port, p->queue_id);

	ev.event = 0;
	ev.op = RTE_EVENT_OP_NEW;
	ev.queue_id = p->queue_id;
	ev.sched_type = t->opt->sched_type_list[0];
	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	ev.event_type =  RTE_EVENT_TYPE_CPU;
	ev.sub_event_type = 0; /* stage 0 */

	while (count < nb_pkts && t->done == false) {
		if (rte_mempool_get(pool, (void **)&m) < 0)
			continue;

		ev.flow_id = flow_counter++ % nb_flows;
		ev.event_ptr = m;
		m->timestamp = rte_get_timer_cycles();
		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
			if (t->done)
				break;
			rte_pause();
			m->timestamp = rte_get_timer_cycles();
		}
		count++;
	}

	return 0;
}
Пример #11
0
static int
order_queue_worker_burst(void *arg)
{
	ORDER_WORKER_INIT;
	struct rte_event ev[BURST_SIZE];
	uint16_t i;

	while (t->err == false) {
		uint16_t const nb_rx = rte_event_dequeue_burst(dev_id, port, ev,
				BURST_SIZE, 0);

		if (nb_rx == 0) {
			if (rte_atomic64_read(outstand_pkts) <= 0)
				break;
			rte_pause();
			continue;
		}

		for (i = 0; i < nb_rx; i++) {
			if (ev[i].queue_id == 0) { /* from ordered queue */
				order_queue_process_stage_0(&ev[i]);
			} else if (ev[i].queue_id == 1) {/* from atomic queue */
				order_process_stage_1(t, &ev[i], nb_flows,
					expected_flow_seq, outstand_pkts);
				ev[i].op = RTE_EVENT_OP_RELEASE;
			} else {
				order_process_stage_invalid(t, &ev[i]);
			}
		}

		uint16_t enq;

		enq = rte_event_enqueue_burst(dev_id, port, ev, nb_rx);
		while (enq < nb_rx) {
			enq += rte_event_enqueue_burst(dev_id, port,
							ev + enq, nb_rx - enq);
		}
	}
	return 0;
}
Пример #12
0
/*
 * Remove a device from ovs_dpdk data path. Synchonization occurs through the
 * use of the lcore dev_removal_flag. Device is made volatile here to avoid
 * re-ordering of dev->remove=1 which can cause an infinite loop in the
 * rte_pause loop.
 */
static void
destroy_device (volatile struct virtio_net *dev)
{
	unsigned lcore;

	/* Remove device from ovs_dpdk port. */
	if (vport_vhost_down((struct virtio_net*) dev) < 0) {
		RTE_LOG(INFO, APP,
			"Device could not be removed from ovs_dpdk port %s\n",
			dev->port_name);
		dev->flags &= ~VIRTIO_DEV_RUNNING;
		return;
	}
	
	/* Set the dev_removal_flag on each lcore. */
	RTE_LCORE_FOREACH(lcore) {
		dev_removal_flag[lcore] = REQUEST_DEV_REMOVAL;
	}

	/*
	 * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that
	 * they can no longer access the device removed from the data path and that the devices
	 * are no longer in use.
	 */

	RTE_LCORE_FOREACH(lcore) {
		while (dev_removal_flag[lcore] != ACK_DEV_REMOVAL) {
			rte_pause();
		}
	}
	
	dev->flags &= ~VIRTIO_DEV_RUNNING;
	
	RTE_LOG(INFO, APP, "(%"PRIu64") Device has been removed from ovs_dpdk \
		            port %s\n", dev->device_fh, dev->port_name);
}
Пример #13
0
virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	struct rte_mbuf *buff;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0,0,0,0,0,0},0};
	uint64_t buff_addr = 0;
	uint64_t buff_hdr_addr = 0;
	uint32_t head[MAX_PKT_BURST], packet_len = 0;
	uint32_t head_idx, packet_success = 0;
	uint16_t avail_idx, res_cur_idx;
	uint16_t res_base_idx, res_end_idx;
	uint16_t free_entries;
	uint8_t success = 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
	vq = dev->virtqueue_rx;
	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
	/* As many data cores may want access to available buffers, they need to be reserved. */
	do {

		res_base_idx = vq->last_used_idx_res;

		avail_idx = *((volatile uint16_t *)&vq->avail->idx);

		free_entries = (avail_idx - res_base_idx);

		/*check that we have enough buffers*/
		if (unlikely(count > free_entries))
			count = free_entries;

		if (count == 0)
			return 0;

		res_end_idx = res_base_idx + count;
		/* vq->last_used_idx_res is atomically updated. */
		success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx,
									res_end_idx);
	} while (unlikely(success == 0));
	res_cur_idx = res_base_idx;
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx);

	/* Prefetch available ring to retrieve indexes. */
	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (head_idx = 0; head_idx < count; head_idx++)
		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];

	/*Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[packet_success]]);

	while (res_cur_idx != res_end_idx) {
		/* Get descriptor from available ring */
		desc = &vq->desc[head[packet_success]];
		/* Prefetch descriptor address. */
		rte_prefetch0(desc);

		buff = pkts[packet_success];

		/* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
		buff_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void*)(uintptr_t)buff_addr);

		{
			/* Copy virtio_hdr to packet and increment buffer address */
			buff_hdr_addr = buff_addr;
			packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;

			/*
			 * If the descriptors are chained the header and data are placed in
			 * separate buffers.
			 */
			if (desc->flags & VRING_DESC_F_NEXT) {
				desc->len = vq->vhost_hlen;
				desc = &vq->desc[desc->next];
				/* Buffer address translation. */
				buff_addr = gpa_to_vva(dev, desc->addr);
				desc->len = rte_pktmbuf_data_len(buff);
			} else {
				buff_addr += vq->vhost_hlen;
				desc->len = packet_len;
			}
		}

		/* Update used ring with desc information */
		vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;

		/* Copy mbuf data to buffer */
		rte_memcpy((void *)(uintptr_t)buff_addr, (const void*)buff->pkt.data, rte_pktmbuf_data_len(buff));

		res_cur_idx++;
		packet_success++;

		/* mergeable is disabled then a header is required per buffer. */
		rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void*)&virtio_hdr, vq->vhost_hlen);
		if (res_cur_idx < res_end_idx) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[packet_success]]);
		}
	}

	rte_compiler_barrier();

	/* Wait until it's our turn to add our buffer to the used ring. */
	while (unlikely(vq->last_used_idx != res_base_idx))
		rte_pause();

	*(volatile uint16_t *)&vq->used->idx += count;

	vq->last_used_idx = res_end_idx;

	return count;
}
Пример #14
0
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	struct rte_mbuf *buff;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
	uint64_t buff_addr = 0;
	uint64_t buff_hdr_addr = 0;
	uint32_t head[MAX_PKT_BURST];
	uint32_t head_idx, packet_success = 0;
	uint16_t avail_idx, res_cur_idx;
	uint16_t res_base_idx, res_end_idx;
	uint16_t free_entries;
	uint8_t success = 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
	if (unlikely(queue_id != VIRTIO_RXQ)) {
		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
		return 0;
	}

	vq = dev->virtqueue[VIRTIO_RXQ];
	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

	/*
	 * As many data cores may want access to available buffers,
	 * they need to be reserved.
	 */
	do {
		res_base_idx = vq->last_used_idx_res;
		avail_idx = *((volatile uint16_t *)&vq->avail->idx);

		free_entries = (avail_idx - res_base_idx);
		/*check that we have enough buffers*/
		if (unlikely(count > free_entries))
			count = free_entries;

		if (count == 0)
			return 0;

		res_end_idx = res_base_idx + count;
		/* vq->last_used_idx_res is atomically updated. */
		/* TODO: Allow to disable cmpset if no concurrency in application. */
		success = rte_atomic16_cmpset(&vq->last_used_idx_res,
				res_base_idx, res_end_idx);
	} while (unlikely(success == 0));
	res_cur_idx = res_base_idx;
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
			dev->device_fh, res_cur_idx, res_end_idx);

	/* Prefetch available ring to retrieve indexes. */
	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (head_idx = 0; head_idx < count; head_idx++)
		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
					(vq->size - 1)];

	/*Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[packet_success]]);

	while (res_cur_idx != res_end_idx) {
		uint32_t offset = 0, vb_offset = 0;
		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
		uint8_t hdr = 0, uncompleted_pkt = 0;

		/* Get descriptor from available ring */
		desc = &vq->desc[head[packet_success]];

		buff = pkts[packet_success];

		/* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
		buff_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void *)(uintptr_t)buff_addr);

		/* Copy virtio_hdr to packet and increment buffer address */
		buff_hdr_addr = buff_addr;

		/*
		 * If the descriptors are chained the header and data are
		 * placed in separate buffers.
		 */
		if ((desc->flags & VRING_DESC_F_NEXT) &&
			(desc->len == vq->vhost_hlen)) {
			desc = &vq->desc[desc->next];
			/* Buffer address translation. */
			buff_addr = gpa_to_vva(dev, desc->addr);
		} else {
			vb_offset += vq->vhost_hlen;
			hdr = 1;
		}

		pkt_len = rte_pktmbuf_pkt_len(buff);
		data_len = rte_pktmbuf_data_len(buff);
		len_to_cpy = RTE_MIN(data_len,
			hdr ? desc->len - vq->vhost_hlen : desc->len);
		while (total_copied < pkt_len) {
			/* Copy mbuf data to buffer */
			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
				len_to_cpy);
			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
				len_to_cpy, 0);

			offset += len_to_cpy;
			vb_offset += len_to_cpy;
			total_copied += len_to_cpy;

			/* The whole packet completes */
			if (total_copied == pkt_len)
				break;

			/* The current segment completes */
			if (offset == data_len) {
				buff = buff->next;
				offset = 0;
				data_len = rte_pktmbuf_data_len(buff);
			}

			/* The current vring descriptor done */
			if (vb_offset == desc->len) {
				if (desc->flags & VRING_DESC_F_NEXT) {
					desc = &vq->desc[desc->next];
					buff_addr = gpa_to_vva(dev, desc->addr);
					vb_offset = 0;
				} else {
					/* Room in vring buffer is not enough */
					uncompleted_pkt = 1;
					break;
				}
			}
			len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
		};

		/* Update used ring with desc information */
		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
							head[packet_success];

		/* Drop the packet if it is uncompleted */
		if (unlikely(uncompleted_pkt == 1))
			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
							vq->vhost_hlen;
		else
			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
							pkt_len + vq->vhost_hlen;

		res_cur_idx++;
		packet_success++;

		if (unlikely(uncompleted_pkt == 1))
			continue;

		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
			(const void *)&virtio_hdr, vq->vhost_hlen);

		PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);

		if (res_cur_idx < res_end_idx) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[packet_success]]);
		}
	}

	rte_compiler_barrier();

	/* Wait until it's our turn to add our buffer to the used ring. */
	while (unlikely(vq->last_used_idx != res_base_idx))
		rte_pause();

	*(volatile uint16_t *)&vq->used->idx += count;
	vq->last_used_idx = res_end_idx;

	/* flush used->idx update before we read avail->flags. */
	rte_mb();

	/* Kick the guest if necessary. */
	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write((int)vq->callfd, 1);
	return count;
}
Пример #15
0
virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	uint32_t pkt_idx = 0, entry_success = 0;
	uint16_t avail_idx;
	uint16_t res_base_idx, res_cur_idx;
	uint8_t success = 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
		dev->device_fh);
	if (unlikely(queue_id != VIRTIO_RXQ)) {
		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
	}

	vq = dev->virtqueue[VIRTIO_RXQ];
	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);

	if (count == 0)
		return 0;

	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;

		do {
			/*
			 * As many data cores may want access to available
			 * buffers, they need to be reserved.
			 */
			uint32_t secure_len = 0;
			uint32_t vec_idx = 0;

			res_base_idx = vq->last_used_idx_res;
			res_cur_idx = res_base_idx;

			do {
				avail_idx = *((volatile uint16_t *)&vq->avail->idx);
				if (unlikely(res_cur_idx == avail_idx)) {
					LOG_DEBUG(VHOST_DATA,
						"(%"PRIu64") Failed "
						"to get enough desc from "
						"vring\n",
						dev->device_fh);
					return pkt_idx;
				} else {
					update_secure_len(vq, res_cur_idx, &secure_len, &vec_idx);
					res_cur_idx++;
				}
			} while (pkt_len > secure_len);

			/* vq->last_used_idx_res is atomically updated. */
			success = rte_atomic16_cmpset(&vq->last_used_idx_res,
							res_base_idx,
							res_cur_idx);
		} while (success == 0);

		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
			res_cur_idx, pkts[pkt_idx]);

		rte_compiler_barrier();

		/*
		 * Wait until it's our turn to add our buffer
		 * to the used ring.
		 */
		while (unlikely(vq->last_used_idx != res_base_idx))
			rte_pause();

		*(volatile uint16_t *)&vq->used->idx += entry_success;
		vq->last_used_idx = res_cur_idx;

		/* flush used->idx update before we read avail->flags. */
		rte_mb();

		/* Kick the guest if necessary. */
		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
			eventfd_write((int)vq->callfd, 1);
	}

	return count;
}
dpdk_virtio_dev_to_vm_tx_burst(struct dpdk_virtio_writer *p,
        vr_dpdk_virtioq_t *vq, struct rte_mbuf **pkts, uint32_t count)
{
    struct vring_desc *desc;
    struct rte_mbuf *buff;
    /* The virtio_hdr is initialised to 0. */
    struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
    uint64_t buff_addr = 0;
    uint64_t buff_hdr_addr = 0;
    uint32_t head[VR_DPDK_VIRTIO_TX_BURST_SZ];
    uint32_t head_idx, packet_success = 0;
    uint16_t avail_idx, res_cur_idx;
    uint16_t res_base_idx, res_end_idx;
    uint16_t free_entries;
    uint8_t success = 0;
    vr_uvh_client_t *vru_cl;

    if (unlikely(vq->vdv_ready_state == VQ_NOT_READY))
        return 0;

    vru_cl = vr_dpdk_virtio_get_vif_client(vq->vdv_vif_idx);
    if (unlikely(vru_cl == NULL))
        return 0;

    /*
     * As many data cores may want access to available buffers,
     * they need to be reserved.
     */
    do {
        res_base_idx = vq->vdv_last_used_idx_res;
        avail_idx = *((volatile uint16_t *)&vq->vdv_avail->idx);

        free_entries = (avail_idx - res_base_idx);
        /*check that we have enough buffers*/
        if (unlikely(count > free_entries))
            count = free_entries;

        if (unlikely(count == 0))
            return 0;

        res_end_idx = res_base_idx + count;
        /* vq->vdv_last_used_idx_res is atomically updated. */
        /* TODO: Allow to disable cmpset if no concurrency in application. */
        success = rte_atomic16_cmpset(&vq->vdv_last_used_idx_res,
                res_base_idx, res_end_idx);
    } while (unlikely(success == 0));
    res_cur_idx = res_base_idx;
    RTE_LOG(DEBUG, VROUTER, "%s: Current Index %d| End Index %d\n",
            __func__, res_cur_idx, res_end_idx);

    /* Prefetch available ring to retrieve indexes. */
    rte_prefetch0(&vq->vdv_avail->ring[res_cur_idx & (vq->vdv_size - 1)]);

    /* Retrieve all of the head indexes first to avoid caching issues. */
    for (head_idx = 0; head_idx < count; head_idx++)
        head[head_idx] = vq->vdv_avail->ring[(res_cur_idx + head_idx) &
                    (vq->vdv_size - 1)];

    /* Prefetch descriptor index. */
    rte_prefetch0(&vq->vdv_desc[head[packet_success]]);

    while (res_cur_idx != res_end_idx) {
        uint32_t offset = 0, vb_offset = 0;
        uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
        uint8_t hdr = 0, uncompleted_pkt = 0;

        /* Get descriptor from available ring */
        desc = &vq->vdv_desc[head[packet_success]];

        buff = pkts[packet_success];

        /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
        buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
        /* Prefetch buffer address. */
        rte_prefetch0((void *)(uintptr_t)buff_addr);

        /* Copy virtio_hdr to packet and increment buffer address */
        buff_hdr_addr = buff_addr;

        /*
         * If the descriptors are chained the header and data are
         * placed in separate buffers.
         */
        if (likely(desc->flags & VRING_DESC_F_NEXT)
            && (desc->len == sizeof(struct virtio_net_hdr))) {
            /*
             * TODO: verify that desc->next is sane below.
             */
            desc = &vq->vdv_desc[desc->next];
            /* Buffer address translation. */
            buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
        } else {
            vb_offset += sizeof(struct virtio_net_hdr);
            hdr = 1;
        }

        pkt_len = rte_pktmbuf_pkt_len(buff);
        data_len = rte_pktmbuf_data_len(buff);
        len_to_cpy = RTE_MIN(data_len,
            hdr ? desc->len - sizeof(struct virtio_net_hdr) : desc->len);
        while (total_copied < pkt_len) {
            /* Copy mbuf data to buffer */
            rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
                rte_pktmbuf_mtod_offset(buff, const void *, offset),
                len_to_cpy);

            offset += len_to_cpy;
            vb_offset += len_to_cpy;
            total_copied += len_to_cpy;

            /* The whole packet completes */
            if (likely(total_copied == pkt_len))
                break;

            /* The current segment completes */
            if (offset == data_len) {
                buff = buff->next;
                offset = 0;
                data_len = rte_pktmbuf_data_len(buff);
            }

            /* The current vring descriptor done */
            if (vb_offset == desc->len) {
                if (desc->flags & VRING_DESC_F_NEXT) {
                    desc = &vq->vdv_desc[desc->next];
                    buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
                    vb_offset = 0;
                } else {
                    /* Room in vring buffer is not enough */
                    uncompleted_pkt = 1;
                    break;
                }
            }
            len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
        };

        /* Update used ring with desc information */
        vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].id =
                            head[packet_success];

        /* Drop the packet if it is uncompleted */
        if (unlikely(uncompleted_pkt == 1))
            vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len =
                            sizeof(struct virtio_net_hdr);
        else
            vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len =
                            pkt_len + sizeof(struct virtio_net_hdr);

        res_cur_idx++;
        packet_success++;

        /* TODO: in DPDK 2.1 we do not copy the header
        if (unlikely(uncompleted_pkt == 1))
            continue;
        */
        rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
            (const void *)&virtio_hdr, sizeof(struct virtio_net_hdr));

        if (likely(res_cur_idx < res_end_idx)) {
            /* Prefetch descriptor index. */
            rte_prefetch0(&vq->vdv_desc[head[packet_success]]);
        }
    }

    rte_compiler_barrier();

    /* Wait until it's our turn to add our buffer to the used ring. */
    while (unlikely(vq->vdv_last_used_idx != res_base_idx))
        rte_pause();

    *(volatile uint16_t *)&vq->vdv_used->idx += count;
    vq->vdv_last_used_idx = res_end_idx;
    RTE_LOG(DEBUG, VROUTER, "%s: vif %d vq %p last_used_idx %d used->idx %d\n",
            __func__, vq->vdv_vif_idx, vq, vq->vdv_last_used_idx, vq->vdv_used->idx);

    /* flush used->idx update before we read avail->flags. */
    rte_mb();

    /* Kick the guest if necessary. */
    if (unlikely(!(vq->vdv_avail->flags & VRING_AVAIL_F_NO_INTERRUPT))) {
        p->nb_syscalls++;
        eventfd_write(vq->vdv_callfd, 1);
    }
    return count;
}
Пример #17
0
/*
 * Write scattered channel packet to TX bufring.
 *
 * The offset of this channel packet is written as a 64bits value
 * immediately after this channel packet.
 *
 * The write goes through three stages:
 *  1. Reserve space in ring buffer for the new data.
 *     Writer atomically moves priv_write_index.
 *  2. Copy the new data into the ring.
 *  3. Update the tail of the ring (visible to host) that indicates
 *     next read location. Writer updates write_index
 */
int
vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen,
		 bool *need_sig)
{
	struct vmbus_bufring *vbr = tbr->vbr;
	uint32_t ring_size = tbr->dsize;
	uint32_t old_windex, next_windex, windex, total;
	uint64_t save_windex;
	int i;

	total = 0;
	for (i = 0; i < iovlen; i++)
		total += iov[i].iov_len;
	total += sizeof(save_windex);

	/* Reserve space in ring */
	do {
		uint32_t avail;

		/* Get current free location */
		old_windex = tbr->windex;

		/* Prevent compiler reordering this with calculation */
		rte_compiler_barrier();

		avail = vmbus_br_availwrite(tbr, old_windex);

		/* If not enough space in ring, then tell caller. */
		if (avail <= total)
			return -EAGAIN;

		next_windex = vmbus_br_idxinc(old_windex, total, ring_size);

		/* Atomic update of next write_index for other threads */
	} while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));

	/* Space from old..new is now reserved */
	windex = old_windex;
	for (i = 0; i < iovlen; i++) {
		windex = vmbus_txbr_copyto(tbr, windex,
					   iov[i].iov_base, iov[i].iov_len);
	}

	/* Set the offset of the current channel packet. */
	save_windex = ((uint64_t)old_windex) << 32;
	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
				   sizeof(save_windex));

	/* The region reserved should match region used */
	RTE_ASSERT(windex == next_windex);

	/* Ensure that data is available before updating host index */
	rte_smp_wmb();

	/* Checkin for our reservation. wait for our turn to update host */
	while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
		rte_pause();

	/* If host had read all data before this, then need to signal */
	*need_sig |= vmbus_txbr_need_signal(tbr, old_windex);
	return 0;
}