Example #1
/* free mbufs from death row */
#ifdef RTE_LIBRW_PIOT
int
#else
void
#endif
rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
		uint32_t prefetch)
{
	uint32_t i, k, n;

	k = RTE_MIN(prefetch, dr->cnt);
	n = dr->cnt;

	for (i = 0; i != k; i++)
		rte_prefetch0(dr->row[i]);

	for (i = 0; i != n - k; i++) {
		rte_prefetch0(dr->row[i + k]);
		rte_pktmbuf_free(dr->row[i]);
	}

	for (; i != n; i++)
		rte_pktmbuf_free(dr->row[i]);

	dr->cnt = 0;
#ifdef RTE_LIBRW_PIOT
        return n;
#endif
}
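
A minimal usage sketch, not part of the original listing: in a DPDK IP reassembly loop the death row is typically drained right after a reassembly attempt. The caller shape and the PREFETCH_OFFSET constant below are assumptions for illustration.

#include <rte_ip_frag.h>
#include <rte_mbuf.h>

#define PREFETCH_OFFSET 3	/* hypothetical prefetch depth */

static void
reassemble_and_drain(struct rte_ip_frag_tbl *tbl,
		     struct rte_ip_frag_death_row *dr,
		     struct rte_mbuf *m, struct ipv4_hdr *ip_hdr, uint64_t tms)
{
	/* Reassembly may push overwritten/expired fragments onto the death row. */
	struct rte_mbuf *mo;

	mo = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr);
	if (mo != NULL) {
		/* a fully reassembled packet is ready here */
	}

	/* Free everything queued on the death row, prefetching ahead. */
	rte_ip_frag_free_death_row(dr, PREFETCH_OFFSET);
}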
Example #2
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/*  Caller does the check */
	for (i = 0; i < num ; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t) uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i]  = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}
Example #3
int
test_prefetch(void)
{
	int a;

	rte_prefetch0(&a);
	rte_prefetch1(&a);
	rte_prefetch2(&a);

	return 0;
}
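
The test above only checks that the three prefetch hints compile and execute; rte_prefetch0/1/2 request a load into progressively more distant cache levels. The burst-processing examples that follow all rely on the same look-ahead idiom, sketched below with an assumed PREFETCH_OFFSET constant; the helper itself is illustrative, not code from this listing.

#include <rte_prefetch.h>
#include <rte_mbuf.h>

#define PREFETCH_OFFSET 4	/* assumed look-ahead distance */

static inline void
process_burst(struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t i;

	/* Warm the cache for the first few packets. */
	for (i = 0; i < PREFETCH_OFFSET && i < nb_pkts; i++)
		rte_prefetch0(rte_pktmbuf_mtod(pkts[i], void *));

	/* Prefetch packet i + PREFETCH_OFFSET while working on packet i. */
	for (i = 0; i < nb_pkts; i++) {
		if (i + PREFETCH_OFFSET < nb_pkts)
			rte_prefetch0(rte_pktmbuf_mtod(
				pkts[i + PREFETCH_OFFSET], void *));
		/* ... process pkts[i] ... */
	}
}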
Example #4
static int
perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
{
	PERF_WORKER_INIT;
	uint16_t i;
	/* +1 to avoid prefetch out of array check */
	struct rte_event ev[BURST_SIZE + 1];

	while (t->done == false) {
		uint16_t const nb_rx = rte_event_dequeue_burst(dev, port, ev,
				BURST_SIZE, 0);

		if (!nb_rx) {
			rte_pause();
			continue;
		}

		for (i = 0; i < nb_rx; i++) {
			if (enable_fwd_latency && !prod_timer_type) {
				rte_prefetch0(ev[i+1].event_ptr);
				/* first stage in pipeline.
				 * mark time stamp to compute fwd latency
				 */
				atq_mark_fwd_latency(&ev[i]);
			}
			/* last stage in pipeline */
			if (unlikely((ev[i].sub_event_type % nb_stages)
						== laststage)) {
				if (enable_fwd_latency)
					cnt = perf_process_last_stage_latency(
						pool, &ev[i], w, bufs, sz, cnt);
				else
					cnt = perf_process_last_stage(pool,
						&ev[i], w, bufs, sz, cnt);

				ev[i].op = RTE_EVENT_OP_RELEASE;
			} else {
				atq_fwd_event(&ev[i], sched_type_list,
						nb_stages);
			}
		}

		uint16_t enq;

		enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
		while (enq < nb_rx) {
			enq += rte_event_enqueue_burst(dev, port,
							ev + enq, nb_rx - enq);
		}
	}
	return 0;
}
Example #5
/* free mbufs from death row */
void
rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
		uint32_t prefetch)
{
	uint32_t i, k, n;

	k = RTE_MIN(prefetch, dr->cnt);
	n = dr->cnt;

	for (i = 0; i != k; i++)
		rte_prefetch0(dr->row[i]);

	for (i = 0; i != n - k; i++) {
		rte_prefetch0(dr->row[i + k]);
		rte_pktmbuf_free(dr->row[i]);
	}

	for (; i != n; i++)
		rte_pktmbuf_free(dr->row[i]);

	dr->cnt = 0;
}
Example #6
uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
			       uint16_t nb_pkts)
{
	struct bnxt_rx_queue *rxq = rx_queue;
	struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
	struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
	uint32_t raw_cons = cpr->cp_raw_cons;
	uint32_t cons;
	int nb_rx_pkts = 0;
	bool rx_event = false;
	struct rx_pkt_cmpl *rxcmp;

	/* Handle RX burst request */
	while (1) {
		int rc;

		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
		rte_prefetch0(&cpr->cp_desc_ring[cons]);
		rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];

		if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct))
			break;

		/* TODO: Avoid magic numbers... */
		if ((CMP_TYPE(rxcmp) & 0x30) == 0x10) {
			rc = bnxt_rx_pkt(&rx_pkts[nb_rx_pkts], rxq, &raw_cons);
			if (likely(!rc))
				nb_rx_pkts++;
			else if (rc == -EBUSY)	/* partial completion */
				break;
			rx_event = true;
		}
		raw_cons = NEXT_RAW_CMP(raw_cons);
		if (nb_rx_pkts == nb_pkts)
			break;
	}
	if (raw_cons == cpr->cp_raw_cons) {
		/*
		 * For PMD, there is no need to keep on pushing to REARM
		 * the doorbell if there are no new completions
		 */
		return nb_rx_pkts;
	}
	cpr->cp_raw_cons = raw_cons;

	B_CP_DIS_DB(cpr, cpr->cp_raw_cons);
	if (rx_event)
		B_RX_DB(rxr->rx_doorbell, rxr->rx_prod);
	return nb_rx_pkts;
}
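
bnxt_recv_pkts() is a PMD rx_burst handler; applications do not call it directly but reach it through rte_eth_rx_burst() once the driver is bound to a port. The polling loop below is a sketch under that assumption; the port/queue ids, burst size, and the placeholder processing are illustrative only (in older DPDK releases the port id is a uint8_t).

#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST_SZ 32	/* assumed burst size */

static void
poll_rx_queue(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[BURST_SZ];
	uint16_t nb_rx, i;

	for (;;) {
		/* dispatches to the bound PMD handler, e.g. bnxt_recv_pkts */
		nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, BURST_SZ);
		for (i = 0; i < nb_rx; i++)
			rte_pktmbuf_free(pkts[i]);	/* placeholder processing */
	}
}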
Example #7
static int get_tx_q(struct port *p, queue_t qid,
		    snb_array_t pkts, int max_cnt)
{
	struct vport_priv *priv = get_port_priv(p);
	struct queue *tx_queue = &priv->inc_qs[qid];
	void *objs[max_cnt];

	int cnt;
	int i;

	cnt = llring_dequeue_burst(tx_queue->drv_to_sn, 
			objs, max_cnt);
	if (cnt == 0)
		return 0;

	for (i = 0; i < cnt; i++) {
		pkts[i] = (struct snbuf *) objs[i];
		rte_prefetch0(snb_head_data(pkts[i]));
	}

	refill_tx_bufs(tx_queue->sn_to_drv, max_cnt);

	for (i = 0; i < cnt; i++) {
		struct sn_tx_metadata *tx_meta;
		int legit_size;

		tx_meta = (struct sn_tx_metadata *)snb_head_data(pkts[i]);

#if OLD_METADATA
		pkts[i]->in_port = vport->port.port_id;
		pkts[i]->in_queue = txq;

		/* TODO: sanity check for the metadata */
		pkts[i]->tx.csum_start = tx_meta->csum_start;
		pkts[i]->tx.csum_dest = tx_meta->csum_dest;
#endif

		legit_size = (snb_append(pkts[i], sizeof(struct sn_tx_metadata) + 
				tx_meta->length) != NULL);
		assert(legit_size);
		
		snb_adj(pkts[i], sizeof(struct sn_tx_metadata));
	}

	return cnt;
}
Example #8
/*
 * vPMD raw receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
 *
 * Notice:
 * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
 * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
 *   numbers of DD bit
 * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
 */
static inline uint16_t
_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
	int pos;
	uint64_t var;
	__m128i shuf_msk;
	__m128i crc_adjust = _mm_set_epi16(
				0, 0, 0,    /* ignore non-length fields */
				-rxq->crc_len, /* sub crc on data_len */
				0,          /* ignore high-16bits of pkt_len */
				-rxq->crc_len, /* sub crc on pkt_len */
				0, 0            /* ignore pkt_type field */
			);
	__m128i dd_check, eop_check;
	uint8_t vlan_flags;

	/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);

	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);

	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;

	rte_prefetch0(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
		ixgbe_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->wb.upper.status_error &
				rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
		return 0;

	/* 4 packets DD mask */
	dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL);

	/* 4 packets EOP mask */
	eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL);

	/* mask to shuffle from desc. to mbuf */
	shuf_msk = _mm_set_epi8(
		7, 6, 5, 4,  /* octet 4~7, 32bits rss */
		15, 14,      /* octet 14~15, low 16 bits vlan_macip */
		13, 12,      /* octet 12~13, 16 bits data_len */
		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
		13, 12,      /* octet 12~13, low 16 bits pkt_len */
		0xFF, 0xFF,  /* skip 32 bit pkt_type */
		0xFF, 0xFF
		);

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];

	/* ensure these 2 flags are in the lower 8 bits */
	RTE_BUILD_BUG_ON((PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED) > UINT8_MAX);
	vlan_flags = rxq->vlan_flags & UINT8_MAX;

	/* A. load 4 packet in one loop
	 * [A*. mask out 4 unused dirty field in desc]
	 * B. copy 4 mbuf point from swring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */
	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
			pos += RTE_IXGBE_DESCS_PER_LOOP,
			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		__m128i zero, staterr, sterr_tmp1, sterr_tmp2;
		__m128i mbp1, mbp2; /* two mbuf pointer in one XMM reg. */

		/* B.1 load 1 mbuf point */
		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);

		/* Read desc statuses backwards to avoid race condition */
		/* A.1 load 4 pkts desc */
		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
		rte_compiler_barrier();

		/* B.2 copy 2 mbuf point into rx_pkts  */
		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);

		/* B.1 load 1 mbuf point */
		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);

		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
		rte_compiler_barrier();
		/* B.1 load 2 mbuf point */
		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
		rte_compiler_barrier();
		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));

		/* B.2 copy 2 mbuf point into rx_pkts  */
		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* avoid compiler reorder optimization */
		rte_compiler_barrier();

		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);

		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);

		/* set ol_flags with vlan packet type */
		desc_to_olflags_v(descs, vlan_flags, &rx_pkts[pos]);

		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);

		/* C.2 get 4 pkts staterr value  */
		zero = _mm_xor_si128(dd_check, dd_check);
		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);

		/* D.3 copy final 3,4 data to rx_pkts */
		_mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
				pkt_mb4);
		_mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
				pkt_mb3);

		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);

		/* C* extract and record EOP bit */
		if (split_packet) {
			__m128i eop_shuf_mask = _mm_set_epi8(
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0x04, 0x0C, 0x00, 0x08
					);

			/* and with mask to extract bits, flipping 1-0 */
			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
			/* the staterr values are not in order, as the count
			 * of dd bits doesn't care. However, for end of
			 * packet tracking, we do care, so shuffle. This also
			 * compresses the 32-bit values to 8-bit
			 */
			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
			/* store the resulting 32-bit value */
			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
			split_packet += RTE_IXGBE_DESCS_PER_LOOP;

			/* zero-out next pointers */
			rx_pkts[pos]->next = NULL;
			rx_pkts[pos + 1]->next = NULL;
			rx_pkts[pos + 2]->next = NULL;
			rx_pkts[pos + 3]->next = NULL;
		}

		/* C.3 calc available number of desc */
		staterr = _mm_and_si128(staterr, dd_check);
		staterr = _mm_packs_epi32(staterr, zero);

		/* D.3 copy final 1,2 data to rx_pkts */
		_mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
				pkt_mb2);
		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
				pkt_mb1);

		/* C.4 calc available number of desc */
		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
		nb_pkts_recd += var;
		if (likely(var != RTE_IXGBE_DESCS_PER_LOOP))
			break;
	}

	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}
Example #9
uint16_t
rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
	struct rte_mbuf *m, *prev;
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	uint64_t vb_addr = 0;
	uint32_t head[MAX_PKT_BURST];
	uint32_t used_idx;
	uint32_t i;
	uint16_t free_entries, entry_success = 0;
	uint16_t avail_idx;

	if (unlikely(queue_id != VIRTIO_TXQ)) {
		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
		return 0;
	}

	vq = dev->virtqueue[VIRTIO_TXQ];
	avail_idx =  *((volatile uint16_t *)&vq->avail->idx);

	/* If there are no available buffers then return. */
	if (vq->last_used_idx == avail_idx)
		return 0;

	LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
		dev->device_fh);

	/* Prefetch available ring to retrieve head indexes. */
	rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);

	/*get the number of free entries in the ring*/
	free_entries = (avail_idx - vq->last_used_idx);

	free_entries = RTE_MIN(free_entries, count);
	/* Limit to MAX_PKT_BURST. */
	free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
			dev->device_fh, free_entries);
	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (i = 0; i < free_entries; i++)
		head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];

	/* Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[entry_success]]);
	rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);

	while (entry_success < free_entries) {
		uint32_t vb_avail, vb_offset;
		uint32_t seg_avail, seg_offset;
		uint32_t cpy_len;
		uint32_t seg_num = 0;
		struct rte_mbuf *cur;
		uint8_t alloc_err = 0;

		desc = &vq->desc[head[entry_success]];

		/* Discard first buffer as it is the virtio header */
		if (desc->flags & VRING_DESC_F_NEXT) {
			desc = &vq->desc[desc->next];
			vb_offset = 0;
			vb_avail = desc->len;
		} else {
			vb_offset = vq->vhost_hlen;
			vb_avail = desc->len - vb_offset;
		}

		/* Buffer address translation. */
		vb_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void *)(uintptr_t)vb_addr);

		used_idx = vq->last_used_idx & (vq->size - 1);

		if (entry_success < (free_entries - 1)) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[entry_success+1]]);
			rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
		}

		/* Update used index buffer information. */
		vq->used->ring[used_idx].id = head[entry_success];
		vq->used->ring[used_idx].len = 0;

		/* Allocate an mbuf and populate the structure. */
		m = rte_pktmbuf_alloc(mbuf_pool);
		if (unlikely(m == NULL)) {
			RTE_LOG(ERR, VHOST_DATA,
				"Failed to allocate memory for mbuf.\n");
			break;
		}
		seg_offset = 0;
		seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
		cpy_len = RTE_MIN(vb_avail, seg_avail);

		PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

		seg_num++;
		cur = m;
		prev = m;
		while (cpy_len != 0) {
			rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
				(void *)((uintptr_t)(vb_addr + vb_offset)),
				cpy_len);

			seg_offset += cpy_len;
			vb_offset += cpy_len;
			vb_avail -= cpy_len;
			seg_avail -= cpy_len;

			if (vb_avail != 0) {
				/*
				 * The segment has reached its end,
				 * while the virtio buffer in TX vring has
				 * more data to be copied.
				 */
				cur->data_len = seg_offset;
				m->pkt_len += seg_offset;
				/* Allocate mbuf and populate the structure. */
				cur = rte_pktmbuf_alloc(mbuf_pool);
				if (unlikely(cur == NULL)) {
					RTE_LOG(ERR, VHOST_DATA, "Failed to "
						"allocate memory for mbuf.\n");
					rte_pktmbuf_free(m);
					alloc_err = 1;
					break;
				}

				seg_num++;
				prev->next = cur;
				prev = cur;
				seg_offset = 0;
				seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
			} else {
				if (desc->flags & VRING_DESC_F_NEXT) {
					/*
					 * There are more virtio buffers in
					 * same vring entry need to be copied.
					 */
					if (seg_avail == 0) {
						/*
						 * The current segment hasn't
						 * room to accommodate more
						 * data.
						 */
						cur->data_len = seg_offset;
						m->pkt_len += seg_offset;
						/*
						 * Allocate an mbuf and
						 * populate the structure.
						 */
						cur = rte_pktmbuf_alloc(mbuf_pool);
						if (unlikely(cur == NULL)) {
							RTE_LOG(ERR,
								VHOST_DATA,
								"Failed to "
								"allocate memory "
								"for mbuf\n");
							rte_pktmbuf_free(m);
							alloc_err = 1;
							break;
						}
						seg_num++;
						prev->next = cur;
						prev = cur;
						seg_offset = 0;
						seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
					}

					desc = &vq->desc[desc->next];

					/* Buffer address translation. */
					vb_addr = gpa_to_vva(dev, desc->addr);
					/* Prefetch buffer address. */
					rte_prefetch0((void *)(uintptr_t)vb_addr);
					vb_offset = 0;
					vb_avail = desc->len;

					PRINT_PACKET(dev, (uintptr_t)vb_addr,
						desc->len, 0);
				} else {
					/* The whole packet completes. */
					cur->data_len = seg_offset;
					m->pkt_len += seg_offset;
					vb_avail = 0;
				}
			}

			cpy_len = RTE_MIN(vb_avail, seg_avail);
		}

		if (unlikely(alloc_err == 1))
			break;

		m->nb_segs = seg_num;

		pkts[entry_success] = m;
		vq->last_used_idx++;
		entry_success++;
	}

	rte_compiler_barrier();
	vq->used->idx += entry_success;
	/* Kick guest if required. */
	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write((int)vq->callfd, 1);
	return entry_success;
}
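
A sketch of how a vhost switching application might pair the dequeue above with a NIC TX burst. The helper name, the NIC port/queue ids, the header names and the error handling are assumptions for illustration; only the rte_vhost_dequeue_burst() signature, VIRTIO_TXQ and MAX_PKT_BURST come from the listing itself.

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_virtio_net.h>	/* assumed vhost header for this DPDK era */

static void
drain_vhost_to_nic(struct virtio_net *dev, struct rte_mempool *mbuf_pool,
		   uint8_t nic_port)
{
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	uint16_t nb, sent, i;

	nb = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool,
				     pkts, MAX_PKT_BURST);
	if (nb == 0)
		return;

	sent = rte_eth_tx_burst(nic_port, 0, pkts, nb);
	for (i = sent; i < nb; i++)	/* drop what the NIC did not accept */
		rte_pktmbuf_free(pkts[i]);
}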
Example #10
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	struct rte_mbuf *buff;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
	uint64_t buff_addr = 0;
	uint64_t buff_hdr_addr = 0;
	uint32_t head[MAX_PKT_BURST];
	uint32_t head_idx, packet_success = 0;
	uint16_t avail_idx, res_cur_idx;
	uint16_t res_base_idx, res_end_idx;
	uint16_t free_entries;
	uint8_t success = 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
	if (unlikely(queue_id != VIRTIO_RXQ)) {
		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
		return 0;
	}

	vq = dev->virtqueue[VIRTIO_RXQ];
	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

	/*
	 * As many data cores may want access to available buffers,
	 * they need to be reserved.
	 */
	do {
		res_base_idx = vq->last_used_idx_res;
		avail_idx = *((volatile uint16_t *)&vq->avail->idx);

		free_entries = (avail_idx - res_base_idx);
		/*check that we have enough buffers*/
		if (unlikely(count > free_entries))
			count = free_entries;

		if (count == 0)
			return 0;

		res_end_idx = res_base_idx + count;
		/* vq->last_used_idx_res is atomically updated. */
		/* TODO: Allow to disable cmpset if no concurrency in application. */
		success = rte_atomic16_cmpset(&vq->last_used_idx_res,
				res_base_idx, res_end_idx);
	} while (unlikely(success == 0));
	res_cur_idx = res_base_idx;
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
			dev->device_fh, res_cur_idx, res_end_idx);

	/* Prefetch available ring to retrieve indexes. */
	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (head_idx = 0; head_idx < count; head_idx++)
		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
					(vq->size - 1)];

	/*Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[packet_success]]);

	while (res_cur_idx != res_end_idx) {
		uint32_t offset = 0, vb_offset = 0;
		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
		uint8_t hdr = 0, uncompleted_pkt = 0;

		/* Get descriptor from available ring */
		desc = &vq->desc[head[packet_success]];

		buff = pkts[packet_success];

		/* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
		buff_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void *)(uintptr_t)buff_addr);

		/* Copy virtio_hdr to packet and increment buffer address */
		buff_hdr_addr = buff_addr;

		/*
		 * If the descriptors are chained the header and data are
		 * placed in separate buffers.
		 */
		if ((desc->flags & VRING_DESC_F_NEXT) &&
			(desc->len == vq->vhost_hlen)) {
			desc = &vq->desc[desc->next];
			/* Buffer address translation. */
			buff_addr = gpa_to_vva(dev, desc->addr);
		} else {
			vb_offset += vq->vhost_hlen;
			hdr = 1;
		}

		pkt_len = rte_pktmbuf_pkt_len(buff);
		data_len = rte_pktmbuf_data_len(buff);
		len_to_cpy = RTE_MIN(data_len,
			hdr ? desc->len - vq->vhost_hlen : desc->len);
		while (total_copied < pkt_len) {
			/* Copy mbuf data to buffer */
			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
				len_to_cpy);
			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
				len_to_cpy, 0);

			offset += len_to_cpy;
			vb_offset += len_to_cpy;
			total_copied += len_to_cpy;

			/* The whole packet completes */
			if (total_copied == pkt_len)
				break;

			/* The current segment completes */
			if (offset == data_len) {
				buff = buff->next;
				offset = 0;
				data_len = rte_pktmbuf_data_len(buff);
			}

			/* The current vring descriptor done */
			if (vb_offset == desc->len) {
				if (desc->flags & VRING_DESC_F_NEXT) {
					desc = &vq->desc[desc->next];
					buff_addr = gpa_to_vva(dev, desc->addr);
					vb_offset = 0;
				} else {
					/* Room in vring buffer is not enough */
					uncompleted_pkt = 1;
					break;
				}
			}
			len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
		};

		/* Update used ring with desc information */
		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
							head[packet_success];

		/* Drop the packet if it is uncompleted */
		if (unlikely(uncompleted_pkt == 1))
			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
							vq->vhost_hlen;
		else
			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
							pkt_len + vq->vhost_hlen;

		res_cur_idx++;
		packet_success++;

		if (unlikely(uncompleted_pkt == 1))
			continue;

		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
			(const void *)&virtio_hdr, vq->vhost_hlen);

		PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);

		if (res_cur_idx < res_end_idx) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[packet_success]]);
		}
	}

	rte_compiler_barrier();

	/* Wait until it's our turn to add our buffer to the used ring. */
	while (unlikely(vq->last_used_idx != res_base_idx))
		rte_pause();

	*(volatile uint16_t *)&vq->used->idx += count;
	vq->last_used_idx = res_end_idx;

	/* flush used->idx update before we read avail->flags. */
	rte_mb();

	/* Kick the guest if necessary. */
	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write((int)vq->callfd, 1);
	return count;
}
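
The do/while around rte_atomic16_cmpset() above is what allows several cores to enqueue into the same virtqueue: each core atomically reserves a contiguous range of used-ring slots before touching the ring. Below is a minimal sketch of just that reservation step, with assumed helper and parameter names.

#include <stdint.h>
#include <rte_atomic.h>

/* Atomically claim up to `count` slots starting at *last_used_idx_res.
 * Returns the number of slots actually reserved and stores the start
 * index in *res_base. */
static inline uint16_t
reserve_vq_slots(volatile uint16_t *last_used_idx_res,
		 volatile uint16_t *avail_idx, uint16_t count,
		 uint16_t *res_base)
{
	uint16_t base, end, free_entries;

	do {
		base = *last_used_idx_res;
		free_entries = (uint16_t)(*avail_idx - base);
		if (count > free_entries)
			count = free_entries;
		if (count == 0)
			return 0;
		end = (uint16_t)(base + count);
	} while (rte_atomic16_cmpset(last_used_idx_res, base, end) == 0);

	*res_base = base;
	return count;
}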
Example #11
copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
	uint16_t res_end_idx, struct rte_mbuf *pkt)
{
	uint32_t vec_idx = 0;
	uint32_t entry_success = 0;
	struct vhost_virtqueue *vq;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
		{0, 0, 0, 0, 0, 0}, 0};
	uint16_t cur_idx = res_base_idx;
	uint64_t vb_addr = 0;
	uint64_t vb_hdr_addr = 0;
	uint32_t seg_offset = 0;
	uint32_t vb_offset = 0;
	uint32_t seg_avail;
	uint32_t vb_avail;
	uint32_t cpy_len, entry_len;

	if (pkt == NULL)
		return 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
		"End Index %d\n",
		dev->device_fh, cur_idx, res_end_idx);

	/*
	 * Convert from gpa to vva
	 * (guest physical addr -> vhost virtual addr)
	 */
	vq = dev->virtqueue[VIRTIO_RXQ];
	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
	vb_hdr_addr = vb_addr;

	/* Prefetch buffer address. */
	rte_prefetch0((void *)(uintptr_t)vb_addr);

	virtio_hdr.num_buffers = res_end_idx - res_base_idx;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
		dev->device_fh, virtio_hdr.num_buffers);

	rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
		(const void *)&virtio_hdr, vq->vhost_hlen);

	PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);

	seg_avail = rte_pktmbuf_data_len(pkt);
	vb_offset = vq->vhost_hlen;
	vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;

	entry_len = vq->vhost_hlen;

	if (vb_avail == 0) {
		uint32_t desc_idx =
			vq->buf_vec[vec_idx].desc_idx;

		if ((vq->desc[desc_idx].flags
			& VRING_DESC_F_NEXT) == 0) {
			/* Update used ring with desc information */
			vq->used->ring[cur_idx & (vq->size - 1)].id
				= vq->buf_vec[vec_idx].desc_idx;
			vq->used->ring[cur_idx & (vq->size - 1)].len
				= entry_len;

			entry_len = 0;
			cur_idx++;
			entry_success++;
		}

		vec_idx++;
		vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);

		/* Prefetch buffer address. */
		rte_prefetch0((void *)(uintptr_t)vb_addr);
		vb_offset = 0;
		vb_avail = vq->buf_vec[vec_idx].buf_len;
	}

	cpy_len = RTE_MIN(vb_avail, seg_avail);

	while (cpy_len > 0) {
		/* Copy mbuf data to vring buffer */
		rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
			(const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),
			cpy_len);

		PRINT_PACKET(dev,
			(uintptr_t)(vb_addr + vb_offset),
			cpy_len, 0);

		seg_offset += cpy_len;
		vb_offset += cpy_len;
		seg_avail -= cpy_len;
		vb_avail -= cpy_len;
		entry_len += cpy_len;

		if (seg_avail != 0) {
			/*
			 * The virtio buffer in this vring
			 * entry has reached its end,
			 * but the mbuf segment is not finished yet.
			 */
			if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
				VRING_DESC_F_NEXT) == 0) {
				/* Update used ring with desc information */
				vq->used->ring[cur_idx & (vq->size - 1)].id
					= vq->buf_vec[vec_idx].desc_idx;
				vq->used->ring[cur_idx & (vq->size - 1)].len
					= entry_len;
				entry_len = 0;
				cur_idx++;
				entry_success++;
			}

			vec_idx++;
			vb_addr = gpa_to_vva(dev,
				vq->buf_vec[vec_idx].buf_addr);
			vb_offset = 0;
			vb_avail = vq->buf_vec[vec_idx].buf_len;
			cpy_len = RTE_MIN(vb_avail, seg_avail);
		} else {
			/*
			 * The current segment is complete; continue to
			 * check whether the whole packet is complete.
			 */
			pkt = pkt->next;
			if (pkt != NULL) {
				/*
				 * There are more segments.
				 */
				if (vb_avail == 0) {
					/*
					 * This current buffer from vring is
					 * used up, need fetch next buffer
					 * from buf_vec.
					 */
					uint32_t desc_idx =
						vq->buf_vec[vec_idx].desc_idx;

					if ((vq->desc[desc_idx].flags &
						VRING_DESC_F_NEXT) == 0) {
						uint16_t wrapped_idx =
							cur_idx & (vq->size - 1);
						/*
						 * Update used ring with the
						 * descriptor information
						 */
						vq->used->ring[wrapped_idx].id
							= desc_idx;
						vq->used->ring[wrapped_idx].len
							= entry_len;
						entry_success++;
						entry_len = 0;
						cur_idx++;
					}

					/* Get next buffer from buf_vec. */
					vec_idx++;
					vb_addr = gpa_to_vva(dev,
						vq->buf_vec[vec_idx].buf_addr);
					vb_avail =
						vq->buf_vec[vec_idx].buf_len;
					vb_offset = 0;
				}

				seg_offset = 0;
				seg_avail = rte_pktmbuf_data_len(pkt);
				cpy_len = RTE_MIN(vb_avail, seg_avail);
			} else {
				/*
				 * This whole packet completes.
				 */
				/* Update used ring with desc information */
				vq->used->ring[cur_idx & (vq->size - 1)].id
					= vq->buf_vec[vec_idx].desc_idx;
				vq->used->ring[cur_idx & (vq->size - 1)].len
					= entry_len;
				entry_success++;
				break;
			}
		}
	}

	return entry_success;
}
Example #12
uint16_t
fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
				uint16_t nb_pkts)
{
	struct rte_mbuf *mbuf;
	union fm10k_rx_desc desc;
	struct fm10k_rx_queue *q = rx_queue;
	uint16_t count = 0;
	uint16_t nb_rcv, nb_seg;
	int alloc = 0;
	uint16_t next_dd;
	struct rte_mbuf *first_seg = q->pkt_first_seg;
	struct rte_mbuf *last_seg = q->pkt_last_seg;
	int ret;

	next_dd = q->next_dd;
	nb_rcv = 0;

	nb_seg = RTE_MIN(nb_pkts, q->alloc_thresh);
	for (count = 0; count < nb_seg; count++) {
		mbuf = q->sw_ring[next_dd];
		desc = q->hw_ring[next_dd];
		if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
			break;
#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
		dump_rxd(&desc);
#endif

		if (++next_dd == q->nb_desc) {
			next_dd = 0;
			alloc = 1;
		}

		/* Prefetch next mbuf while processing current one. */
		rte_prefetch0(q->sw_ring[next_dd]);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((next_dd & 0x3) == 0) {
			rte_prefetch0(&q->hw_ring[next_dd]);
			rte_prefetch0(&q->sw_ring[next_dd]);
		}

		/* Fill data length */
		rte_pktmbuf_data_len(mbuf) = desc.w.length;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet and
		 * initialize its context.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (!first_seg) {
			first_seg = mbuf;
			first_seg->pkt_len = desc.w.length;
		} else {
			first_seg->pkt_len =
					(uint16_t)(first_seg->pkt_len +
					rte_pktmbuf_data_len(mbuf));
			first_seg->nb_segs++;
			last_seg->next = mbuf;
		}

		/*
		 * If this is not the last buffer of the received packet,
		 * update the pointer to the last mbuf of the current scattered
		 * packet and continue to parse the RX ring.
		 */
		if (!(desc.d.staterr & FM10K_RXD_STATUS_EOP)) {
			last_seg = mbuf;
			continue;
		}

		first_seg->ol_flags = 0;
#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
		rx_desc_to_ol_flags(first_seg, &desc);
#endif
		first_seg->hash.rss = desc.d.rss;

		/* Prefetch data of first segment, if configured to do so. */
		rte_packet_prefetch((char *)first_seg->buf_addr +
			first_seg->data_off);

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rcv++] = first_seg;

		/*
		 * Setup receipt context for a new packet.
		 */
		first_seg = NULL;
	}

	q->next_dd = next_dd;

	if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
		ret = rte_mempool_get_bulk(q->mp,
					(void **)&q->sw_ring[q->next_alloc],
					q->alloc_thresh);

		if (unlikely(ret != 0)) {
			uint8_t port = q->port_id;
			PMD_RX_LOG(ERR, "Failed to alloc mbuf");
			/*
			 * Need to restore next_dd if we cannot allocate new
			 * buffers to replenish the old ones.
			 */
			q->next_dd = (q->next_dd + q->nb_desc - count) %
								q->nb_desc;
			rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
			return 0;
		}

		for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
			mbuf = q->sw_ring[q->next_alloc];

			/* setup static mbuf fields */
			fm10k_pktmbuf_reset(mbuf, q->port_id);

			/* write descriptor */
			desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			q->hw_ring[q->next_alloc] = desc;
		}
		FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
		q->next_trigger += q->alloc_thresh;
		if (q->next_trigger >= q->nb_desc) {
			q->next_trigger = q->alloc_thresh - 1;
			q->next_alloc = 0;
		}
	}

	q->pkt_first_seg = first_seg;
	q->pkt_last_seg = last_seg;

	return nb_rcv;
}
Example #13
uint16_t
fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
	uint16_t nb_pkts)
{
	struct rte_mbuf *mbuf;
	union fm10k_rx_desc desc;
	struct fm10k_rx_queue *q = rx_queue;
	uint16_t count = 0;
	int alloc = 0;
	uint16_t next_dd;
	int ret;

	next_dd = q->next_dd;

	nb_pkts = RTE_MIN(nb_pkts, q->alloc_thresh);
	for (count = 0; count < nb_pkts; ++count) {
		mbuf = q->sw_ring[next_dd];
		desc = q->hw_ring[next_dd];
		if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
			break;
#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
		dump_rxd(&desc);
#endif
		rte_pktmbuf_pkt_len(mbuf) = desc.w.length;
		rte_pktmbuf_data_len(mbuf) = desc.w.length;

		mbuf->ol_flags = 0;
#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
		rx_desc_to_ol_flags(mbuf, &desc);
#endif

		mbuf->hash.rss = desc.d.rss;

		rx_pkts[count] = mbuf;
		if (++next_dd == q->nb_desc) {
			next_dd = 0;
			alloc = 1;
		}

		/* Prefetch next mbuf while processing current one. */
		rte_prefetch0(q->sw_ring[next_dd]);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((next_dd & 0x3) == 0) {
			rte_prefetch0(&q->hw_ring[next_dd]);
			rte_prefetch0(&q->sw_ring[next_dd]);
		}
	}

	q->next_dd = next_dd;

	if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
		ret = rte_mempool_get_bulk(q->mp,
					(void **)&q->sw_ring[q->next_alloc],
					q->alloc_thresh);

		if (unlikely(ret != 0)) {
			uint8_t port = q->port_id;
			PMD_RX_LOG(ERR, "Failed to alloc mbuf");
			/*
			 * Need to restore next_dd if we cannot allocate new
			 * buffers to replenish the old ones.
			 */
			q->next_dd = (q->next_dd + q->nb_desc - count) %
								q->nb_desc;
			rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
			return 0;
		}

		for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
			mbuf = q->sw_ring[q->next_alloc];

			/* setup static mbuf fields */
			fm10k_pktmbuf_reset(mbuf, q->port_id);

			/* write descriptor */
			desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			q->hw_ring[q->next_alloc] = desc;
		}
		FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
		q->next_trigger += q->alloc_thresh;
		if (q->next_trigger >= q->nb_desc) {
			q->next_trigger = q->alloc_thresh - 1;
			q->next_alloc = 0;
		}
	}

	return count;
}
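
The (next_dd & 0x3) == 0 test above prefetches at every fourth descriptor; assuming 16-byte fm10k RX descriptors and 64-byte cache lines, that is exactly where a new cache line of the hardware ring begins. The snippet below just spells out that arithmetic; both sizes are assumptions, not values taken from this listing.

#include <stdint.h>

#define CACHE_LINE_SIZE	64	/* assumption */
#define RX_DESC_SIZE	16	/* assumption: sizeof(union fm10k_rx_desc) */

/* A descriptor index starts a new cache line every
 * CACHE_LINE_SIZE / RX_DESC_SIZE = 4 entries, which is what the
 * (next_dd & 0x3) == 0 check expresses. */
static inline int
desc_starts_cache_line(uint16_t idx)
{
	return (idx % (CACHE_LINE_SIZE / RX_DESC_SIZE)) == 0;
}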
Example #14
tx_burst_sg(struct txq *txq, unsigned int segs, struct txq_elt *elt,
	    struct rte_mbuf *buf, unsigned int elts_head,
	    struct ibv_sge (*sges)[MLX5_PMD_SGE_WR_N])
{
	unsigned int sent_size = 0;
	unsigned int j;
	int linearize = 0;

	/* When there are too many segments, extra segments are
	 * linearized in the last SGE. */
	if (unlikely(segs > RTE_DIM(*sges))) {
		segs = (RTE_DIM(*sges) - 1);
		linearize = 1;
	}
	/* Update element. */
	elt->buf = buf;
	/* Register segments as SGEs. */
	for (j = 0; (j != segs); ++j) {
		struct ibv_sge *sge = &(*sges)[j];
		uint32_t lkey;

		/* Retrieve Memory Region key for this memory pool. */
		lkey = txq_mp2mr(txq, txq_mb2mp(buf));
		if (unlikely(lkey == (uint32_t)-1)) {
			/* MR does not exist. */
			DEBUG("%p: unable to get MP <-> MR association",
			      (void *)txq);
			/* Clean up TX element. */
			elt->buf = NULL;
			goto stop;
		}
		/* Update SGE. */
		sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
		if (txq->priv->vf)
			rte_prefetch0((volatile void *)
				      (uintptr_t)sge->addr);
		sge->length = DATA_LEN(buf);
		sge->lkey = lkey;
		sent_size += sge->length;
		buf = NEXT(buf);
	}
	/* If buf is not NULL here and is not going to be linearized,
	 * nb_segs is not valid. */
	assert(j == segs);
	assert((buf == NULL) || (linearize));
	/* Linearize extra segments. */
	if (linearize) {
		struct ibv_sge *sge = &(*sges)[segs];
		linear_t *linear = &(*txq->elts_linear)[elts_head];
		unsigned int size = linearize_mbuf(linear, buf);

		assert(segs == (RTE_DIM(*sges) - 1));
		if (size == 0) {
			/* Invalid packet. */
			DEBUG("%p: packet too large to be linearized.",
			      (void *)txq);
			/* Clean up TX element. */
			elt->buf = NULL;
			goto stop;
		}
		/* If MLX5_PMD_SGE_WR_N is 1, free mbuf immediately. */
		if (RTE_DIM(*sges) == 1) {
			do {
				struct rte_mbuf *next = NEXT(buf);

				rte_pktmbuf_free_seg(buf);
				buf = next;
			} while (buf != NULL);
			elt->buf = NULL;
		}
		/* Update SGE. */
		sge->addr = (uintptr_t)&(*linear)[0];
		sge->length = size;
		sge->lkey = txq->mr_linear->lkey;
		sent_size += size;
		/* Include last segment. */
		segs++;
	}
	return (struct tx_burst_sg_ret){
		.length = sent_size,
		.num = segs,
	};
stop:
	return (struct tx_burst_sg_ret){
		.length = -1,
		.num = -1,
	};
}

#endif /* MLX5_PMD_SGE_WR_N > 1 */

/**
 * DPDK callback for TX.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct txq *txq = (struct txq *)dpdk_txq;
	unsigned int elts_head = txq->elts_head;
	const unsigned int elts_n = txq->elts_n;
	unsigned int elts_comp_cd = txq->elts_comp_cd;
	unsigned int elts_comp = 0;
	unsigned int i;
	unsigned int max;
	int err;
	struct rte_mbuf *buf = pkts[0];

	assert(elts_comp_cd != 0);
	/* Prefetch first packet cacheline. */
	rte_prefetch0(buf);
	txq_complete(txq);
	max = (elts_n - (elts_head - txq->elts_tail));
	if (max > elts_n)
		max -= elts_n;
	assert(max >= 1);
	assert(max <= elts_n);
	/* Always leave one free entry in the ring. */
	--max;
	if (max == 0)
		return 0;
	if (max > pkts_n)
		max = pkts_n;
	for (i = 0; (i != max); ++i) {
		struct rte_mbuf *buf_next = pkts[i + 1];
		unsigned int elts_head_next =
			(((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
		struct txq_elt *elt = &(*txq->elts)[elts_head];
		unsigned int segs = NB_SEGS(buf);
#ifdef MLX5_PMD_SOFT_COUNTERS
		unsigned int sent_size = 0;
#endif
		uint32_t send_flags = 0;
#ifdef HAVE_VERBS_VLAN_INSERTION
		int insert_vlan = 0;
#endif /* HAVE_VERBS_VLAN_INSERTION */

		if (i + 1 < max)
			rte_prefetch0(buf_next);
		/* Request TX completion. */
		if (unlikely(--elts_comp_cd == 0)) {
			elts_comp_cd = txq->elts_comp_cd_init;
			++elts_comp;
			send_flags |= IBV_EXP_QP_BURST_SIGNALED;
		}
		/* Should we enable HW CKSUM offload */
		if (buf->ol_flags &
		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
			send_flags |= IBV_EXP_QP_BURST_IP_CSUM;
			/* HW does not support checksum offloads at arbitrary
			 * offsets but automatically recognizes the packet
			 * type. For inner L3/L4 checksums, only VXLAN (UDP)
			 * tunnels are currently supported. */
			if (RTE_ETH_IS_TUNNEL_PKT(buf->packet_type))
				send_flags |= IBV_EXP_QP_BURST_TUNNEL;
		}
		if (buf->ol_flags & PKT_TX_VLAN_PKT) {
#ifdef HAVE_VERBS_VLAN_INSERTION
			if (!txq->priv->mps)
				insert_vlan = 1;
			else
#endif /* HAVE_VERBS_VLAN_INSERTION */
			{
				err = insert_vlan_sw(buf);
				if (unlikely(err))
					goto stop;
			}
		}
		if (likely(segs == 1)) {
			uintptr_t addr;
			uint32_t length;
			uint32_t lkey;
			uintptr_t buf_next_addr;

			/* Retrieve buffer information. */
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			length = DATA_LEN(buf);
			/* Update element. */
			elt->buf = buf;
			if (txq->priv->vf)
				rte_prefetch0((volatile void *)
					      (uintptr_t)addr);
			/* Prefetch next buffer data. */
			if (i + 1 < max) {
				buf_next_addr =
					rte_pktmbuf_mtod(buf_next, uintptr_t);
				rte_prefetch0((volatile void *)
					      (uintptr_t)buf_next_addr);
			}
			/* Put packet into send queue. */
#if MLX5_PMD_MAX_INLINE > 0
			if (length <= txq->max_inline) {
#ifdef HAVE_VERBS_VLAN_INSERTION
				if (insert_vlan)
					err = txq->send_pending_inline_vlan
						(txq->qp,
						 (void *)addr,
						 length,
						 send_flags,
						 &buf->vlan_tci);
				else
#endif /* HAVE_VERBS_VLAN_INSERTION */
					err = txq->send_pending_inline
						(txq->qp,
						 (void *)addr,
						 length,
						 send_flags);
			} else
#endif
			{
				/* Retrieve Memory Region key for this
				 * memory pool. */
				lkey = txq_mp2mr(txq, txq_mb2mp(buf));
				if (unlikely(lkey == (uint32_t)-1)) {
					/* MR does not exist. */
					DEBUG("%p: unable to get MP <-> MR"
					      " association", (void *)txq);
					/* Clean up TX element. */
					elt->buf = NULL;
					goto stop;
				}
#ifdef HAVE_VERBS_VLAN_INSERTION
				if (insert_vlan)
					err = txq->send_pending_vlan
						(txq->qp,
						 addr,
						 length,
						 lkey,
						 send_flags,
						 &buf->vlan_tci);
				else
#endif /* HAVE_VERBS_VLAN_INSERTION */
					err = txq->send_pending
						(txq->qp,
						 addr,
						 length,
						 lkey,
						 send_flags);
			}
			if (unlikely(err))
				goto stop;
#ifdef MLX5_PMD_SOFT_COUNTERS
			sent_size += length;
#endif
		} else {
#if MLX5_PMD_SGE_WR_N > 1
			struct ibv_sge sges[MLX5_PMD_SGE_WR_N];
			struct tx_burst_sg_ret ret;

			ret = tx_burst_sg(txq, segs, elt, buf, elts_head,
					  &sges);
			if (ret.length == (unsigned int)-1)
				goto stop;
			/* Put SG list into send queue. */
#ifdef HAVE_VERBS_VLAN_INSERTION
			if (insert_vlan)
				err = txq->send_pending_sg_list_vlan
					(txq->qp,
					 sges,
					 ret.num,
					 send_flags,
					 &buf->vlan_tci);
			else
#endif /* HAVE_VERBS_VLAN_INSERTION */
				err = txq->send_pending_sg_list
					(txq->qp,
					 sges,
					 ret.num,
					 send_flags);
			if (unlikely(err))
				goto stop;
#ifdef MLX5_PMD_SOFT_COUNTERS
			sent_size += ret.length;
#endif
#else /* MLX5_PMD_SGE_WR_N > 1 */
			DEBUG("%p: TX scattered buffers support not"
			      " compiled in", (void *)txq);
			goto stop;
#endif /* MLX5_PMD_SGE_WR_N > 1 */
		}
		elts_head = elts_head_next;
		buf = buf_next;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += sent_size;
#endif
	}
stop:
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	err = txq->send_flush(txq->qp);
	if (unlikely(err)) {
		/* A nonzero value is not supposed to be returned.
		 * Nothing can be done about it. */
		DEBUG("%p: send_flush() failed with error %d",
		      (void *)txq, err);
	}
	txq->elts_head = elts_head;
	txq->elts_comp += elts_comp;
	txq->elts_comp_cd = elts_comp_cd;
	return i;
}

/**
 * Translate RX completion flags to packet type.
 *
 * @param flags
 *   RX completion flags returned by poll_length_flags().
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 *
 * @return
 *   Packet type for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_pkt_type(uint32_t flags)
{
	uint32_t pkt_type;

	if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET)
		pkt_type =
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
				  RTE_PTYPE_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
				  RTE_PTYPE_L3_IPV6) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV4_PACKET,
				  RTE_PTYPE_INNER_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV6_PACKET,
				  RTE_PTYPE_INNER_L3_IPV6);
	else
		pkt_type =
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV4_PACKET,
				  RTE_PTYPE_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV6_PACKET,
				  RTE_PTYPE_L3_IPV6);
	return pkt_type;
}
Example #15
static uint16_t
schedule_enqueue(void *qp, struct rte_crypto_op **ops, uint16_t nb_ops)
{
	struct scheduler_qp_ctx *qp_ctx = qp;
	struct psd_scheduler_qp_ctx *psd_qp_ctx = qp_ctx->private_qp_ctx;
	struct rte_crypto_op *sched_ops[NB_PKT_SIZE_SLAVES][nb_ops];
	struct scheduler_session *sess;
	uint32_t in_flight_ops[NB_PKT_SIZE_SLAVES] = {
			psd_qp_ctx->primary_slave.nb_inflight_cops,
			psd_qp_ctx->secondary_slave.nb_inflight_cops
	};
	struct psd_schedule_op enq_ops[NB_PKT_SIZE_SLAVES] = {
		{PRIMARY_SLAVE_IDX, 0}, {SECONDARY_SLAVE_IDX, 0}
	};
	struct psd_schedule_op *p_enq_op;
	uint16_t i, processed_ops_pri = 0, processed_ops_sec = 0;
	uint32_t job_len;

	if (unlikely(nb_ops == 0))
		return 0;

	for (i = 0; i < nb_ops && i < 4; i++) {
		rte_prefetch0(ops[i]->sym);
		rte_prefetch0(ops[i]->sym->session);
	}

	for (i = 0; (i < (nb_ops - 8)) && (nb_ops > 8); i += 4) {
		rte_prefetch0(ops[i + 4]->sym);
		rte_prefetch0(ops[i + 4]->sym->session);
		rte_prefetch0(ops[i + 5]->sym);
		rte_prefetch0(ops[i + 5]->sym->session);
		rte_prefetch0(ops[i + 6]->sym);
		rte_prefetch0(ops[i + 6]->sym->session);
		rte_prefetch0(ops[i + 7]->sym);
		rte_prefetch0(ops[i + 7]->sym->session);

		sess = (struct scheduler_session *)
				ops[i]->sym->session->_private;
		/* job_len starts as the cipher data length; when that is
		 * 0, the auth data length is used instead.
		 */
		job_len = ops[i]->sym->cipher.data.length;
		job_len += (ops[i]->sym->cipher.data.length == 0) *
				ops[i]->sym->auth.data.length;
		/* decide the target op based on the job length */
		p_enq_op = &enq_ops[!(job_len & psd_qp_ctx->threshold)];

		/* stop scheduling crypto ops before the queue is full; this
		 * prevents a failed enqueue
		 */
		if (p_enq_op->pos + in_flight_ops[p_enq_op->slave_idx] ==
				qp_ctx->max_nb_objs) {
			i = nb_ops;
			break;
		}

		sched_ops[p_enq_op->slave_idx][p_enq_op->pos] = ops[i];
		ops[i]->sym->session = sess->sessions[p_enq_op->slave_idx];
		p_enq_op->pos++;

		sess = (struct scheduler_session *)
				ops[i+1]->sym->session->_private;
		job_len = ops[i+1]->sym->cipher.data.length;
		job_len += (ops[i+1]->sym->cipher.data.length == 0) *
				ops[i+1]->sym->auth.data.length;
		p_enq_op = &enq_ops[!(job_len & psd_qp_ctx->threshold)];

		if (p_enq_op->pos + in_flight_ops[p_enq_op->slave_idx] ==
				qp_ctx->max_nb_objs) {
			i = nb_ops;
			break;
		}

		sched_ops[p_enq_op->slave_idx][p_enq_op->pos] = ops[i+1];
		ops[i+1]->sym->session = sess->sessions[p_enq_op->slave_idx];
		p_enq_op->pos++;

		sess = (struct scheduler_session *)
				ops[i+2]->sym->session->_private;
		job_len = ops[i+2]->sym->cipher.data.length;
		job_len += (ops[i+2]->sym->cipher.data.length == 0) *
				ops[i+2]->sym->auth.data.length;
		p_enq_op = &enq_ops[!(job_len & psd_qp_ctx->threshold)];

		if (p_enq_op->pos + in_flight_ops[p_enq_op->slave_idx] ==
				qp_ctx->max_nb_objs) {
			i = nb_ops;
			break;
		}

		sched_ops[p_enq_op->slave_idx][p_enq_op->pos] = ops[i+2];
		ops[i+2]->sym->session = sess->sessions[p_enq_op->slave_idx];
		p_enq_op->pos++;

		sess = (struct scheduler_session *)
				ops[i+3]->sym->session->_private;

		job_len = ops[i+3]->sym->cipher.data.length;
		job_len += (ops[i+3]->sym->cipher.data.length == 0) *
				ops[i+3]->sym->auth.data.length;
		p_enq_op = &enq_ops[!(job_len & psd_qp_ctx->threshold)];

		if (p_enq_op->pos + in_flight_ops[p_enq_op->slave_idx] ==
				qp_ctx->max_nb_objs) {
			i = nb_ops;
			break;
		}

		sched_ops[p_enq_op->slave_idx][p_enq_op->pos] = ops[i+3];
		ops[i+3]->sym->session = sess->sessions[p_enq_op->slave_idx];
		p_enq_op->pos++;
	}

	for (; i < nb_ops; i++) {
		sess = (struct scheduler_session *)
				ops[i]->sym->session->_private;

		job_len = ops[i]->sym->cipher.data.length;
		job_len += (ops[i]->sym->cipher.data.length == 0) *
				ops[i]->sym->auth.data.length;
		p_enq_op = &enq_ops[!(job_len & psd_qp_ctx->threshold)];

		if (p_enq_op->pos + in_flight_ops[p_enq_op->slave_idx] ==
				qp_ctx->max_nb_objs) {
			i = nb_ops;
			break;
		}

		sched_ops[p_enq_op->slave_idx][p_enq_op->pos] = ops[i];
		ops[i]->sym->session = sess->sessions[p_enq_op->slave_idx];
		p_enq_op->pos++;
	}

	processed_ops_pri = rte_cryptodev_enqueue_burst(
			psd_qp_ctx->primary_slave.dev_id,
			psd_qp_ctx->primary_slave.qp_id,
			sched_ops[PRIMARY_SLAVE_IDX],
			enq_ops[PRIMARY_SLAVE_IDX].pos);
	/* enqueue shall not fail as the slave queue is monitored */
	RTE_ASSERT(processed_ops_pri == enq_ops[PRIMARY_SLAVE_IDX].pos);

	psd_qp_ctx->primary_slave.nb_inflight_cops += processed_ops_pri;

	processed_ops_sec = rte_cryptodev_enqueue_burst(
			psd_qp_ctx->secondary_slave.dev_id,
			psd_qp_ctx->secondary_slave.qp_id,
			sched_ops[SECONDARY_SLAVE_IDX],
			enq_ops[SECONDARY_SLAVE_IDX].pos);
	RTE_ASSERT(processed_ops_sec == enq_ops[SECONDARY_SLAVE_IDX].pos);

	psd_qp_ctx->secondary_slave.nb_inflight_cops += processed_ops_sec;

	return processed_ops_pri + processed_ops_sec;
}
Example #16
static uint16_t bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
			    struct bnxt_rx_queue *rxq, uint32_t *raw_cons)
{
	struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
	struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
	struct rx_pkt_cmpl *rxcmp;
	struct rx_pkt_cmpl_hi *rxcmp1;
	uint32_t tmp_raw_cons = *raw_cons;
	uint16_t cons, prod, cp_cons =
	    RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
	struct bnxt_sw_rx_bd *rx_buf;
	struct rte_mbuf *mbuf;
	int rc = 0;

	rxcmp = (struct rx_pkt_cmpl *)
	    &cpr->cp_desc_ring[cp_cons];

	tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
	cp_cons = RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
	rxcmp1 = (struct rx_pkt_cmpl_hi *)&cpr->cp_desc_ring[cp_cons];

	if (!CMP_VALID(rxcmp1, tmp_raw_cons, cpr->cp_ring_struct))
		return -EBUSY;

	prod = rxr->rx_prod;

	/* EW - GRO deferred to phase 3 */
	cons = rxcmp->opaque;
	rx_buf = &rxr->rx_buf_ring[cons];
	mbuf = rx_buf->mbuf;
	rte_prefetch0(mbuf);

	mbuf->nb_segs = 1;
	mbuf->next = NULL;
	mbuf->pkt_len = rxcmp->len;
	mbuf->data_len = mbuf->pkt_len;
	mbuf->port = rxq->port_id;
	mbuf->ol_flags = 0;
	if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) {
		mbuf->hash.rss = rxcmp->rss_hash;
		mbuf->ol_flags |= PKT_RX_RSS_HASH;
	} else {
		mbuf->hash.fdir.id = rxcmp1->cfa_code;
		mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
	}
	if (rxcmp1->flags2 & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
		mbuf->vlan_tci = rxcmp1->metadata &
			(RX_PKT_CMPL_METADATA_VID_MASK |
			RX_PKT_CMPL_METADATA_DE |
			RX_PKT_CMPL_METADATA_PRI_MASK);
		mbuf->ol_flags |= PKT_RX_VLAN_PKT;
	}

	rx_buf->mbuf = NULL;
	if (rxcmp1->errors_v2 & RX_CMP_L2_ERRORS) {
		/* Re-install the mbuf back to the rx ring */
		bnxt_reuse_rx_mbuf(rxr, cons, mbuf);

		rc = -EIO;
		goto next_rx;
	}
	/*
	 * TODO: Redesign this....
	 * If the allocation fails, the packet does not get received.
	 * Simply returning this will result in slowly falling behind
	 * on the producer ring buffers.
	 * Instead, "filling up" the producer just before ringing the
	 * doorbell could be a better solution since it will let the
	 * producer ring starve until memory is available again pushing
	 * the drops into hardware and getting them out of the driver
	 * allowing recovery to a full producer ring.
	 *
	 * This could also help with cache usage by preventing per-packet
	 * calls in favour of a tight loop with the same function being called
	 * in it.
	 */
	if (bnxt_alloc_rx_data(rxq, rxr, prod)) {
		RTE_LOG(ERR, PMD, "mbuf alloc failed with prod=0x%x\n", prod);
		rc = -ENOMEM;
		goto next_rx;
	}

	/*
	 * All MBUFs are allocated with the same size under DPDK,
	 * no optimization for rx_copy_thresh
	 */

	/* AGG buf operation is deferred */

	/* EW - VLAN reception.  Must compare against the ol_flags */

	*rx_pkt = mbuf;
next_rx:
	rxr->rx_prod = RING_NEXT(rxr->rx_ring_struct, prod);

	*raw_cons = tmp_raw_cons;

	return rc;
}
Example #17
/**
 * DPDK callback for RX.
 *
 * The following function is the same as mlx5_rx_burst_sp(), except it doesn't
 * manage scattered packets. Improves performance when MRU is lower than the
 * size of the first segment.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct rxq *rxq = (struct rxq *)dpdk_rxq;
	struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;
	const unsigned int elts_n = rxq->elts_n;
	unsigned int elts_head = rxq->elts_head;
	struct ibv_sge sges[pkts_n];
	unsigned int i;
	unsigned int pkts_ret = 0;
	int ret;

	if (unlikely(rxq->sp))
		return mlx5_rx_burst_sp(dpdk_rxq, pkts, pkts_n);
	for (i = 0; (i != pkts_n); ++i) {
		struct rxq_elt *elt = &(*elts)[elts_head];
		unsigned int len;
		struct rte_mbuf *seg = elt->buf;
		struct rte_mbuf *rep;
		uint32_t flags;
		uint16_t vlan_tci;

		/* Sanity checks. */
		assert(seg != NULL);
		assert(elts_head < rxq->elts_n);
		assert(rxq->elts_head < rxq->elts_n);
		/*
		 * Fetch initial bytes of packet descriptor into a
		 * cacheline while allocating rep.
		 */
		rte_prefetch0(seg);
		rte_prefetch0(&seg->cacheline1);
		ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci);
		if (unlikely(ret < 0)) {
			struct ibv_wc wc;
			int wcs_n;

			DEBUG("rxq=%p, poll_length() failed (ret=%d)",
			      (void *)rxq, ret);
			/* ibv_poll_cq() must be used in case of failure. */
			wcs_n = ibv_poll_cq(rxq->cq, 1, &wc);
			if (unlikely(wcs_n == 0))
				break;
			if (unlikely(wcs_n < 0)) {
				DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)",
				      (void *)rxq, wcs_n);
				break;
			}
			assert(wcs_n == 1);
			if (unlikely(wc.status != IBV_WC_SUCCESS)) {
				/* Whatever, just repost the offending WR. */
				DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work"
				      " completion status (%d): %s",
				      (void *)rxq, wc.wr_id, wc.status,
				      ibv_wc_status_str(wc.status));
#ifdef MLX5_PMD_SOFT_COUNTERS
				/* Increment dropped packets counter. */
				++rxq->stats.idropped;
#endif
				/* Add SGE to array for repost. */
				sges[i] = elt->sge;
				goto repost;
			}
			ret = wc.byte_len;
		}
		if (ret == 0)
			break;
		assert(ret >= (rxq->crc_present << 2));
		len = ret - (rxq->crc_present << 2);
		rep = __rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(rep == NULL)) {
			/*
			 * Unable to allocate a replacement mbuf,
			 * repost WR.
			 */
			DEBUG("rxq=%p: can't allocate a new mbuf",
			      (void *)rxq);
			/* Increment out of memory counters. */
			++rxq->stats.rx_nombuf;
			++rxq->priv->dev->data->rx_mbuf_alloc_failed;
			goto repost;
		}

		/* Reconfigure sge to use rep instead of seg. */
		elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
		assert(elt->sge.lkey == rxq->mr->lkey);
		elt->buf = rep;

		/* Add SGE to array for repost. */
		sges[i] = elt->sge;

		/* Update seg information. */
		SET_DATA_OFF(seg, RTE_PKTMBUF_HEADROOM);
		NB_SEGS(seg) = 1;
		PORT(seg) = rxq->port_id;
		NEXT(seg) = NULL;
		PKT_LEN(seg) = len;
		DATA_LEN(seg) = len;
		if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip) {
			seg->packet_type = rxq_cq_to_pkt_type(flags);
			seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS
			if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
				seg->ol_flags |= PKT_RX_VLAN_PKT;
				seg->vlan_tci = vlan_tci;
			}
#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */
		}
		/* Return packet. */
		*(pkts++) = seg;
		++pkts_ret;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += len;
#endif
repost:
		if (++elts_head >= elts_n)
			elts_head = 0;
		continue;
	}
	if (unlikely(i == 0))
		return 0;
	/* Repost WRs. */
#ifdef DEBUG_RECV
	DEBUG("%p: reposting %u WRs", (void *)rxq, i);
#endif
	ret = rxq->recv(rxq->wq, sges, i);
	if (unlikely(ret)) {
		/* Inability to repost WRs is fatal. */
		DEBUG("%p: recv_burst(): failed (ret=%d)",
		      (void *)rxq->priv,
		      ret);
		abort();
	}
	rxq->elts_head = elts_head;
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += pkts_ret;
#endif
	return pkts_ret;
}
Example #18
virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	struct rte_mbuf *buff;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0,0,0,0,0,0},0};
	uint64_t buff_addr = 0;
	uint64_t buff_hdr_addr = 0;
	uint32_t head[MAX_PKT_BURST], packet_len = 0;
	uint32_t head_idx, packet_success = 0;
	uint16_t avail_idx, res_cur_idx;
	uint16_t res_base_idx, res_end_idx;
	uint16_t free_entries;
	uint8_t success = 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
	vq = dev->virtqueue_rx;
	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
	/* As many data cores may want access to available buffers, they need to be reserved. */
	do {

		res_base_idx = vq->last_used_idx_res;

		avail_idx = *((volatile uint16_t *)&vq->avail->idx);

		free_entries = (avail_idx - res_base_idx);

		/*check that we have enough buffers*/
		if (unlikely(count > free_entries))
			count = free_entries;

		if (count == 0)
			return 0;

		res_end_idx = res_base_idx + count;
		/* vq->last_used_idx_res is atomically updated. */
		success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx,
									res_end_idx);
	} while (unlikely(success == 0));
	res_cur_idx = res_base_idx;
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx);

	/* Prefetch available ring to retrieve indexes. */
	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (head_idx = 0; head_idx < count; head_idx++)
		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];

	/* Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[packet_success]]);

	while (res_cur_idx != res_end_idx) {
		/* Get descriptor from available ring */
		desc = &vq->desc[head[packet_success]];
		/* Prefetch descriptor address. */
		rte_prefetch0(desc);

		buff = pkts[packet_success];

		/* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
		buff_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void*)(uintptr_t)buff_addr);

		{
			/* Copy virtio_hdr to packet and increment buffer address */
			buff_hdr_addr = buff_addr;
			packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;

			/*
			 * If the descriptors are chained the header and data are placed in
			 * separate buffers.
			 */
			if (desc->flags & VRING_DESC_F_NEXT) {
				desc->len = vq->vhost_hlen;
				desc = &vq->desc[desc->next];
				/* Buffer address translation. */
				buff_addr = gpa_to_vva(dev, desc->addr);
				desc->len = rte_pktmbuf_data_len(buff);
			} else {
				buff_addr += vq->vhost_hlen;
				desc->len = packet_len;
			}
		}

		/* Update used ring with desc information */
		vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;

		/* Copy mbuf data to buffer */
		rte_memcpy((void *)(uintptr_t)buff_addr, (const void*)buff->pkt.data, rte_pktmbuf_data_len(buff));

		res_cur_idx++;
		packet_success++;

		/* Mergeable buffers are disabled, so a header is required per buffer. */
		rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void*)&virtio_hdr, vq->vhost_hlen);
		if (res_cur_idx < res_end_idx) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[packet_success]]);
		}
	}

	rte_compiler_barrier();

	/* Wait until it's our turn to add our buffer to the used ring. */
	while (unlikely(vq->last_used_idx != res_base_idx))
		rte_pause();

	*(volatile uint16_t *)&vq->used->idx += count;

	vq->last_used_idx = res_end_idx;

	return count;
}
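virtio_dev_rx() above claims a contiguous range of available-ring entries with a 16-bit compare-and-set before touching any buffers, so several data cores can enqueue into the same virtqueue. Here is a standalone sketch of just that reservation loop; the demo_vq layout is an assumption, not the real vhost structure.

#include <stdint.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>

struct demo_vq {
	volatile uint16_t avail_idx;          /* written by the guest */
	volatile uint16_t last_used_idx_res;  /* reservation cursor */
};

/* Reserve up to 'count' entries; returns how many were actually claimed
 * and fills [*base, *end) with the reserved index range. */
static inline uint16_t
demo_reserve_avail(struct demo_vq *vq, uint16_t count,
		   uint16_t *base, uint16_t *end)
{
	uint16_t free_entries;
	int success;

	do {
		*base = vq->last_used_idx_res;
		free_entries = (uint16_t)(vq->avail_idx - *base);
		if (unlikely(count > free_entries))
			count = free_entries;
		if (count == 0)
			return 0;
		*end = (uint16_t)(*base + count);
		/* Retry if another core moved the cursor first. */
		success = rte_atomic16_cmpset(&vq->last_used_idx_res,
					      *base, *end);
	} while (unlikely(success == 0));

	return count;
}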
Пример #19
0
static uint16_t
bnx2x_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct bnx2x_rx_queue *rxq = p_rxq;
	struct bnx2x_softc *sc = rxq->sc;
	struct bnx2x_fastpath *fp = &sc->fp[rxq->queue_id];
	uint32_t nb_rx = 0;
	uint16_t hw_cq_cons, sw_cq_cons, sw_cq_prod;
	uint16_t bd_cons, bd_prod;
	struct rte_mbuf *new_mb;
	uint16_t rx_pref;
	struct eth_fast_path_rx_cqe *cqe_fp;
	uint16_t len, pad;
	struct rte_mbuf *rx_mb = NULL;

	hw_cq_cons = le16toh(*fp->rx_cq_cons_sb);
	if ((hw_cq_cons & USABLE_RCQ_ENTRIES_PER_PAGE) ==
			USABLE_RCQ_ENTRIES_PER_PAGE) {
		++hw_cq_cons;
	}

	bd_cons = rxq->rx_bd_head;
	bd_prod = rxq->rx_bd_tail;
	sw_cq_cons = rxq->rx_cq_head;
	sw_cq_prod = rxq->rx_cq_tail;

	if (sw_cq_cons == hw_cq_cons)
		return 0;

	while (nb_rx < nb_pkts && sw_cq_cons != hw_cq_cons) {

		bd_prod &= MAX_RX_BD(rxq);
		bd_cons &= MAX_RX_BD(rxq);

		cqe_fp = &rxq->cq_ring[sw_cq_cons & MAX_RX_BD(rxq)].fast_path_cqe;

		if (unlikely(CQE_TYPE_SLOW(cqe_fp->type_error_flags & ETH_FAST_PATH_RX_CQE_TYPE))) {
			PMD_RX_LOG(ERR, "slowpath event during traffic processing");
			break;
		}

		if (unlikely(cqe_fp->type_error_flags & ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG)) {
			PMD_RX_LOG(ERR, "flags 0x%x rx packet %u",
					cqe_fp->type_error_flags, sw_cq_cons);
			goto next_rx;
		}

		len = cqe_fp->pkt_len_or_gro_seg_len;
		pad = cqe_fp->placement_offset;

		new_mb = bnx2x_rxmbuf_alloc(rxq->mb_pool);
		if (unlikely(!new_mb)) {
			PMD_RX_LOG(ERR, "mbuf alloc fail fp[%02d]", fp->index);
			goto next_rx;
		}

		rx_mb = rxq->sw_ring[bd_cons];
		rxq->sw_ring[bd_cons] = new_mb;
		rxq->rx_ring[bd_prod] = new_mb->buf_physaddr;

		rx_pref = NEXT_RX_BD(bd_cons) & MAX_RX_BD(rxq);
		rte_prefetch0(rxq->sw_ring[rx_pref]);
		if ((rx_pref & 0x3) == 0) {
			rte_prefetch0(&rxq->rx_ring[rx_pref]);
			rte_prefetch0(&rxq->sw_ring[rx_pref]);
		}

		rx_mb->data_off = pad;
		rx_mb->nb_segs = 1;
		rx_mb->next = NULL;
		rx_mb->pkt_len = rx_mb->data_len = len;
		rx_mb->port = rxq->port_id;
		rx_mb->buf_len = len + pad;
		rte_prefetch1(rte_pktmbuf_mtod(rx_mb, void *));

		/*
		 * If we received a packet with a vlan tag,
		 * attach that information to the packet.
		 */
		if (cqe_fp->pars_flags.flags & PARSING_FLAGS_VLAN) {
			rx_mb->vlan_tci = cqe_fp->vlan_tag;
			rx_mb->ol_flags |= PKT_RX_VLAN_PKT;
		}

		rx_pkts[nb_rx] = rx_mb;
		nb_rx++;

		/* limit spinning on the queue */
		if (unlikely(nb_rx == sc->rx_budget)) {
			PMD_RX_LOG(ERR, "Limit spinning on the queue");
			break;
		}

next_rx:
		bd_cons    = NEXT_RX_BD(bd_cons);
		bd_prod    = NEXT_RX_BD(bd_prod);
		sw_cq_prod = NEXT_RCQ_IDX(sw_cq_prod);
		sw_cq_cons = NEXT_RCQ_IDX(sw_cq_cons);
	}
	rxq->rx_bd_head = bd_cons;
	rxq->rx_bd_tail = bd_prod;
	rxq->rx_cq_head = sw_cq_cons;
	rxq->rx_cq_tail = sw_cq_prod;

	bnx2x_upd_rx_prod_fast(sc, fp, bd_prod, sw_cq_prod);

	return nb_rx;
}
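The receive loop above prefetches the next software-ring entry on every iteration and, every fourth entry, prefetches the ring arrays themselves. A small sketch of that prefetch cadence follows; the ring layout and names are illustrative assumptions.

#include <stdint.h>
#include <rte_mbuf.h>
#include <rte_prefetch.h>

/* Assumed layout: sw_ring holds mbuf pointers, hw_ring holds the buffer
 * addresses handed to the NIC; 'mask' is ring_size - 1 for a power-of-two
 * ring size. */
static inline void
demo_rx_prefetch_next(struct rte_mbuf **sw_ring, uint64_t *hw_ring,
		      uint16_t next, uint16_t mask)
{
	uint16_t pref = next & mask;

	rte_prefetch0(sw_ring[pref]);      /* the mbuf we will touch next */
	if ((pref & 0x3) == 0) {           /* every fourth entry */
		rte_prefetch0(&hw_ring[pref]);
		rte_prefetch0(&sw_ring[pref]);
	}
}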
Пример #20
0
static inline uint16_t
fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union fm10k_rx_desc *rxdp;
	struct rte_mbuf **mbufp;
	uint16_t nb_pkts_recd;
	int pos;
	struct fm10k_rx_queue *rxq = rx_queue;
	uint64_t var;
	__m128i shuf_msk;
	__m128i dd_check, eop_check;
	uint16_t next_dd;

	next_dd = rxq->next_dd;

	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->hw_ring + next_dd;

	rte_prefetch0(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_FM10K_RXQ_REARM_THRESH)
		fm10k_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->d.staterr & FM10K_RXD_STATUS_DD))
		return 0;

	/* Vector RX will process 4 packets at a time; strip the unaligned
	 * tail in case nb_pkts is not a multiple of 4.
	 */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_FM10K_DESCS_PER_LOOP);

	/* 4 packets DD mask */
	dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL);

	/* 4 packets EOP mask */
	eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL);

	/* mask to shuffle from desc. to mbuf */
	shuf_msk = _mm_set_epi8(
		7, 6, 5, 4,  /* octet 4~7, 32bits rss */
		15, 14,      /* octet 14~15, low 16 bits vlan_macip */
		13, 12,      /* octet 12~13, 16 bits data_len */
		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
		13, 12,      /* octet 12~13, low 16 bits pkt_len */
		0xFF, 0xFF,  /* skip high 16 bits pkt_type */
		0xFF, 0xFF   /* Skip pkt_type field in shuffle operation */
		);
	/*
	 * Compile-time verify the shuffle mask
	 * NOTE: some field positions already verified above, but duplicated
	 * here for completeness in case of future modifications.
	 */
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	mbufp = &rxq->sw_ring[next_dd];

	/* A. load 4 packet in one loop
	 * [A*. mask out 4 unused dirty field in desc]
	 * B. copy 4 mbuf point from swring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */
	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
			pos += RTE_FM10K_DESCS_PER_LOOP,
			rxdp += RTE_FM10K_DESCS_PER_LOOP) {
		__m128i descs0[RTE_FM10K_DESCS_PER_LOOP];
		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		__m128i zero, staterr, sterr_tmp1, sterr_tmp2;
		__m128i mbp1;
		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
#if defined(RTE_ARCH_X86_64)
		__m128i mbp2;
#endif

		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
		mbp1 = _mm_loadu_si128((__m128i *)&mbufp[pos]);

		/* Read desc statuses backwards to avoid race condition */
		/* A.1 load 4 pkts desc */
		descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
		rte_compiler_barrier();

		/* B.2 copy 2 64 bit or 4 32 bit mbuf point into rx_pkts */
		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);

#if defined(RTE_ARCH_X86_64)
		/* B.1 load 2 64 bit mbuf points */
		mbp2 = _mm_loadu_si128((__m128i *)&mbufp[pos+2]);
#endif

		descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
		rte_compiler_barrier();
		/* B.1 load 2 mbuf point */
		descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
		rte_compiler_barrier();
		descs0[0] = _mm_loadu_si128((__m128i *)(rxdp));

#if defined(RTE_ARCH_X86_64)
		/* B.2 copy 2 mbuf point into rx_pkts  */
		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
#endif

		/* avoid compiler reorder optimization */
		rte_compiler_barrier();

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = _mm_shuffle_epi8(descs0[3], shuf_msk);
		pkt_mb3 = _mm_shuffle_epi8(descs0[2], shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = _mm_unpackhi_epi32(descs0[3], descs0[2]);
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = _mm_unpackhi_epi32(descs0[1], descs0[0]);

		/* set ol_flags with vlan packet type */
		fm10k_desc_to_olflags_v(descs0, &rx_pkts[pos]);

		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = _mm_shuffle_epi8(descs0[1], shuf_msk);
		pkt_mb1 = _mm_shuffle_epi8(descs0[0], shuf_msk);

		/* C.2 get 4 pkts staterr value  */
		zero = _mm_xor_si128(dd_check, dd_check);
		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);

		/* D.3 copy final 3,4 data to rx_pkts */
		_mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
				pkt_mb4);
		_mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
				pkt_mb3);

		/* C* extract and record EOP bit */
		if (split_packet) {
			__m128i eop_shuf_mask = _mm_set_epi8(
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0x04, 0x0C, 0x00, 0x08
					);

			/* and with mask to extract bits, flipping 1-0 */
			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
			/* The staterr values are not in order, as the count
			 * of DD bits doesn't care. However, for end-of-packet
			 * tracking we do care, so shuffle. This also
			 * compresses the 32-bit values to 8-bit.
			 */
			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
			/* store the resulting 32-bit value */
			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
			split_packet += RTE_FM10K_DESCS_PER_LOOP;

			/* zero-out next pointers */
			rx_pkts[pos]->next = NULL;
			rx_pkts[pos + 1]->next = NULL;
			rx_pkts[pos + 2]->next = NULL;
			rx_pkts[pos + 3]->next = NULL;
		}

		/* C.3 calc available number of desc */
		staterr = _mm_and_si128(staterr, dd_check);
		staterr = _mm_packs_epi32(staterr, zero);

		/* D.3 copy final 1,2 data to rx_pkts */
		_mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
				pkt_mb2);
		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
				pkt_mb1);

		fm10k_desc_to_pktype_v(descs0, &rx_pkts[pos]);

		/* C.4 calc available number of desc */
		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
		nb_pkts_recd += var;
		if (likely(var != RTE_FM10K_DESCS_PER_LOOP))
			break;
	}

	/* Update our internal tail pointer */
	rxq->next_dd = (uint16_t)(rxq->next_dd + nb_pkts_recd);
	rxq->next_dd = (uint16_t)(rxq->next_dd & (rxq->nb_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}
Пример #21
0
dpdk_virtio_dev_to_vm_tx_burst(struct dpdk_virtio_writer *p,
        vr_dpdk_virtioq_t *vq, struct rte_mbuf **pkts, uint32_t count)
{
    struct vring_desc *desc;
    struct rte_mbuf *buff;
    /* The virtio_hdr is initialised to 0. */
    struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
    uint64_t buff_addr = 0;
    uint64_t buff_hdr_addr = 0;
    uint32_t head[VR_DPDK_VIRTIO_TX_BURST_SZ];
    uint32_t head_idx, packet_success = 0;
    uint16_t avail_idx, res_cur_idx;
    uint16_t res_base_idx, res_end_idx;
    uint16_t free_entries;
    uint8_t success = 0;
    vr_uvh_client_t *vru_cl;

    if (unlikely(vq->vdv_ready_state == VQ_NOT_READY))
        return 0;

    vru_cl = vr_dpdk_virtio_get_vif_client(vq->vdv_vif_idx);
    if (unlikely(vru_cl == NULL))
        return 0;

    /*
     * Because multiple data cores may want access to the available
     * buffers, they need to be reserved.
     */
    do {
        res_base_idx = vq->vdv_last_used_idx_res;
        avail_idx = *((volatile uint16_t *)&vq->vdv_avail->idx);

        free_entries = (avail_idx - res_base_idx);
        /* Check that we have enough buffers. */
        if (unlikely(count > free_entries))
            count = free_entries;

        if (unlikely(count == 0))
            return 0;

        res_end_idx = res_base_idx + count;
        /* vq->vdv_last_used_idx_res is atomically updated. */
        /* TODO: Allow to disable cmpset if no concurrency in application. */
        success = rte_atomic16_cmpset(&vq->vdv_last_used_idx_res,
                res_base_idx, res_end_idx);
    } while (unlikely(success == 0));
    res_cur_idx = res_base_idx;
    RTE_LOG(DEBUG, VROUTER, "%s: Current Index %d| End Index %d\n",
            __func__, res_cur_idx, res_end_idx);

    /* Prefetch available ring to retrieve indexes. */
    rte_prefetch0(&vq->vdv_avail->ring[res_cur_idx & (vq->vdv_size - 1)]);

    /* Retrieve all of the head indexes first to avoid caching issues. */
    for (head_idx = 0; head_idx < count; head_idx++)
        head[head_idx] = vq->vdv_avail->ring[(res_cur_idx + head_idx) &
                    (vq->vdv_size - 1)];

    /* Prefetch descriptor index. */
    rte_prefetch0(&vq->vdv_desc[head[packet_success]]);

    while (res_cur_idx != res_end_idx) {
        uint32_t offset = 0, vb_offset = 0;
        uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
        uint8_t hdr = 0, uncompleted_pkt = 0;

        /* Get descriptor from available ring */
        desc = &vq->vdv_desc[head[packet_success]];

        buff = pkts[packet_success];

        /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
        buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
        /* Prefetch buffer address. */
        rte_prefetch0((void *)(uintptr_t)buff_addr);

        /* Copy virtio_hdr to packet and increment buffer address */
        buff_hdr_addr = buff_addr;

        /*
         * If the descriptors are chained the header and data are
         * placed in separate buffers.
         */
        if (likely(desc->flags & VRING_DESC_F_NEXT)
            && (desc->len == sizeof(struct virtio_net_hdr))) {
            /*
             * TODO: verify that desc->next is sane below.
             */
            desc = &vq->vdv_desc[desc->next];
            /* Buffer address translation. */
            buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
        } else {
            vb_offset += sizeof(struct virtio_net_hdr);
            hdr = 1;
        }

        pkt_len = rte_pktmbuf_pkt_len(buff);
        data_len = rte_pktmbuf_data_len(buff);
        len_to_cpy = RTE_MIN(data_len,
            hdr ? desc->len - sizeof(struct virtio_net_hdr) : desc->len);
        while (total_copied < pkt_len) {
            /* Copy mbuf data to buffer */
            rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
                rte_pktmbuf_mtod_offset(buff, const void *, offset),
                len_to_cpy);

            offset += len_to_cpy;
            vb_offset += len_to_cpy;
            total_copied += len_to_cpy;

            /* The whole packet completes */
            if (likely(total_copied == pkt_len))
                break;

            /* The current segment completes */
            if (offset == data_len) {
                buff = buff->next;
                offset = 0;
                data_len = rte_pktmbuf_data_len(buff);
            }

            /* The current vring descriptor done */
            if (vb_offset == desc->len) {
                if (desc->flags & VRING_DESC_F_NEXT) {
                    desc = &vq->vdv_desc[desc->next];
                    buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
                    vb_offset = 0;
                } else {
                    /* Room in vring buffer is not enough */
                    uncompleted_pkt = 1;
                    break;
                }
            }
            len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
        };

        /* Update used ring with desc information */
        vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].id =
                            head[packet_success];

        /* Drop the packet if it is uncompleted */
        if (unlikely(uncompleted_pkt == 1))
            vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len =
                            sizeof(struct virtio_net_hdr);
        else
            vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len =
                            pkt_len + sizeof(struct virtio_net_hdr);

        res_cur_idx++;
        packet_success++;

        /* TODO: in DPDK 2.1 we do not copy the header
        if (unlikely(uncompleted_pkt == 1))
            continue;
        */
        rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
            (const void *)&virtio_hdr, sizeof(struct virtio_net_hdr));

        if (likely(res_cur_idx < res_end_idx)) {
            /* Prefetch descriptor index. */
            rte_prefetch0(&vq->vdv_desc[head[packet_success]]);
        }
    }

    rte_compiler_barrier();

    /* Wait until it's our turn to add our buffer to the used ring. */
    while (unlikely(vq->vdv_last_used_idx != res_base_idx))
        rte_pause();

    *(volatile uint16_t *)&vq->vdv_used->idx += count;
    vq->vdv_last_used_idx = res_end_idx;
    RTE_LOG(DEBUG, VROUTER, "%s: vif %d vq %p last_used_idx %d used->idx %d\n",
            __func__, vq->vdv_vif_idx, vq, vq->vdv_last_used_idx, vq->vdv_used->idx);

    /* flush used->idx update before we read avail->flags. */
    rte_mb();

    /* Kick the guest if necessary. */
    if (unlikely(!(vq->vdv_avail->flags & VRING_AVAIL_F_NO_INTERRUPT))) {
        p->nb_syscalls++;
        eventfd_write(vq->vdv_callfd, 1);
    }
    return count;
}
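The burst above finishes by updating used->idx, issuing a full memory barrier, and kicking the guest through the call eventfd only when interrupt suppression is not requested. A stripped-down sketch of that notification step is below; the DEMO_ flag value mirrors VRING_AVAIL_F_NO_INTERRUPT and the parameters stand in for the real virtqueue fields.

#include <stdint.h>
#include <sys/eventfd.h>
#include <rte_atomic.h>

#define DEMO_VRING_AVAIL_F_NO_INTERRUPT 1

static inline void
demo_kick_guest(const volatile uint16_t *avail_flags, int callfd)
{
	/* Make the used->idx update visible before reading avail->flags. */
	rte_mb();
	if (!(*avail_flags & DEMO_VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write(callfd, 1);
}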
Пример #22
0
/**
 * DPDK callback for RX with scattered packets support.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct rxq *rxq = (struct rxq *)dpdk_rxq;
	struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;
	const unsigned int elts_n = rxq->elts_n;
	unsigned int elts_head = rxq->elts_head;
	unsigned int i;
	unsigned int pkts_ret = 0;
	int ret;

	if (unlikely(!rxq->sp))
		return mlx5_rx_burst(dpdk_rxq, pkts, pkts_n);
	if (unlikely(elts == NULL)) /* See RTE_DEV_CMD_SET_MTU. */
		return 0;
	for (i = 0; (i != pkts_n); ++i) {
		struct rxq_elt_sp *elt = &(*elts)[elts_head];
		unsigned int len;
		unsigned int pkt_buf_len;
		struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */
		struct rte_mbuf **pkt_buf_next = &pkt_buf;
		unsigned int seg_headroom = RTE_PKTMBUF_HEADROOM;
		unsigned int j = 0;
		uint32_t flags;
		uint16_t vlan_tci;

		/* Sanity checks. */
		assert(elts_head < rxq->elts_n);
		assert(rxq->elts_head < rxq->elts_n);
		ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci);
		if (unlikely(ret < 0)) {
			struct ibv_wc wc;
			int wcs_n;

			DEBUG("rxq=%p, poll_length() failed (ret=%d)",
			      (void *)rxq, ret);
			/* ibv_poll_cq() must be used in case of failure. */
			wcs_n = ibv_poll_cq(rxq->cq, 1, &wc);
			if (unlikely(wcs_n == 0))
				break;
			if (unlikely(wcs_n < 0)) {
				DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)",
				      (void *)rxq, wcs_n);
				break;
			}
			assert(wcs_n == 1);
			if (unlikely(wc.status != IBV_WC_SUCCESS)) {
				/* Whatever, just repost the offending WR. */
				DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work"
				      " completion status (%d): %s",
				      (void *)rxq, wc.wr_id, wc.status,
				      ibv_wc_status_str(wc.status));
#ifdef MLX5_PMD_SOFT_COUNTERS
				/* Increment dropped packets counter. */
				++rxq->stats.idropped;
#endif
				goto repost;
			}
			ret = wc.byte_len;
		}
		if (ret == 0)
			break;
		assert(ret >= (rxq->crc_present << 2));
		len = ret - (rxq->crc_present << 2);
		pkt_buf_len = len;
		/*
		 * Replace spent segments with new ones, concatenate and
		 * return them as pkt_buf.
		 */
		while (1) {
			struct ibv_sge *sge = &elt->sges[j];
			struct rte_mbuf *seg = elt->bufs[j];
			struct rte_mbuf *rep;
			unsigned int seg_tailroom;

			assert(seg != NULL);
			/*
			 * Fetch initial bytes of packet descriptor into a
			 * cacheline while allocating rep.
			 */
			rte_prefetch0(seg);
			rep = __rte_mbuf_raw_alloc(rxq->mp);
			if (unlikely(rep == NULL)) {
				/*
				 * Unable to allocate a replacement mbuf,
				 * repost WR.
				 */
				DEBUG("rxq=%p: can't allocate a new mbuf",
				      (void *)rxq);
				if (pkt_buf != NULL) {
					*pkt_buf_next = NULL;
					rte_pktmbuf_free(pkt_buf);
				}
				/* Increment out of memory counters. */
				++rxq->stats.rx_nombuf;
				++rxq->priv->dev->data->rx_mbuf_alloc_failed;
				goto repost;
			}
#ifndef NDEBUG
			/* Poison user-modifiable fields in rep. */
			NEXT(rep) = (void *)((uintptr_t)-1);
			SET_DATA_OFF(rep, 0xdead);
			DATA_LEN(rep) = 0xd00d;
			PKT_LEN(rep) = 0xdeadd00d;
			NB_SEGS(rep) = 0x2a;
			PORT(rep) = 0x2a;
			rep->ol_flags = -1;
#endif
			assert(rep->buf_len == seg->buf_len);
			assert(rep->buf_len == rxq->mb_len);
			/* Reconfigure sge to use rep instead of seg. */
			assert(sge->lkey == rxq->mr->lkey);
			sge->addr = ((uintptr_t)rep->buf_addr + seg_headroom);
			elt->bufs[j] = rep;
			++j;
			/* Update pkt_buf if it's the first segment, or link
			 * seg to the previous one and update pkt_buf_next. */
			*pkt_buf_next = seg;
			pkt_buf_next = &NEXT(seg);
			/* Update seg information. */
			seg_tailroom = (seg->buf_len - seg_headroom);
			assert(sge->length == seg_tailroom);
			SET_DATA_OFF(seg, seg_headroom);
			if (likely(len <= seg_tailroom)) {
				/* Last segment. */
				DATA_LEN(seg) = len;
				PKT_LEN(seg) = len;
				/* Sanity check. */
				assert(rte_pktmbuf_headroom(seg) ==
				       seg_headroom);
				assert(rte_pktmbuf_tailroom(seg) ==
				       (seg_tailroom - len));
				break;
			}
			DATA_LEN(seg) = seg_tailroom;
			PKT_LEN(seg) = seg_tailroom;
			/* Sanity check. */
			assert(rte_pktmbuf_headroom(seg) == seg_headroom);
			assert(rte_pktmbuf_tailroom(seg) == 0);
			/* Fix len and clear headroom for next segments. */
			len -= seg_tailroom;
			seg_headroom = 0;
		}
		/* Update head and tail segments. */
		*pkt_buf_next = NULL;
		assert(pkt_buf != NULL);
		assert(j != 0);
		NB_SEGS(pkt_buf) = j;
		PORT(pkt_buf) = rxq->port_id;
		PKT_LEN(pkt_buf) = pkt_buf_len;
		if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip) {
			pkt_buf->packet_type = rxq_cq_to_pkt_type(flags);
			pkt_buf->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS
			if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
				pkt_buf->ol_flags |= PKT_RX_VLAN_PKT;
				pkt_buf->vlan_tci = vlan_tci;
			}
#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */
		}

		/* Return packet. */
		*(pkts++) = pkt_buf;
		++pkts_ret;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += pkt_buf_len;
#endif
repost:
		ret = rxq->recv(rxq->wq, elt->sges, RTE_DIM(elt->sges));
		if (unlikely(ret)) {
			/* Inability to repost WRs is fatal. */
			DEBUG("%p: recv_sg_list(): failed (ret=%d)",
			      (void *)rxq->priv,
			      ret);
			abort();
		}
		if (++elts_head >= elts_n)
			elts_head = 0;
		continue;
	}
	if (unlikely(i == 0))
		return 0;
	rxq->elts_head = elts_head;
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += pkts_ret;
#endif
	return pkts_ret;
}
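The scattered path above fills each segment up to its tailroom, keeps the headroom only in the first segment, and puts the remainder in the last one. A tiny standalone sketch of that length distribution (plain C, no mbufs, purely illustrative):

#include <stdio.h>

static void
demo_split_scattered_len(unsigned int len, unsigned int buf_len,
			 unsigned int headroom)
{
	unsigned int seg = 0;
	unsigned int seg_headroom = headroom;

	for (;;) {
		unsigned int seg_tailroom = buf_len - seg_headroom;

		if (len <= seg_tailroom) {
			printf("seg %u: off=%u len=%u (last)\n",
			       seg, seg_headroom, len);
			break;
		}
		printf("seg %u: off=%u len=%u\n",
		       seg, seg_headroom, seg_tailroom);
		len -= seg_tailroom;
		seg_headroom = 0; /* subsequent segments start at offset 0 */
		seg++;
	}
}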
Пример #23
0
virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool)
{
	struct rte_mbuf m;
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	uint64_t buff_addr = 0;
	uint32_t head[MAX_PKT_BURST];
	uint32_t used_idx;
	uint32_t i;
	uint16_t free_entries, packet_success = 0;
	uint16_t avail_idx;

	vq = dev->virtqueue_tx;
	avail_idx = *((volatile uint16_t *)&vq->avail->idx);

	/* If there are no available buffers then return. */
	if (vq->last_used_idx == avail_idx)
		return;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh);

	/* Prefetch available ring to retrieve head indexes. */
	rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);

	/* Get the number of free entries in the ring. */
	free_entries = avail_idx - vq->last_used_idx;
	free_entries = unlikely(free_entries < MAX_PKT_BURST) ? free_entries : MAX_PKT_BURST;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries);
	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (i = 0; i < free_entries; i++)
		head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];

	/* Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[packet_success]]);

	while (packet_success < free_entries) {
		desc = &vq->desc[head[packet_success]];
		/* Prefetch descriptor address. */
		rte_prefetch0(desc);

		if (packet_success < (free_entries - 1)) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[packet_success+1]]);
		}

		/* Update used index buffer information. */
		used_idx = vq->last_used_idx & (vq->size - 1);
		vq->used->ring[used_idx].id = head[packet_success];
		vq->used->ring[used_idx].len = 0;

		/* Discard first buffer as it is the virtio header */
		desc = &vq->desc[desc->next];

		/* Buffer address translation. */
		buff_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void*)(uintptr_t)buff_addr);

		/* Setup dummy mbuf. This is copied to a real mbuf if transmitted out the physical port. */
		m.pkt.data_len = desc->len;
		m.pkt.data = (void*)(uintptr_t)buff_addr;
		m.pkt.nb_segs = 1;

		virtio_tx_route(dev, &m, mbuf_pool, 0);

		vq->last_used_idx++;
		packet_success++;
	}

	rte_compiler_barrier();
	vq->used->idx += packet_success;
	/* Kick guest if required. */
}
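Both vhost examples above rely on gpa_to_vva() to turn guest-physical descriptor addresses into host-virtual pointers before copying. Below is a hedged sketch of such a translation over an assumed region table; the real struct used by the vhost library differs, this only illustrates the lookup.

#include <stdint.h>

struct demo_mem_region {
	uint64_t guest_phys_addr;
	uint64_t size;
	uint64_t host_user_addr;
};

/* Returns the host virtual address for 'gpa', or 0 if it falls outside
 * every registered region. */
static inline uint64_t
demo_gpa_to_vva(const struct demo_mem_region *regions, uint32_t nregions,
		uint64_t gpa)
{
	uint32_t i;

	for (i = 0; i < nregions; i++) {
		const struct demo_mem_region *r = &regions[i];

		if (gpa >= r->guest_phys_addr &&
		    gpa < r->guest_phys_addr + r->size)
			return gpa - r->guest_phys_addr + r->host_user_addr;
	}
	return 0;
}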
Пример #24
0
/*
 * Notice:
 * - nb_pkts < RTE_I40E_DESCS_PER_LOOP: no packets are returned
 * - nb_pkts > RTE_I40E_VPMD_RX_BURST: only RTE_I40E_VPMD_RX_BURST
 *   DD bits are scanned
 */
static inline uint16_t
_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union i40e_rx_desc *rxdp;
	struct i40e_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
	int pos;
	uint64_t var;
	__m128i shuf_msk;
	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;

	__m128i crc_adjust = _mm_set_epi16(
				0, 0, 0,    /* ignore non-length fields */
				-rxq->crc_len, /* sub crc on data_len */
				0,          /* ignore high-16bits of pkt_len */
				-rxq->crc_len, /* sub crc on pkt_len */
				0, 0            /* ignore pkt_type field */
			);
	/*
	 * compile-time check the above crc_adjust layout is correct.
	 * NOTE: the first field (lowest address) is given last in set_epi16
	 * call above.
	 */
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
	__m128i dd_check, eop_check;

	/* nb_pkts shall be less equal than RTE_I40E_MAX_RX_BURST */
	nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST);

	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);

	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;

	rte_prefetch0(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
		i40e_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->wb.qword1.status_error_len &
			rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
		return 0;

	/* 4 packets DD mask */
	dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL);

	/* 4 packets EOP mask */
	eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL);

	/* mask to shuffle from desc. to mbuf */
	shuf_msk = _mm_set_epi8(
		7, 6, 5, 4,  /* octet 4~7, 32bits rss */
		3, 2,        /* octet 2~3, low 16 bits vlan_macip */
		15, 14,      /* octet 15~14, 16 bits data_len */
		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
		15, 14,      /* octet 15~14, low 16 bits pkt_len */
		0xFF, 0xFF,  /* pkt_type set as unknown */
		0xFF, 0xFF   /* pkt_type set as unknown */
		);
	/*
	 * Compile-time verify the shuffle mask
	 * NOTE: some field positions already verified above, but duplicated
	 * here for completeness in case of future modifications.
	 */
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];

	/* A. load 4 packet in one loop
	 * [A*. mask out 4 unused dirty field in desc]
	 * B. copy 4 mbuf point from swring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */

	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
			pos += RTE_I40E_DESCS_PER_LOOP,
			rxdp += RTE_I40E_DESCS_PER_LOOP) {
		__m128i descs[RTE_I40E_DESCS_PER_LOOP];
		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		__m128i zero, staterr, sterr_tmp1, sterr_tmp2;
		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
		__m128i mbp1;
#if defined(RTE_ARCH_X86_64)
		__m128i mbp2;
#endif

		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
		/* Read desc statuses backwards to avoid race condition */
		/* A.1 load 4 pkts desc */
		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
		rte_compiler_barrier();

		/* B.2 copy 2 64 bit or 4 32 bit mbuf point into rx_pkts */
		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);

#if defined(RTE_ARCH_X86_64)
		/* B.1 load 2 64 bit mbuf points */
		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
#endif

		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
		rte_compiler_barrier();
		/* B.1 load 2 mbuf point */
		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
		rte_compiler_barrier();
		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));

#if defined(RTE_ARCH_X86_64)
		/* B.2 copy 2 mbuf point into rx_pkts  */
		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
#endif

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* avoid compiler reorder optimization */
		rte_compiler_barrier();

		/* pkt 3,4 shift the pktlen field to be 16-bit aligned*/
		const __m128i len3 = _mm_slli_epi32(descs[3], PKTLEN_SHIFT);
		const __m128i len2 = _mm_slli_epi32(descs[2], PKTLEN_SHIFT);

		/* merge the now-aligned packet length fields back in */
		descs[3] = _mm_blend_epi16(descs[3], len3, 0x80);
		descs[2] = _mm_blend_epi16(descs[2], len2, 0x80);

		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);

		desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);

		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);

		/* pkt 1,2 shift the pktlen field to be 16-bit aligned*/
		const __m128i len1 = _mm_slli_epi32(descs[1], PKTLEN_SHIFT);
		const __m128i len0 = _mm_slli_epi32(descs[0], PKTLEN_SHIFT);

		/* merge the now-aligned packet length fields back in */
		descs[1] = _mm_blend_epi16(descs[1], len1, 0x80);
		descs[0] = _mm_blend_epi16(descs[0], len0, 0x80);

		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);

		/* C.2 get 4 pkts staterr value  */
		zero = _mm_xor_si128(dd_check, dd_check);
		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);

		/* D.3 copy final 3,4 data to rx_pkts */
		_mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
				 pkt_mb4);
		_mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
				 pkt_mb3);

		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);

		/* C* extract and record EOP bit */
		if (split_packet) {
			__m128i eop_shuf_mask = _mm_set_epi8(
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0x04, 0x0C, 0x00, 0x08
					);

			/* and with mask to extract bits, flipping 1-0 */
			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
			/* The staterr values are not in order, as the count
			 * of DD bits doesn't care. However, for end-of-packet
			 * tracking we do care, so shuffle. This also
			 * compresses the 32-bit values to 8-bit.
			 */
			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
			/* store the resulting 32-bit value */
			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
			split_packet += RTE_I40E_DESCS_PER_LOOP;
		}

		/* C.3 calc available number of desc */
		staterr = _mm_and_si128(staterr, dd_check);
		staterr = _mm_packs_epi32(staterr, zero);

		/* D.3 copy final 1,2 data to rx_pkts */
		_mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
				 pkt_mb2);
		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
				 pkt_mb1);
		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
		/* C.4 calc available number of desc */
		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
		nb_pkts_recd += var;
		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
			break;
	}

	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}
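Both vector RX routines above finish a burst by popcounting the packed DD bits to learn how many descriptors completed and by wrapping the tail index with a power-of-two mask. A compact sketch of that bookkeeping, with assumed parameter names:

#include <stdint.h>

static inline uint16_t
demo_advance_rx_tail(uint16_t tail, uint64_t packed_dd_bits,
		     uint16_t ring_size /* must be a power of two */)
{
	/* Each completed descriptor contributes one set bit. */
	uint16_t nb_done = (uint16_t)__builtin_popcountll(packed_dd_bits);

	return (uint16_t)((tail + nb_done) & (ring_size - 1));
}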