/* free mbufs from death row */
#ifdef RTE_LIBRW_PIOT
int
#else
void
#endif
rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
		uint32_t prefetch)
{
	uint32_t i, k, n;

	k = RTE_MIN(prefetch, dr->cnt);
	n = dr->cnt;

	for (i = 0; i != k; i++)
		rte_prefetch0(dr->row[i]);

	for (i = 0; i != n - k; i++) {
		rte_prefetch0(dr->row[i + k]);
		rte_pktmbuf_free(dr->row[i]);
	}

	for (; i != n; i++)
		rte_pktmbuf_free(dr->row[i]);

	dr->cnt = 0;
#ifdef RTE_LIBRW_PIOT
	return n;
#endif
}
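/*
 * The death-row free above is a textbook software-prefetch pipeline:
 * warm the cache with the first `prefetch` entries, then prefetch
 * entry i + k while freeing entry i, and drain the last k entries
 * with no further prefetches. A minimal stand-alone sketch of the
 * same pattern; the process() callback and the prefetch distance are
 * assumptions, not part of the original code.
 */
#include <stdint.h>
#include <rte_prefetch.h>

#define PREFETCH_DIST 3	/* assumed distance; tune for the target CPU */

static void
process_with_prefetch(void **objs, uint32_t n, void (*process)(void *))
{
	uint32_t i, k;

	/* Prime: issue prefetches for the first few elements. */
	k = PREFETCH_DIST < n ? PREFETCH_DIST : n;
	for (i = 0; i != k; i++)
		rte_prefetch0(objs[i]);

	/* Steady state: prefetch ahead of the element being processed. */
	for (i = 0; i != n - k; i++) {
		rte_prefetch0(objs[i + k]);
		process(objs[i]);
	}

	/* Drain: the last k elements are already on their way to cache. */
	for (; i != n; i++)
		process(objs[i]);
}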
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx &
				(vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t)uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR,
				"vring descriptor with no mbuf cookie at %u\n",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}
int
test_prefetch(void)
{
	int a;

	rte_prefetch0(&a);
	rte_prefetch1(&a);
	rte_prefetch2(&a);

	return 0;
}
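/*
 * The three hints exercised above differ only in how deep into the
 * cache hierarchy the line is pulled. On the generic (non
 * arch-specific) side these are thin wrappers over the GCC/Clang
 * builtin; the sketch below shows that mapping. The exact instruction
 * emitted (e.g. PREFETCHT0/T1/T2 on x86) is up to the compiler and is
 * stated here as an assumption about the toolchain.
 */
static inline void my_prefetch0(const volatile void *p)
{
	/* locality 3: keep in all cache levels (~PREFETCHT0) */
	__builtin_prefetch((const void *)p, 0, 3);
}

static inline void my_prefetch1(const volatile void *p)
{
	/* locality 2: L2 and lower (~PREFETCHT1) */
	__builtin_prefetch((const void *)p, 0, 2);
}

static inline void my_prefetch2(const volatile void *p)
{
	/* locality 1: last-level cache (~PREFETCHT2) */
	__builtin_prefetch((const void *)p, 0, 1);
}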
static int
perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
{
	PERF_WORKER_INIT;
	uint16_t i;
	/* +1 to avoid prefetching out of array bounds */
	struct rte_event ev[BURST_SIZE + 1];

	while (t->done == false) {
		uint16_t const nb_rx = rte_event_dequeue_burst(dev, port, ev,
				BURST_SIZE, 0);

		if (!nb_rx) {
			rte_pause();
			continue;
		}

		for (i = 0; i < nb_rx; i++) {
			if (enable_fwd_latency && !prod_timer_type) {
				rte_prefetch0(ev[i+1].event_ptr);
				/* first stage in pipeline.
				 * mark time stamp to compute fwd latency
				 */
				atq_mark_fwd_latency(&ev[i]);
			}
			/* last stage in pipeline */
			if (unlikely((ev[i].sub_event_type % nb_stages)
						== laststage)) {
				if (enable_fwd_latency)
					cnt = perf_process_last_stage_latency(
						pool, &ev[i], w, bufs, sz, cnt);
				else
					cnt = perf_process_last_stage(pool,
						&ev[i], w, bufs, sz, cnt);

				ev[i].op = RTE_EVENT_OP_RELEASE;
			} else {
				atq_fwd_event(&ev[i], sched_type_list,
						nb_stages);
			}
		}

		uint16_t enq;

		enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
		while (enq < nb_rx) {
			enq += rte_event_enqueue_burst(dev, port,
							ev + enq, nb_rx - enq);
		}
	}

	return 0;
}
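/*
 * The `BURST_SIZE + 1` declaration is what makes the unconditional
 * rte_prefetch0(ev[i + 1].event_ptr) safe: the extra sentinel slot
 * guarantees ev[i + 1] is a readable address even on the last
 * iteration. A minimal sketch of the idiom; the item type, handler
 * and dequeue callback are hypothetical. Prefetching whatever stale
 * pointer sits in the sentinel is benign, since prefetch instructions
 * never fault.
 */
#include <stdint.h>
#include <rte_prefetch.h>

#define BURST 32

struct item {
	void *payload;
};

static void
drain_one_burst(uint16_t (*dequeue)(struct item *, uint16_t),
		void (*handle)(struct item *))
{
	/* +1 sentinel slot so it[i + 1] is always a valid read. */
	struct item it[BURST + 1];
	uint16_t n = dequeue(it, BURST);
	uint16_t i;

	for (i = 0; i < n; i++) {
		/* No bounds check: the worst case prefetches the
		 * sentinel slot, which is harmless. */
		rte_prefetch0(it[i + 1].payload);
		handle(&it[i]);
	}
}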
/* free mbufs from death row */
void
rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
		uint32_t prefetch)
{
	uint32_t i, k, n;

	k = RTE_MIN(prefetch, dr->cnt);
	n = dr->cnt;

	for (i = 0; i != k; i++)
		rte_prefetch0(dr->row[i]);

	for (i = 0; i != n - k; i++) {
		rte_prefetch0(dr->row[i + k]);
		rte_pktmbuf_free(dr->row[i]);
	}

	for (; i != n; i++)
		rte_pktmbuf_free(dr->row[i]);

	dr->cnt = 0;
}
uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
			       uint16_t nb_pkts)
{
	struct bnxt_rx_queue *rxq = rx_queue;
	struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
	struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
	uint32_t raw_cons = cpr->cp_raw_cons;
	uint32_t cons;
	int nb_rx_pkts = 0;
	bool rx_event = false;
	struct rx_pkt_cmpl *rxcmp;

	/* Handle RX burst request */
	while (1) {
		int rc;

		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
		rte_prefetch0(&cpr->cp_desc_ring[cons]);
		rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];

		if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct))
			break;

		/* TODO: Avoid magic numbers... */
		if ((CMP_TYPE(rxcmp) & 0x30) == 0x10) {
			rc = bnxt_rx_pkt(&rx_pkts[nb_rx_pkts], rxq, &raw_cons);
			if (likely(!rc))
				nb_rx_pkts++;
			else if (rc == -EBUSY)	/* partial completion */
				break;
			rx_event = true;
		}
		raw_cons = NEXT_RAW_CMP(raw_cons);
		if (nb_rx_pkts == nb_pkts)
			break;
	}

	if (raw_cons == cpr->cp_raw_cons) {
		/*
		 * For PMD, there is no need to keep on pushing to REARM
		 * the doorbell if there are no new completions
		 */
		return nb_rx_pkts;
	}
	cpr->cp_raw_cons = raw_cons;

	B_CP_DIS_DB(cpr, cpr->cp_raw_cons);
	if (rx_event)
		B_RX_DB(rxr->rx_doorbell, rxr->rx_prod);

	return nb_rx_pkts;
}
static int get_tx_q(struct port *p, queue_t qid,
		snb_array_t pkts, int max_cnt)
{
	struct vport_priv *priv = get_port_priv(p);
	struct queue *tx_queue = &priv->inc_qs[qid];
	void *objs[max_cnt];
	int cnt;
	int i;

	cnt = llring_dequeue_burst(tx_queue->drv_to_sn, objs, max_cnt);
	if (cnt == 0)
		return 0;

	for (i = 0; i < cnt; i++) {
		pkts[i] = (struct snbuf *)objs[i];
		rte_prefetch0(snb_head_data(pkts[i]));
	}

	refill_tx_bufs(tx_queue->sn_to_drv, max_cnt);

	for (i = 0; i < cnt; i++) {
		struct sn_tx_metadata *tx_meta;
		int legit_size;

		tx_meta = (struct sn_tx_metadata *)snb_head_data(pkts[i]);

#if OLD_METADATA
		pkts[i]->in_port = vport->port.port_id;
		pkts[i]->in_queue = txq;

		/* TODO: sanity check for the metadata */
		pkts[i]->tx.csum_start = tx_meta->csum_start;
		pkts[i]->tx.csum_dest = tx_meta->csum_dest;
#endif

		legit_size = (snb_append(pkts[i],
					sizeof(struct sn_tx_metadata) +
					tx_meta->length) != NULL);
		assert(legit_size);

		snb_adj(pkts[i], sizeof(struct sn_tx_metadata));
	}

	return cnt;
}
/*
 * vPMD raw receive routine, only accept (nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
 *
 * Notice:
 * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
 * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
 *   numbers of DD bit
 * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
 */
static inline uint16_t
_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
	int pos;
	uint64_t var;
	__m128i shuf_msk;
	__m128i crc_adjust = _mm_set_epi16(
				0, 0, 0,       /* ignore non-length fields */
				-rxq->crc_len, /* sub crc on data_len */
				0,             /* ignore high-16bits of pkt_len */
				-rxq->crc_len, /* sub crc on pkt_len */
				0, 0           /* ignore pkt_type field */
			);
	__m128i dd_check, eop_check;
	uint8_t vlan_flags;

	/* nb_pkts must be less than or equal to RTE_IXGBE_MAX_RX_BURST */
	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);

	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);

	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;
	rte_prefetch0(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
		ixgbe_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->wb.upper.status_error &
				rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
		return 0;

	/* 4 packets DD mask */
	dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL);

	/* 4 packets EOP mask */
	eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL);

	/* mask to shuffle from desc. to mbuf */
	shuf_msk = _mm_set_epi8(
		7, 6, 5, 4,  /* octet 4~7, 32bits rss */
		15, 14,      /* octet 14~15, low 16 bits vlan_macip */
		13, 12,      /* octet 12~13, 16 bits data_len */
		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
		13, 12,      /* octet 12~13, low 16 bits pkt_len */
		0xFF, 0xFF,  /* skip 32 bit pkt_type */
		0xFF, 0xFF
		);

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];

	/* ensure these 2 flags are in the lower 8 bits */
	RTE_BUILD_BUG_ON((PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED) > UINT8_MAX);
	vlan_flags = rxq->vlan_flags & UINT8_MAX;

	/* A. load 4 packet in one loop
	 * [A*. mask out 4 unused dirty field in desc]
	 * B. copy 4 mbuf point from swring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */
	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
			pos += RTE_IXGBE_DESCS_PER_LOOP,
			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		__m128i zero, staterr, sterr_tmp1, sterr_tmp2;
		/* two mbuf pointers in one XMM reg. */
		__m128i mbp1, mbp2;

		/* B.1 load 1 mbuf point */
		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);

		/* Read desc statuses backwards to avoid race condition */
		/* A.1 load 4 pkts desc */
		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
		rte_compiler_barrier();

		/* B.2 copy 2 mbuf point into rx_pkts */
		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);

		/* B.1 load 1 mbuf point */
		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);

		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
		rte_compiler_barrier();
		/* B.1 load 2 mbuf point */
		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
		rte_compiler_barrier();
		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));

		/* B.2 copy 2 mbuf point into rx_pkts */
		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* avoid compiler reorder optimization */
		rte_compiler_barrier();

		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);

		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);

		/* set ol_flags with vlan packet type */
		desc_to_olflags_v(descs, vlan_flags, &rx_pkts[pos]);

		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);

		/* C.2 get 4 pkts staterr value */
		zero = _mm_xor_si128(dd_check, dd_check);
		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);

		/* D.3 copy final 3,4 data to rx_pkts */
		_mm_storeu_si128(
			(void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
			pkt_mb4);
		_mm_storeu_si128(
			(void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
			pkt_mb3);

		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);

		/* C* extract and record EOP bit */
		if (split_packet) {
			__m128i eop_shuf_mask = _mm_set_epi8(
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0x04, 0x0C, 0x00, 0x08
					);

			/* and with mask to extract bits, flipping 1-0 */
			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
			/* the staterr values are not in order, as the count
			 * of dd bits doesn't care. However, for end of
			 * packet tracking, we do care, so shuffle. This also
			 * compresses the 32-bit values to 8-bit
			 */
			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
			/* store the resulting 32-bit value */
			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
			split_packet += RTE_IXGBE_DESCS_PER_LOOP;

			/* zero-out next pointers */
			rx_pkts[pos]->next = NULL;
			rx_pkts[pos + 1]->next = NULL;
			rx_pkts[pos + 2]->next = NULL;
			rx_pkts[pos + 3]->next = NULL;
		}

		/* C.3 calc available number of desc */
		staterr = _mm_and_si128(staterr, dd_check);
		staterr = _mm_packs_epi32(staterr, zero);

		/* D.3 copy final 1,2 data to rx_pkts */
		_mm_storeu_si128(
			(void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
			pkt_mb2);
		_mm_storeu_si128(
			(void *)&rx_pkts[pos]->rx_descriptor_fields1,
			pkt_mb1);

		/* C.4 calc available number of desc */
		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
		nb_pkts_recd += var;
		if (likely(var != RTE_IXGBE_DESCS_PER_LOOP))
			break;
	}

	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}
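/*
 * The C.3/C.4 steps above condense to a small, testable kernel: mask
 * the four 32-bit status words down to their DD (descriptor done)
 * bits, narrow 4x32 -> 4x16 so all four flags land in the low 64 bits
 * of the register, then popcount. A self-contained sketch; SSE2 only,
 * x86-64 assumed for _mm_cvtsi128_si64. The burst loop stops at the
 * first group where this returns fewer than 4, which is why lane
 * order does not matter for the count itself.
 */
#include <stdint.h>
#include <emmintrin.h>	/* SSE2 */

static inline unsigned int
count_dd_bits(__m128i staterr)
{
	const __m128i dd_check = _mm_set_epi32(1, 1, 1, 1);

	/* Keep only bit 0 of each 32-bit lane. */
	__m128i dd = _mm_and_si128(staterr, dd_check);
	/* Pack 4x32 -> 4x16; the four flags now occupy the low 64 bits. */
	__m128i packed = _mm_packs_epi32(dd, _mm_setzero_si128());

	/* One set bit per ready descriptor. */
	return (unsigned int)__builtin_popcountll(
			(unsigned long long)_mm_cvtsi128_si64(packed));
}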
uint16_t
rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
	struct rte_mbuf *m, *prev;
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	uint64_t vb_addr = 0;
	uint32_t head[MAX_PKT_BURST];
	uint32_t used_idx;
	uint32_t i;
	uint16_t free_entries, entry_success = 0;
	uint16_t avail_idx;

	if (unlikely(queue_id != VIRTIO_TXQ)) {
		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
		return 0;
	}

	vq = dev->virtqueue[VIRTIO_TXQ];
	avail_idx = *((volatile uint16_t *)&vq->avail->idx);

	/* If there are no available buffers then return. */
	if (vq->last_used_idx == avail_idx)
		return 0;

	LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
		dev->device_fh);

	/* Prefetch available ring to retrieve head indexes. */
	rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);

	/* get the number of free entries in the ring */
	free_entries = (avail_idx - vq->last_used_idx);

	free_entries = RTE_MIN(free_entries, count);
	/* Limit to MAX_PKT_BURST. */
	free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
			dev->device_fh, free_entries);
	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (i = 0; i < free_entries; i++)
		head[i] = vq->avail->ring[(vq->last_used_idx + i) &
					(vq->size - 1)];

	/* Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[entry_success]]);
	rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);

	while (entry_success < free_entries) {
		uint32_t vb_avail, vb_offset;
		uint32_t seg_avail, seg_offset;
		uint32_t cpy_len;
		uint32_t seg_num = 0;
		struct rte_mbuf *cur;
		uint8_t alloc_err = 0;

		desc = &vq->desc[head[entry_success]];

		/* Discard first buffer as it is the virtio header */
		if (desc->flags & VRING_DESC_F_NEXT) {
			desc = &vq->desc[desc->next];
			vb_offset = 0;
			vb_avail = desc->len;
		} else {
			vb_offset = vq->vhost_hlen;
			vb_avail = desc->len - vb_offset;
		}

		/* Buffer address translation. */
		vb_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void *)(uintptr_t)vb_addr);

		used_idx = vq->last_used_idx & (vq->size - 1);

		if (entry_success < (free_entries - 1)) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[entry_success+1]]);
			rte_prefetch0(&vq->used->ring[(used_idx + 1) &
						(vq->size - 1)]);
		}

		/* Update used index buffer information. */
		vq->used->ring[used_idx].id = head[entry_success];
		vq->used->ring[used_idx].len = 0;

		/* Allocate an mbuf and populate the structure. */
		m = rte_pktmbuf_alloc(mbuf_pool);
		if (unlikely(m == NULL)) {
			RTE_LOG(ERR, VHOST_DATA,
				"Failed to allocate memory for mbuf.\n");
			break;
		}

		seg_offset = 0;
		seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
		cpy_len = RTE_MIN(vb_avail, seg_avail);

		PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

		seg_num++;
		cur = m;
		prev = m;
		while (cpy_len != 0) {
			rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) +
						seg_offset),
				(void *)((uintptr_t)(vb_addr + vb_offset)),
				cpy_len);

			seg_offset += cpy_len;
			vb_offset += cpy_len;
			vb_avail -= cpy_len;
			seg_avail -= cpy_len;

			if (vb_avail != 0) {
				/*
				 * The segment reaches its end, while the
				 * virtio buffer in the TX vring still has
				 * more data to be copied.
				 */
				cur->data_len = seg_offset;
				m->pkt_len += seg_offset;
				/* Allocate an mbuf and populate the structure. */
				cur = rte_pktmbuf_alloc(mbuf_pool);
				if (unlikely(cur == NULL)) {
					RTE_LOG(ERR, VHOST_DATA, "Failed to "
						"allocate memory for mbuf.\n");
					rte_pktmbuf_free(m);
					alloc_err = 1;
					break;
				}

				seg_num++;
				prev->next = cur;
				prev = cur;
				seg_offset = 0;
				seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
			} else {
				if (desc->flags & VRING_DESC_F_NEXT) {
					/*
					 * There are more virtio buffers in
					 * the same vring entry that need to
					 * be copied.
					 */
					if (seg_avail == 0) {
						/*
						 * The current segment has no
						 * room to accommodate more
						 * data.
						 */
						cur->data_len = seg_offset;
						m->pkt_len += seg_offset;
						/*
						 * Allocate an mbuf and
						 * populate the structure.
						 */
						cur = rte_pktmbuf_alloc(
							mbuf_pool);
						if (unlikely(cur == NULL)) {
							RTE_LOG(ERR,
								VHOST_DATA,
								"Failed to "
								"allocate memory "
								"for mbuf\n");
							rte_pktmbuf_free(m);
							alloc_err = 1;
							break;
						}
						seg_num++;
						prev->next = cur;
						prev = cur;
						seg_offset = 0;
						seg_avail = cur->buf_len -
							RTE_PKTMBUF_HEADROOM;
					}

					desc = &vq->desc[desc->next];

					/* Buffer address translation. */
					vb_addr = gpa_to_vva(dev, desc->addr);
					/* Prefetch buffer address. */
					rte_prefetch0((void *)(uintptr_t)vb_addr);
					vb_offset = 0;
					vb_avail = desc->len;

					PRINT_PACKET(dev, (uintptr_t)vb_addr,
						desc->len, 0);
				} else {
					/* The whole packet completes. */
					cur->data_len = seg_offset;
					m->pkt_len += seg_offset;
					vb_avail = 0;
				}
			}

			cpy_len = RTE_MIN(vb_avail, seg_avail);
		}

		if (unlikely(alloc_err == 1))
			break;

		m->nb_segs = seg_num;

		pkts[entry_success] = m;
		vq->last_used_idx++;
		entry_success++;
	}

	rte_compiler_barrier();
	vq->used->idx += entry_success;
	/* Kick guest if required. */
	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write((int)vq->callfd, 1);
	return entry_success;
}
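/*
 * A pattern worth isolating from the vhost routine above: the head
 * indexes are gathered from the avail ring first, precisely so the
 * loop can prefetch the descriptor each *upcoming* index points to,
 * one iteration before it is dereferenced. A minimal sketch with
 * hypothetical descriptor type and consume() callback:
 */
#include <stdint.h>
#include <rte_prefetch.h>

struct my_desc {		/* hypothetical descriptor */
	uint64_t addr;
	uint32_t len;
};

static void
walk_indirect(struct my_desc *table, const uint32_t *head, uint32_t n,
	      void (*consume)(struct my_desc *))
{
	uint32_t i;

	if (n == 0)
		return;
	/* Prefetch the first target before entering the loop. */
	rte_prefetch0(&table[head[0]]);
	for (i = 0; i < n; i++) {
		/* Prefetch the next indirection target one step ahead,
		 * so its cache miss overlaps with the current copy. */
		if (i + 1 < n)
			rte_prefetch0(&table[head[i + 1]]);
		consume(&table[head[i]]);
	}
}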
static inline uint32_t __attribute__((always_inline))
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	struct rte_mbuf *buff;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
	uint64_t buff_addr = 0;
	uint64_t buff_hdr_addr = 0;
	uint32_t head[MAX_PKT_BURST];
	uint32_t head_idx, packet_success = 0;
	uint16_t avail_idx, res_cur_idx;
	uint16_t res_base_idx, res_end_idx;
	uint16_t free_entries;
	uint8_t success = 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
	if (unlikely(queue_id != VIRTIO_RXQ)) {
		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
		return 0;
	}

	vq = dev->virtqueue[VIRTIO_RXQ];
	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

	/*
	 * As many data cores may want access to available buffers,
	 * they need to be reserved.
	 */
	do {
		res_base_idx = vq->last_used_idx_res;
		avail_idx = *((volatile uint16_t *)&vq->avail->idx);

		free_entries = (avail_idx - res_base_idx);
		/* check that we have enough buffers */
		if (unlikely(count > free_entries))
			count = free_entries;

		if (count == 0)
			return 0;

		res_end_idx = res_base_idx + count;
		/* vq->last_used_idx_res is atomically updated. */
		/* TODO: Allow to disable cmpset if no concurrency in
		 * application. */
		success = rte_atomic16_cmpset(&vq->last_used_idx_res,
				res_base_idx, res_end_idx);
	} while (unlikely(success == 0));
	res_cur_idx = res_base_idx;
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
			dev->device_fh, res_cur_idx, res_end_idx);

	/* Prefetch available ring to retrieve indexes. */
	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (head_idx = 0; head_idx < count; head_idx++)
		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
					(vq->size - 1)];

	/* Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[packet_success]]);

	while (res_cur_idx != res_end_idx) {
		uint32_t offset = 0, vb_offset = 0;
		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
		uint8_t hdr = 0, uncompleted_pkt = 0;

		/* Get descriptor from available ring */
		desc = &vq->desc[head[packet_success]];

		buff = pkts[packet_success];

		/* Convert from gpa to vva
		 * (guest physical addr -> vhost virtual addr) */
		buff_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void *)(uintptr_t)buff_addr);

		/* Copy virtio_hdr to packet and increment buffer address */
		buff_hdr_addr = buff_addr;

		/*
		 * If the descriptors are chained the header and data are
		 * placed in separate buffers.
		 */
		if ((desc->flags & VRING_DESC_F_NEXT) &&
			(desc->len == vq->vhost_hlen)) {
			desc = &vq->desc[desc->next];
			/* Buffer address translation. */
			buff_addr = gpa_to_vva(dev, desc->addr);
		} else {
			vb_offset += vq->vhost_hlen;
			hdr = 1;
		}

		pkt_len = rte_pktmbuf_pkt_len(buff);
		data_len = rte_pktmbuf_data_len(buff);
		len_to_cpy = RTE_MIN(data_len,
			hdr ? desc->len - vq->vhost_hlen : desc->len);
		while (total_copied < pkt_len) {
			/* Copy mbuf data to buffer */
			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
				(const void *)(rte_pktmbuf_mtod(buff,
					const char *) + offset),
				len_to_cpy);
			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
				len_to_cpy, 0);

			offset += len_to_cpy;
			vb_offset += len_to_cpy;
			total_copied += len_to_cpy;

			/* The whole packet completes */
			if (total_copied == pkt_len)
				break;

			/* The current segment completes */
			if (offset == data_len) {
				buff = buff->next;
				offset = 0;
				data_len = rte_pktmbuf_data_len(buff);
			}

			/* The current vring descriptor is done */
			if (vb_offset == desc->len) {
				if (desc->flags & VRING_DESC_F_NEXT) {
					desc = &vq->desc[desc->next];
					buff_addr = gpa_to_vva(dev, desc->addr);
					vb_offset = 0;
				} else {
					/* Not enough room in the vring buffer */
					uncompleted_pkt = 1;
					break;
				}
			}

			len_to_cpy = RTE_MIN(data_len - offset,
					desc->len - vb_offset);
		}

		/* Update used ring with desc information */
		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
							head[packet_success];

		/* Drop the packet if it is uncompleted */
		if (unlikely(uncompleted_pkt == 1))
			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
							vq->vhost_hlen;
		else
			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
							pkt_len + vq->vhost_hlen;

		res_cur_idx++;
		packet_success++;

		if (unlikely(uncompleted_pkt == 1))
			continue;

		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
			(const void *)&virtio_hdr, vq->vhost_hlen);

		PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);

		if (res_cur_idx < res_end_idx) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[packet_success]]);
		}
	}

	rte_compiler_barrier();

	/* Wait until it's our turn to add our buffer to the used ring. */
	while (unlikely(vq->last_used_idx != res_base_idx))
		rte_pause();

	*(volatile uint16_t *)&vq->used->idx += count;
	vq->last_used_idx = res_end_idx;

	/* flush used->idx update before we read avail->flags. */
	rte_mb();

	/* Kick the guest if necessary. */
	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write((int)vq->callfd, 1);
	return count;
}
static inline uint32_t __attribute__((always_inline))
copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
	uint16_t res_end_idx, struct rte_mbuf *pkt)
{
	uint32_t vec_idx = 0;
	uint32_t entry_success = 0;
	struct vhost_virtqueue *vq;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
		{0, 0, 0, 0, 0, 0}, 0};
	uint16_t cur_idx = res_base_idx;
	uint64_t vb_addr = 0;
	uint64_t vb_hdr_addr = 0;
	uint32_t seg_offset = 0;
	uint32_t vb_offset = 0;
	uint32_t seg_avail;
	uint32_t vb_avail;
	uint32_t cpy_len, entry_len;

	if (pkt == NULL)
		return 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
		"End Index %d\n",
		dev->device_fh, cur_idx, res_end_idx);

	/*
	 * Convert from gpa to vva
	 * (guest physical addr -> vhost virtual addr)
	 */
	vq = dev->virtqueue[VIRTIO_RXQ];
	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
	vb_hdr_addr = vb_addr;

	/* Prefetch buffer address. */
	rte_prefetch0((void *)(uintptr_t)vb_addr);

	virtio_hdr.num_buffers = res_end_idx - res_base_idx;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
		dev->device_fh, virtio_hdr.num_buffers);

	rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
		(const void *)&virtio_hdr, vq->vhost_hlen);

	PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);

	seg_avail = rte_pktmbuf_data_len(pkt);
	vb_offset = vq->vhost_hlen;
	vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;

	entry_len = vq->vhost_hlen;

	if (vb_avail == 0) {
		uint32_t desc_idx = vq->buf_vec[vec_idx].desc_idx;

		if ((vq->desc[desc_idx].flags & VRING_DESC_F_NEXT) == 0) {
			/* Update used ring with desc information */
			vq->used->ring[cur_idx & (vq->size - 1)].id =
				vq->buf_vec[vec_idx].desc_idx;
			vq->used->ring[cur_idx & (vq->size - 1)].len =
				entry_len;

			entry_len = 0;
			cur_idx++;
			entry_success++;
		}

		vec_idx++;
		vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);

		/* Prefetch buffer address. */
		rte_prefetch0((void *)(uintptr_t)vb_addr);
		vb_offset = 0;
		vb_avail = vq->buf_vec[vec_idx].buf_len;
	}

	cpy_len = RTE_MIN(vb_avail, seg_avail);

	while (cpy_len > 0) {
		/* Copy mbuf data to vring buffer */
		rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
			(const void *)(rte_pktmbuf_mtod(pkt, char *) +
				seg_offset),
			cpy_len);

		PRINT_PACKET(dev,
			(uintptr_t)(vb_addr + vb_offset),
			cpy_len, 0);

		seg_offset += cpy_len;
		vb_offset += cpy_len;
		seg_avail -= cpy_len;
		vb_avail -= cpy_len;
		entry_len += cpy_len;

		if (seg_avail != 0) {
			/*
			 * The virtio buffer in this vring entry reaches
			 * its end, but the segment is not yet complete.
			 */
			if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
				VRING_DESC_F_NEXT) == 0) {
				/* Update used ring with desc information */
				vq->used->ring[cur_idx & (vq->size - 1)].id =
					vq->buf_vec[vec_idx].desc_idx;
				vq->used->ring[cur_idx & (vq->size - 1)].len =
					entry_len;
				entry_len = 0;
				cur_idx++;
				entry_success++;
			}

			vec_idx++;
			vb_addr = gpa_to_vva(dev,
				vq->buf_vec[vec_idx].buf_addr);
			vb_offset = 0;
			vb_avail = vq->buf_vec[vec_idx].buf_len;
			cpy_len = RTE_MIN(vb_avail, seg_avail);
		} else {
			/*
			 * The current segment is complete; check whether
			 * the whole packet is complete as well.
			 */
			pkt = pkt->next;
			if (pkt != NULL) {
				/*
				 * There are more segments.
				 */
				if (vb_avail == 0) {
					/*
					 * The current buffer from the vring
					 * is used up; fetch the next buffer
					 * from buf_vec.
					 */
					uint32_t desc_idx =
						vq->buf_vec[vec_idx].desc_idx;

					if ((vq->desc[desc_idx].flags &
						VRING_DESC_F_NEXT) == 0) {
						uint16_t wrapped_idx =
							cur_idx & (vq->size - 1);
						/*
						 * Update used ring with the
						 * descriptor information
						 */
						vq->used->ring[wrapped_idx].id =
							desc_idx;
						vq->used->ring[wrapped_idx].len =
							entry_len;
						entry_success++;
						entry_len = 0;
						cur_idx++;
					}

					/* Get next buffer from buf_vec. */
					vec_idx++;
					vb_addr = gpa_to_vva(dev,
						vq->buf_vec[vec_idx].buf_addr);
					vb_avail =
						vq->buf_vec[vec_idx].buf_len;
					vb_offset = 0;
				}

				seg_offset = 0;
				seg_avail = rte_pktmbuf_data_len(pkt);
				cpy_len = RTE_MIN(vb_avail, seg_avail);
			} else {
				/*
				 * The whole packet completes.
				 */
				/* Update used ring with desc information */
				vq->used->ring[cur_idx & (vq->size - 1)].id =
					vq->buf_vec[vec_idx].desc_idx;
				vq->used->ring[cur_idx & (vq->size - 1)].len =
					entry_len;
				entry_success++;
				break;
			}
		}
	}

	return entry_success;
}
uint16_t
fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
				uint16_t nb_pkts)
{
	struct rte_mbuf *mbuf;
	union fm10k_rx_desc desc;
	struct fm10k_rx_queue *q = rx_queue;
	uint16_t count = 0;
	uint16_t nb_rcv, nb_seg;
	int alloc = 0;
	uint16_t next_dd;
	struct rte_mbuf *first_seg = q->pkt_first_seg;
	struct rte_mbuf *last_seg = q->pkt_last_seg;
	int ret;

	next_dd = q->next_dd;
	nb_rcv = 0;

	nb_seg = RTE_MIN(nb_pkts, q->alloc_thresh);
	for (count = 0; count < nb_seg; count++) {
		mbuf = q->sw_ring[next_dd];
		desc = q->hw_ring[next_dd];
		if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
			break;
#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
		dump_rxd(&desc);
#endif

		if (++next_dd == q->nb_desc) {
			next_dd = 0;
			alloc = 1;
		}

		/* Prefetch next mbuf while processing current one. */
		rte_prefetch0(q->sw_ring[next_dd]);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((next_dd & 0x3) == 0) {
			rte_prefetch0(&q->hw_ring[next_dd]);
			rte_prefetch0(&q->sw_ring[next_dd]);
		}

		/* Fill data length */
		rte_pktmbuf_data_len(mbuf) = desc.w.length;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet and
		 * initialize its context.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (!first_seg) {
			first_seg = mbuf;
			first_seg->pkt_len = desc.w.length;
		} else {
			first_seg->pkt_len =
					(uint16_t)(first_seg->pkt_len +
					rte_pktmbuf_data_len(mbuf));
			first_seg->nb_segs++;
			last_seg->next = mbuf;
		}

		/*
		 * If this is not the last buffer of the received packet,
		 * update the pointer to the last mbuf of the current scattered
		 * packet and continue to parse the RX ring.
		 */
		if (!(desc.d.staterr & FM10K_RXD_STATUS_EOP)) {
			last_seg = mbuf;
			continue;
		}

		first_seg->ol_flags = 0;
#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
		rx_desc_to_ol_flags(first_seg, &desc);
#endif
		first_seg->hash.rss = desc.d.rss;

		/* Prefetch data of first segment, if configured to do so. */
		rte_packet_prefetch((char *)first_seg->buf_addr +
			first_seg->data_off);

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rcv++] = first_seg;

		/*
		 * Setup receipt context for a new packet.
		 */
		first_seg = NULL;
	}

	q->next_dd = next_dd;

	if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
		ret = rte_mempool_get_bulk(q->mp,
					(void **)&q->sw_ring[q->next_alloc],
					q->alloc_thresh);

		if (unlikely(ret != 0)) {
			uint8_t port = q->port_id;
			PMD_RX_LOG(ERR, "Failed to alloc mbuf");
			/*
			 * Need to restore next_dd if we cannot allocate new
			 * buffers to replenish the old ones.
			 */
			q->next_dd = (q->next_dd + q->nb_desc - count) %
								q->nb_desc;
			rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
			return 0;
		}

		for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
			mbuf = q->sw_ring[q->next_alloc];

			/* setup static mbuf fields */
			fm10k_pktmbuf_reset(mbuf, q->port_id);

			/* write descriptor */
			desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			q->hw_ring[q->next_alloc] = desc;
		}

		FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
		q->next_trigger += q->alloc_thresh;
		if (q->next_trigger >= q->nb_desc) {
			q->next_trigger = q->alloc_thresh - 1;
			q->next_alloc = 0;
		}
	}

	q->pkt_first_seg = first_seg;
	q->pkt_last_seg = last_seg;

	return nb_rcv;
}
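/*
 * The `(next_dd & 0x3) == 0` test above encodes cache-line geometry:
 * fm10k descriptors are 16 bytes, so four of them share one 64-byte
 * line and one prefetch per line is enough. A sketch of the same test
 * in isolation, under those assumptions (the 16-byte descriptor type
 * is hypothetical; the pointer line actually holds eight 8-byte
 * entries, so every other pointer prefetch is redundant but harmless,
 * matching the original code).
 */
#include <stdint.h>
#include <rte_mbuf.h>
#include <rte_prefetch.h>

struct desc16 {			/* hypothetical 16-byte descriptor */
	uint64_t addr;
	uint64_t staterr;
};

static inline void
prefetch_ring_ahead(struct desc16 *hw_ring, struct rte_mbuf **sw_ring,
		uint16_t next)
{
	/* The next mbuf itself is always wanted soon. */
	rte_prefetch0(sw_ring[next]);

	/* Indexes 0, 4, 8, ... start a fresh descriptor cache line. */
	if ((next & 0x3) == 0) {
		rte_prefetch0(&hw_ring[next]);
		rte_prefetch0(&sw_ring[next]);
	}
}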
uint16_t
fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
	uint16_t nb_pkts)
{
	struct rte_mbuf *mbuf;
	union fm10k_rx_desc desc;
	struct fm10k_rx_queue *q = rx_queue;
	uint16_t count = 0;
	int alloc = 0;
	uint16_t next_dd;
	int ret;

	next_dd = q->next_dd;

	nb_pkts = RTE_MIN(nb_pkts, q->alloc_thresh);
	for (count = 0; count < nb_pkts; ++count) {
		mbuf = q->sw_ring[next_dd];
		desc = q->hw_ring[next_dd];
		if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
			break;
#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
		dump_rxd(&desc);
#endif
		rte_pktmbuf_pkt_len(mbuf) = desc.w.length;
		rte_pktmbuf_data_len(mbuf) = desc.w.length;

		mbuf->ol_flags = 0;
#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
		rx_desc_to_ol_flags(mbuf, &desc);
#endif

		mbuf->hash.rss = desc.d.rss;

		rx_pkts[count] = mbuf;
		if (++next_dd == q->nb_desc) {
			next_dd = 0;
			alloc = 1;
		}

		/* Prefetch next mbuf while processing current one. */
		rte_prefetch0(q->sw_ring[next_dd]);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((next_dd & 0x3) == 0) {
			rte_prefetch0(&q->hw_ring[next_dd]);
			rte_prefetch0(&q->sw_ring[next_dd]);
		}
	}

	q->next_dd = next_dd;

	if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
		ret = rte_mempool_get_bulk(q->mp,
					(void **)&q->sw_ring[q->next_alloc],
					q->alloc_thresh);

		if (unlikely(ret != 0)) {
			uint8_t port = q->port_id;
			PMD_RX_LOG(ERR, "Failed to alloc mbuf");
			/*
			 * Need to restore next_dd if we cannot allocate new
			 * buffers to replenish the old ones.
			 */
			q->next_dd = (q->next_dd + q->nb_desc - count) %
								q->nb_desc;
			rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
			return 0;
		}

		for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
			mbuf = q->sw_ring[q->next_alloc];

			/* setup static mbuf fields */
			fm10k_pktmbuf_reset(mbuf, q->port_id);

			/* write descriptor */
			desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			q->hw_ring[q->next_alloc] = desc;
		}

		FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
		q->next_trigger += q->alloc_thresh;
		if (q->next_trigger >= q->nb_desc) {
			q->next_trigger = q->alloc_thresh - 1;
			q->next_alloc = 0;
		}
	}

	return count;
}
static struct tx_burst_sg_ret
tx_burst_sg(struct txq *txq, unsigned int segs, struct txq_elt *elt,
	    struct rte_mbuf *buf, unsigned int elts_head,
	    struct ibv_sge (*sges)[MLX5_PMD_SGE_WR_N])
{
	unsigned int sent_size = 0;
	unsigned int j;
	int linearize = 0;

	/* When there are too many segments, extra segments are
	 * linearized in the last SGE. */
	if (unlikely(segs > RTE_DIM(*sges))) {
		segs = (RTE_DIM(*sges) - 1);
		linearize = 1;
	}
	/* Update element. */
	elt->buf = buf;
	/* Register segments as SGEs. */
	for (j = 0; (j != segs); ++j) {
		struct ibv_sge *sge = &(*sges)[j];
		uint32_t lkey;

		/* Retrieve Memory Region key for this memory pool. */
		lkey = txq_mp2mr(txq, txq_mb2mp(buf));
		if (unlikely(lkey == (uint32_t)-1)) {
			/* MR does not exist. */
			DEBUG("%p: unable to get MP <-> MR association",
			      (void *)txq);
			/* Clean up TX element. */
			elt->buf = NULL;
			goto stop;
		}
		/* Update SGE. */
		sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
		if (txq->priv->vf)
			rte_prefetch0((volatile void *)
				      (uintptr_t)sge->addr);
		sge->length = DATA_LEN(buf);
		sge->lkey = lkey;
		sent_size += sge->length;
		buf = NEXT(buf);
	}
	/* If buf is not NULL here and is not going to be linearized,
	 * nb_segs is not valid. */
	assert(j == segs);
	assert((buf == NULL) || (linearize));
	/* Linearize extra segments. */
	if (linearize) {
		struct ibv_sge *sge = &(*sges)[segs];
		linear_t *linear = &(*txq->elts_linear)[elts_head];
		unsigned int size = linearize_mbuf(linear, buf);

		assert(segs == (RTE_DIM(*sges) - 1));
		if (size == 0) {
			/* Invalid packet. */
			DEBUG("%p: packet too large to be linearized.",
			      (void *)txq);
			/* Clean up TX element. */
			elt->buf = NULL;
			goto stop;
		}
		/* If MLX5_PMD_SGE_WR_N is 1, free mbuf immediately. */
		if (RTE_DIM(*sges) == 1) {
			do {
				struct rte_mbuf *next = NEXT(buf);

				rte_pktmbuf_free_seg(buf);
				buf = next;
			} while (buf != NULL);
			elt->buf = NULL;
		}
		/* Update SGE. */
		sge->addr = (uintptr_t)&(*linear)[0];
		sge->length = size;
		sge->lkey = txq->mr_linear->lkey;
		sent_size += size;
		/* Include last segment. */
		segs++;
	}
	return (struct tx_burst_sg_ret){
		.length = sent_size,
		.num = segs,
	};
stop:
	return (struct tx_burst_sg_ret){
		.length = -1,
		.num = -1,
	};
}

#endif /* MLX5_PMD_SGE_WR_N > 1 */

/**
 * DPDK callback for TX.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct txq *txq = (struct txq *)dpdk_txq;
	unsigned int elts_head = txq->elts_head;
	const unsigned int elts_n = txq->elts_n;
	unsigned int elts_comp_cd = txq->elts_comp_cd;
	unsigned int elts_comp = 0;
	unsigned int i;
	unsigned int max;
	int err;
	struct rte_mbuf *buf = pkts[0];

	assert(elts_comp_cd != 0);
	/* Prefetch first packet cacheline. */
	rte_prefetch0(buf);
	txq_complete(txq);
	max = (elts_n - (elts_head - txq->elts_tail));
	if (max > elts_n)
		max -= elts_n;
	assert(max >= 1);
	assert(max <= elts_n);
	/* Always leave one free entry in the ring. */
	--max;
	if (max == 0)
		return 0;
	if (max > pkts_n)
		max = pkts_n;
	for (i = 0; (i != max); ++i) {
		struct rte_mbuf *buf_next = pkts[i + 1];
		unsigned int elts_head_next =
			(((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
		struct txq_elt *elt = &(*txq->elts)[elts_head];
		unsigned int segs = NB_SEGS(buf);
#ifdef MLX5_PMD_SOFT_COUNTERS
		unsigned int sent_size = 0;
#endif
		uint32_t send_flags = 0;
#ifdef HAVE_VERBS_VLAN_INSERTION
		int insert_vlan = 0;
#endif /* HAVE_VERBS_VLAN_INSERTION */

		if (i + 1 < max)
			rte_prefetch0(buf_next);
		/* Request TX completion. */
		if (unlikely(--elts_comp_cd == 0)) {
			elts_comp_cd = txq->elts_comp_cd_init;
			++elts_comp;
			send_flags |= IBV_EXP_QP_BURST_SIGNALED;
		}
		/* Should we enable HW CKSUM offload? */
		if (buf->ol_flags &
		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
			send_flags |= IBV_EXP_QP_BURST_IP_CSUM;
			/* HW does not support checksum offloads at arbitrary
			 * offsets but automatically recognizes the packet
			 * type. For inner L3/L4 checksums, only VXLAN (UDP)
			 * tunnels are currently supported. */
			if (RTE_ETH_IS_TUNNEL_PKT(buf->packet_type))
				send_flags |= IBV_EXP_QP_BURST_TUNNEL;
		}
		if (buf->ol_flags & PKT_TX_VLAN_PKT) {
#ifdef HAVE_VERBS_VLAN_INSERTION
			if (!txq->priv->mps)
				insert_vlan = 1;
			else
#endif /* HAVE_VERBS_VLAN_INSERTION */
			{
				err = insert_vlan_sw(buf);
				if (unlikely(err))
					goto stop;
			}
		}
		if (likely(segs == 1)) {
			uintptr_t addr;
			uint32_t length;
			uint32_t lkey;
			uintptr_t buf_next_addr;

			/* Retrieve buffer information. */
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			length = DATA_LEN(buf);
			/* Update element. */
			elt->buf = buf;
			if (txq->priv->vf)
				rte_prefetch0((volatile void *)
					      (uintptr_t)addr);
			/* Prefetch next buffer data. */
			if (i + 1 < max) {
				buf_next_addr =
					rte_pktmbuf_mtod(buf_next, uintptr_t);
				rte_prefetch0((volatile void *)
					      (uintptr_t)buf_next_addr);
			}
			/* Put packet into send queue. */
#if MLX5_PMD_MAX_INLINE > 0
			if (length <= txq->max_inline) {
#ifdef HAVE_VERBS_VLAN_INSERTION
				if (insert_vlan)
					err = txq->send_pending_inline_vlan
						(txq->qp,
						 (void *)addr,
						 length,
						 send_flags,
						 &buf->vlan_tci);
				else
#endif /* HAVE_VERBS_VLAN_INSERTION */
					err = txq->send_pending_inline
						(txq->qp,
						 (void *)addr,
						 length,
						 send_flags);
			} else
#endif
			{
				/* Retrieve Memory Region key for this
				 * memory pool. */
				lkey = txq_mp2mr(txq, txq_mb2mp(buf));
				if (unlikely(lkey == (uint32_t)-1)) {
					/* MR does not exist. */
					DEBUG("%p: unable to get MP <-> MR"
					      " association", (void *)txq);
					/* Clean up TX element. */
					elt->buf = NULL;
					goto stop;
				}
#ifdef HAVE_VERBS_VLAN_INSERTION
				if (insert_vlan)
					err = txq->send_pending_vlan
						(txq->qp,
						 addr,
						 length,
						 lkey,
						 send_flags,
						 &buf->vlan_tci);
				else
#endif /* HAVE_VERBS_VLAN_INSERTION */
					err = txq->send_pending
						(txq->qp,
						 addr,
						 length,
						 lkey,
						 send_flags);
			}
			if (unlikely(err))
				goto stop;
#ifdef MLX5_PMD_SOFT_COUNTERS
			sent_size += length;
#endif
		} else {
#if MLX5_PMD_SGE_WR_N > 1
			struct ibv_sge sges[MLX5_PMD_SGE_WR_N];
			struct tx_burst_sg_ret ret;

			ret = tx_burst_sg(txq, segs, elt, buf, elts_head,
					  &sges);
			if (ret.length == (unsigned int)-1)
				goto stop;
			/* Put SG list into send queue. */
#ifdef HAVE_VERBS_VLAN_INSERTION
			if (insert_vlan)
				err = txq->send_pending_sg_list_vlan
					(txq->qp,
					 sges,
					 ret.num,
					 send_flags,
					 &buf->vlan_tci);
			else
#endif /* HAVE_VERBS_VLAN_INSERTION */
				err = txq->send_pending_sg_list
					(txq->qp,
					 sges,
					 ret.num,
					 send_flags);
			if (unlikely(err))
				goto stop;
#ifdef MLX5_PMD_SOFT_COUNTERS
			sent_size += ret.length;
#endif
#else /* MLX5_PMD_SGE_WR_N > 1 */
			DEBUG("%p: TX scattered buffers support not"
			      " compiled in", (void *)txq);
			goto stop;
#endif /* MLX5_PMD_SGE_WR_N > 1 */
		}
		elts_head = elts_head_next;
		buf = buf_next;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += sent_size;
#endif
	}
stop:
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	err = txq->send_flush(txq->qp);
	if (unlikely(err)) {
		/* A nonzero value is not supposed to be returned.
		 * Nothing can be done about it. */
		DEBUG("%p: send_flush() failed with error %d",
		      (void *)txq, err);
	}
	txq->elts_head = elts_head;
	txq->elts_comp += elts_comp;
	txq->elts_comp_cd = elts_comp_cd;
	return i;
}

/**
 * Translate RX completion flags to packet type.
 *
 * @param flags
 *   RX completion flags returned by poll_length_flags().
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 *
 * @return
 *   Packet type for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_pkt_type(uint32_t flags)
{
	uint32_t pkt_type;

	if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET)
		pkt_type =
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
				  RTE_PTYPE_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
				  RTE_PTYPE_L3_IPV6) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV4_PACKET,
				  RTE_PTYPE_INNER_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV6_PACKET,
				  RTE_PTYPE_INNER_L3_IPV6);
	else
		pkt_type =
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV4_PACKET,
				  RTE_PTYPE_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV6_PACKET,
				  RTE_PTYPE_L3_IPV6);
	return pkt_type;
}
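/*
 * rxq_cq_to_pkt_type() is branch-free per flag because TRANSPOSE()
 * relocates a single flag bit from its completion-flag position to
 * its packet-type position by multiplying or dividing by the ratio of
 * the two masks. The sketch below reproduces the usual shape of that
 * helper from the mlx PMD utility headers; the shape of the macro and
 * the FLAG_*/PTYPE_* masks are assumptions for illustration only.
 */
#include <stdint.h>

#define FLAG_IPV4  (1u << 4)	/* hypothetical source bit */
#define FLAG_IPV6  (1u << 5)	/* hypothetical source bit */
#define PTYPE_IPV4 (1u << 0)	/* hypothetical destination bit */
#define PTYPE_IPV6 (1u << 1)	/* hypothetical destination bit */

/* Move the bit selected by single-bit mask `from` in `val` to the
 * position of single-bit mask `to`. Both masks are powers of two, so
 * the multiply/divide is by a power of two and compiles to a shift;
 * with constant masks the ternary folds away entirely.
 */
#define TRANSPOSE(val, from, to) \
	(((from) >= (to)) ? \
	 (((val) & (from)) / ((from) / (to))) : \
	 (((val) & (from)) * ((to) / (from))))

static inline uint32_t
flags_to_ptype(uint32_t flags)
{
	/* Two independent, branch-free flag translations OR'd together. */
	return TRANSPOSE(flags, FLAG_IPV4, PTYPE_IPV4) |
	       TRANSPOSE(flags, FLAG_IPV6, PTYPE_IPV6);
}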
static uint16_t
schedule_enqueue(void *qp, struct rte_crypto_op **ops, uint16_t nb_ops)
{
	struct scheduler_qp_ctx *qp_ctx = qp;
	struct psd_scheduler_qp_ctx *psd_qp_ctx = qp_ctx->private_qp_ctx;
	struct rte_crypto_op *sched_ops[NB_PKT_SIZE_SLAVES][nb_ops];
	struct scheduler_session *sess;
	uint32_t in_flight_ops[NB_PKT_SIZE_SLAVES] = {
			psd_qp_ctx->primary_slave.nb_inflight_cops,
			psd_qp_ctx->secondary_slave.nb_inflight_cops
	};
	struct psd_schedule_op enq_ops[NB_PKT_SIZE_SLAVES] = {
		{PRIMARY_SLAVE_IDX, 0}, {SECONDARY_SLAVE_IDX, 0}
	};
	struct psd_schedule_op *p_enq_op;
	uint16_t i, processed_ops_pri = 0, processed_ops_sec = 0;
	uint32_t job_len;

	if (unlikely(nb_ops == 0))
		return 0;

	for (i = 0; i < nb_ops && i < 4; i++) {
		rte_prefetch0(ops[i]->sym);
		rte_prefetch0(ops[i]->sym->session);
	}

	for (i = 0; (i < (nb_ops - 8)) && (nb_ops > 8); i += 4) {
		rte_prefetch0(ops[i + 4]->sym);
		rte_prefetch0(ops[i + 4]->sym->session);
		rte_prefetch0(ops[i + 5]->sym);
		rte_prefetch0(ops[i + 5]->sym->session);
		rte_prefetch0(ops[i + 6]->sym);
		rte_prefetch0(ops[i + 6]->sym->session);
		rte_prefetch0(ops[i + 7]->sym);
		rte_prefetch0(ops[i + 7]->sym->session);

		sess = (struct scheduler_session *)
				ops[i]->sym->session->_private;

		/* job_len is initialized to the cipher data length; when
		 * that is 0, it falls back to the auth data length
		 */
		job_len = ops[i]->sym->cipher.data.length;
		job_len += (ops[i]->sym->cipher.data.length == 0) *
				ops[i]->sym->auth.data.length;
		/* decide the target op based on the job length */
		p_enq_op = &enq_ops[!(job_len & psd_qp_ctx->threshold)];

		/* stop scheduling ops before the queue is full; this
		 * prevents a failed enqueue
		 */
		if (p_enq_op->pos + in_flight_ops[p_enq_op->slave_idx] ==
				qp_ctx->max_nb_objs) {
			i = nb_ops;
			break;
		}

		sched_ops[p_enq_op->slave_idx][p_enq_op->pos] = ops[i];
		ops[i]->sym->session = sess->sessions[p_enq_op->slave_idx];
		p_enq_op->pos++;

		sess = (struct scheduler_session *)
				ops[i+1]->sym->session->_private;

		job_len = ops[i+1]->sym->cipher.data.length;
		job_len += (ops[i+1]->sym->cipher.data.length == 0) *
				ops[i+1]->sym->auth.data.length;
		p_enq_op = &enq_ops[!(job_len & psd_qp_ctx->threshold)];

		if (p_enq_op->pos + in_flight_ops[p_enq_op->slave_idx] ==
				qp_ctx->max_nb_objs) {
			i = nb_ops;
			break;
		}

		sched_ops[p_enq_op->slave_idx][p_enq_op->pos] = ops[i+1];
		ops[i+1]->sym->session = sess->sessions[p_enq_op->slave_idx];
		p_enq_op->pos++;

		sess = (struct scheduler_session *)
				ops[i+2]->sym->session->_private;

		job_len = ops[i+2]->sym->cipher.data.length;
		job_len += (ops[i+2]->sym->cipher.data.length == 0) *
				ops[i+2]->sym->auth.data.length;
		p_enq_op = &enq_ops[!(job_len & psd_qp_ctx->threshold)];

		if (p_enq_op->pos + in_flight_ops[p_enq_op->slave_idx] ==
				qp_ctx->max_nb_objs) {
			i = nb_ops;
			break;
		}

		sched_ops[p_enq_op->slave_idx][p_enq_op->pos] = ops[i+2];
		ops[i+2]->sym->session = sess->sessions[p_enq_op->slave_idx];
		p_enq_op->pos++;

		sess = (struct scheduler_session *)
				ops[i+3]->sym->session->_private;

		job_len = ops[i+3]->sym->cipher.data.length;
		job_len += (ops[i+3]->sym->cipher.data.length == 0) *
				ops[i+3]->sym->auth.data.length;
		p_enq_op = &enq_ops[!(job_len & psd_qp_ctx->threshold)];

		if (p_enq_op->pos + in_flight_ops[p_enq_op->slave_idx] ==
				qp_ctx->max_nb_objs) {
			i = nb_ops;
			break;
		}

		sched_ops[p_enq_op->slave_idx][p_enq_op->pos] = ops[i+3];
		ops[i+3]->sym->session = sess->sessions[p_enq_op->slave_idx];
		p_enq_op->pos++;
	}

	for (; i < nb_ops; i++) {
		sess = (struct scheduler_session *)
				ops[i]->sym->session->_private;

		job_len = ops[i]->sym->cipher.data.length;
		job_len += (ops[i]->sym->cipher.data.length == 0) *
				ops[i]->sym->auth.data.length;
		p_enq_op = &enq_ops[!(job_len & psd_qp_ctx->threshold)];

		if (p_enq_op->pos + in_flight_ops[p_enq_op->slave_idx] ==
				qp_ctx->max_nb_objs) {
			i = nb_ops;
			break;
		}

		sched_ops[p_enq_op->slave_idx][p_enq_op->pos] = ops[i];
		ops[i]->sym->session = sess->sessions[p_enq_op->slave_idx];
		p_enq_op->pos++;
	}

	processed_ops_pri = rte_cryptodev_enqueue_burst(
			psd_qp_ctx->primary_slave.dev_id,
			psd_qp_ctx->primary_slave.qp_id,
			sched_ops[PRIMARY_SLAVE_IDX],
			enq_ops[PRIMARY_SLAVE_IDX].pos);
	/* enqueue shall not fail as the slave queue is monitored */
	RTE_ASSERT(processed_ops_pri == enq_ops[PRIMARY_SLAVE_IDX].pos);

	psd_qp_ctx->primary_slave.nb_inflight_cops += processed_ops_pri;

	processed_ops_sec = rte_cryptodev_enqueue_burst(
			psd_qp_ctx->secondary_slave.dev_id,
			psd_qp_ctx->secondary_slave.qp_id,
			sched_ops[SECONDARY_SLAVE_IDX],
			enq_ops[SECONDARY_SLAVE_IDX].pos);
	RTE_ASSERT(processed_ops_sec == enq_ops[SECONDARY_SLAVE_IDX].pos);

	psd_qp_ctx->secondary_slave.nb_inflight_cops += processed_ops_sec;

	return processed_ops_pri + processed_ops_sec;
}
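/*
 * The slave selection above is a single branch-free bit test, not a
 * full comparison: `threshold` acts as a single-bit mask, and jobs
 * whose length has that bit set pick index 0 (the primary,
 * large-packet slave) while the rest pick index 1. Isolated sketch of
 * that classifier, with hypothetical enum names:
 */
#include <stdint.h>

enum { PRIMARY = 0, SECONDARY = 1 };

static inline int
pick_slave(uint32_t job_len, uint32_t threshold)
{
	/* Tests one bit of the length; with a power-of-two threshold
	 * this approximates "job_len >= threshold" without a branch. */
	return !(job_len & threshold);	/* 0 -> PRIMARY, 1 -> SECONDARY */
}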
static int
bnxt_rx_pkt(struct rte_mbuf **rx_pkt, struct bnxt_rx_queue *rxq,
	    uint32_t *raw_cons)
{
	struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
	struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
	struct rx_pkt_cmpl *rxcmp;
	struct rx_pkt_cmpl_hi *rxcmp1;
	uint32_t tmp_raw_cons = *raw_cons;
	uint16_t cons, prod, cp_cons =
	    RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
	struct bnxt_sw_rx_bd *rx_buf;
	struct rte_mbuf *mbuf;
	int rc = 0;

	rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cp_cons];

	tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
	cp_cons = RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
	rxcmp1 = (struct rx_pkt_cmpl_hi *)&cpr->cp_desc_ring[cp_cons];

	if (!CMP_VALID(rxcmp1, tmp_raw_cons, cpr->cp_ring_struct))
		return -EBUSY;

	prod = rxr->rx_prod;

	/* EW - GRO deferred to phase 3 */
	cons = rxcmp->opaque;
	rx_buf = &rxr->rx_buf_ring[cons];
	mbuf = rx_buf->mbuf;
	rte_prefetch0(mbuf);

	mbuf->nb_segs = 1;
	mbuf->next = NULL;
	mbuf->pkt_len = rxcmp->len;
	mbuf->data_len = mbuf->pkt_len;
	mbuf->port = rxq->port_id;
	mbuf->ol_flags = 0;
	if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) {
		mbuf->hash.rss = rxcmp->rss_hash;
		mbuf->ol_flags |= PKT_RX_RSS_HASH;
	} else {
		mbuf->hash.fdir.id = rxcmp1->cfa_code;
		mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
	}

	if (rxcmp1->flags2 & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
		mbuf->vlan_tci = rxcmp1->metadata &
			(RX_PKT_CMPL_METADATA_VID_MASK |
			RX_PKT_CMPL_METADATA_DE |
			RX_PKT_CMPL_METADATA_PRI_MASK);
		mbuf->ol_flags |= PKT_RX_VLAN_PKT;
	}

	rx_buf->mbuf = NULL;
	if (rxcmp1->errors_v2 & RX_CMP_L2_ERRORS) {
		/* Re-install the mbuf back to the rx ring */
		bnxt_reuse_rx_mbuf(rxr, cons, mbuf);
		rc = -EIO;
		goto next_rx;
	}
	/*
	 * TODO: Redesign this....
	 * If the allocation fails, the packet does not get received.
	 * Simply returning this will result in slowly falling behind
	 * on the producer ring buffers.
	 * Instead, "filling up" the producer just before ringing the
	 * doorbell could be a better solution since it will let the
	 * producer ring starve until memory is available again pushing
	 * the drops into hardware and getting them out of the driver
	 * allowing recovery to a full producer ring.
	 *
	 * This could also help with cache usage by preventing per-packet
	 * calls in favour of a tight loop with the same function being
	 * called in it.
	 */
	if (bnxt_alloc_rx_data(rxq, rxr, prod)) {
		RTE_LOG(ERR, PMD, "mbuf alloc failed with prod=0x%x\n", prod);
		rc = -ENOMEM;
		goto next_rx;
	}

	/*
	 * All MBUFs are allocated with the same size under DPDK,
	 * no optimization for rx_copy_thresh
	 */

	/* AGG buf operation is deferred */

	/* EW - VLAN reception. Must compare against the ol_flags */

	*rx_pkt = mbuf;
next_rx:
	rxr->rx_prod = RING_NEXT(rxr->rx_ring_struct, prod);

	*raw_cons = tmp_raw_cons;

	return rc;
}
/**
 * DPDK callback for RX.
 *
 * The following function is the same as mlx5_rx_burst_sp(), except it doesn't
 * manage scattered packets. Improves performance when MRU is lower than the
 * size of the first segment.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct rxq *rxq = (struct rxq *)dpdk_rxq;
	struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;
	const unsigned int elts_n = rxq->elts_n;
	unsigned int elts_head = rxq->elts_head;
	struct ibv_sge sges[pkts_n];
	unsigned int i;
	unsigned int pkts_ret = 0;
	int ret;

	if (unlikely(rxq->sp))
		return mlx5_rx_burst_sp(dpdk_rxq, pkts, pkts_n);
	for (i = 0; (i != pkts_n); ++i) {
		struct rxq_elt *elt = &(*elts)[elts_head];
		unsigned int len;
		struct rte_mbuf *seg = elt->buf;
		struct rte_mbuf *rep;
		uint32_t flags;
		uint16_t vlan_tci;

		/* Sanity checks. */
		assert(seg != NULL);
		assert(elts_head < rxq->elts_n);
		assert(rxq->elts_head < rxq->elts_n);
		/*
		 * Fetch initial bytes of packet descriptor into a
		 * cacheline while allocating rep.
		 */
		rte_prefetch0(seg);
		rte_prefetch0(&seg->cacheline1);
		ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci);
		if (unlikely(ret < 0)) {
			struct ibv_wc wc;
			int wcs_n;

			DEBUG("rxq=%p, poll_length() failed (ret=%d)",
			      (void *)rxq, ret);
			/* ibv_poll_cq() must be used in case of failure. */
			wcs_n = ibv_poll_cq(rxq->cq, 1, &wc);
			if (unlikely(wcs_n == 0))
				break;
			if (unlikely(wcs_n < 0)) {
				DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)",
				      (void *)rxq, wcs_n);
				break;
			}
			assert(wcs_n == 1);
			if (unlikely(wc.status != IBV_WC_SUCCESS)) {
				/* Whatever, just repost the offending WR. */
				DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work"
				      " completion status (%d): %s",
				      (void *)rxq, wc.wr_id, wc.status,
				      ibv_wc_status_str(wc.status));
#ifdef MLX5_PMD_SOFT_COUNTERS
				/* Increment dropped packets counter. */
				++rxq->stats.idropped;
#endif
				/* Add SGE to array for repost. */
				sges[i] = elt->sge;
				goto repost;
			}
			ret = wc.byte_len;
		}
		if (ret == 0)
			break;
		assert(ret >= (rxq->crc_present << 2));
		len = ret - (rxq->crc_present << 2);
		rep = __rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(rep == NULL)) {
			/*
			 * Unable to allocate a replacement mbuf,
			 * repost WR.
			 */
			DEBUG("rxq=%p: can't allocate a new mbuf",
			      (void *)rxq);
			/* Increment out of memory counters. */
			++rxq->stats.rx_nombuf;
			++rxq->priv->dev->data->rx_mbuf_alloc_failed;
			goto repost;
		}

		/* Reconfigure sge to use rep instead of seg. */
		elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
		assert(elt->sge.lkey == rxq->mr->lkey);
		elt->buf = rep;

		/* Add SGE to array for repost. */
		sges[i] = elt->sge;

		/* Update seg information. */
		SET_DATA_OFF(seg, RTE_PKTMBUF_HEADROOM);
		NB_SEGS(seg) = 1;
		PORT(seg) = rxq->port_id;
		NEXT(seg) = NULL;
		PKT_LEN(seg) = len;
		DATA_LEN(seg) = len;
		if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip) {
			seg->packet_type = rxq_cq_to_pkt_type(flags);
			seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
#ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS
			if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
				seg->ol_flags |= PKT_RX_VLAN_PKT;
				seg->vlan_tci = vlan_tci;
			}
#endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */
		}
		/* Return packet. */
		*(pkts++) = seg;
		++pkts_ret;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += len;
#endif
repost:
		if (++elts_head >= elts_n)
			elts_head = 0;
		continue;
	}
	if (unlikely(i == 0))
		return 0;
	/* Repost WRs. */
#ifdef DEBUG_RECV
	DEBUG("%p: reposting %u WRs", (void *)rxq, i);
#endif
	ret = rxq->recv(rxq->wq, sges, i);
	if (unlikely(ret)) {
		/* Inability to repost WRs is fatal. */
		DEBUG("%p: recv_burst(): failed (ret=%d)",
		      (void *)rxq->priv, ret);
		abort();
	}
	rxq->elts_head = elts_head;
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += pkts_ret;
#endif
	return pkts_ret;
}
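/*
 * The pair of prefetches near the top of the loop above reflects the
 * rte_mbuf layout of this era: the struct spans two cache lines, with
 * the RX-critical fields in the first and the rest behind the
 * `cacheline1` marker. Prefetching both lines while the replacement
 * mbuf is being allocated hides the second miss. Isolated sketch,
 * assuming an rte_mbuf definition that exposes the `cacheline1`
 * marker (as the code above does; newer DPDK wraps the same idea in
 * rte_mbuf_prefetch_part1()/part2()):
 */
#include <rte_mbuf.h>
#include <rte_prefetch.h>

static inline void
prefetch_mbuf_header(struct rte_mbuf *m)
{
	rte_prefetch0(m);		/* cache line 0: RX fields */
	rte_prefetch0(&m->cacheline1);	/* cache line 1: the rest */
}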
static inline uint32_t __attribute__((always_inline))
virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	struct rte_mbuf *buff;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
	uint64_t buff_addr = 0;
	uint64_t buff_hdr_addr = 0;
	uint32_t head[MAX_PKT_BURST], packet_len = 0;
	uint32_t head_idx, packet_success = 0;
	uint16_t avail_idx, res_cur_idx;
	uint16_t res_base_idx, res_end_idx;
	uint16_t free_entries;
	uint8_t success = 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
	vq = dev->virtqueue_rx;
	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
	/*
	 * As many data cores may want access to available buffers,
	 * they need to be reserved.
	 */
	do {
		res_base_idx = vq->last_used_idx_res;

		avail_idx = *((volatile uint16_t *)&vq->avail->idx);

		free_entries = (avail_idx - res_base_idx);

		/* check that we have enough buffers */
		if (unlikely(count > free_entries))
			count = free_entries;

		if (count == 0)
			return 0;

		res_end_idx = res_base_idx + count;
		/* vq->last_used_idx_res is atomically updated. */
		success = rte_atomic16_cmpset(&vq->last_used_idx_res,
				res_base_idx, res_end_idx);
	} while (unlikely(success == 0));
	res_cur_idx = res_base_idx;
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
			dev->device_fh, res_cur_idx, res_end_idx);

	/* Prefetch available ring to retrieve indexes. */
	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (head_idx = 0; head_idx < count; head_idx++)
		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
					(vq->size - 1)];

	/* Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[packet_success]]);

	while (res_cur_idx != res_end_idx) {
		/* Get descriptor from available ring */
		desc = &vq->desc[head[packet_success]];
		/* Prefetch descriptor address. */
		rte_prefetch0(desc);

		buff = pkts[packet_success];

		/* Convert from gpa to vva
		 * (guest physical addr -> vhost virtual addr) */
		buff_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void *)(uintptr_t)buff_addr);

		{
			/* Copy virtio_hdr to packet and increment
			 * buffer address */
			buff_hdr_addr = buff_addr;
			packet_len = rte_pktmbuf_data_len(buff) +
					vq->vhost_hlen;

			/*
			 * If the descriptors are chained the header and
			 * data are placed in separate buffers.
			 */
			if (desc->flags & VRING_DESC_F_NEXT) {
				desc->len = vq->vhost_hlen;
				desc = &vq->desc[desc->next];
				/* Buffer address translation. */
				buff_addr = gpa_to_vva(dev, desc->addr);
				desc->len = rte_pktmbuf_data_len(buff);
			} else {
				buff_addr += vq->vhost_hlen;
				desc->len = packet_len;
			}
		}

		/* Update used ring with desc information */
		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
							head[packet_success];
		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;

		/* Copy mbuf data to buffer */
		rte_memcpy((void *)(uintptr_t)buff_addr,
			(const void *)buff->pkt.data,
			rte_pktmbuf_data_len(buff));

		res_cur_idx++;
		packet_success++;

		/* Mergeable buffers are disabled, so a header is required
		 * per buffer. */
		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
			(const void *)&virtio_hdr, vq->vhost_hlen);

		if (res_cur_idx < res_end_idx) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[packet_success]]);
		}
	}

	rte_compiler_barrier();

	/* Wait until it's our turn to add our buffer to the used ring. */
	while (unlikely(vq->last_used_idx != res_base_idx))
		rte_pause();

	*(volatile uint16_t *)&vq->used->idx += count;

	vq->last_used_idx = res_end_idx;

	return count;
}
static uint16_t
bnx2x_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct bnx2x_rx_queue *rxq = p_rxq;
	struct bnx2x_softc *sc = rxq->sc;
	struct bnx2x_fastpath *fp = &sc->fp[rxq->queue_id];
	uint32_t nb_rx = 0;
	uint16_t hw_cq_cons, sw_cq_cons, sw_cq_prod;
	uint16_t bd_cons, bd_prod;
	struct rte_mbuf *new_mb;
	uint16_t rx_pref;
	struct eth_fast_path_rx_cqe *cqe_fp;
	uint16_t len, pad;
	struct rte_mbuf *rx_mb = NULL;

	hw_cq_cons = le16toh(*fp->rx_cq_cons_sb);
	if ((hw_cq_cons & USABLE_RCQ_ENTRIES_PER_PAGE) ==
			USABLE_RCQ_ENTRIES_PER_PAGE) {
		++hw_cq_cons;
	}

	bd_cons = rxq->rx_bd_head;
	bd_prod = rxq->rx_bd_tail;
	sw_cq_cons = rxq->rx_cq_head;
	sw_cq_prod = rxq->rx_cq_tail;

	if (sw_cq_cons == hw_cq_cons)
		return 0;

	while (nb_rx < nb_pkts && sw_cq_cons != hw_cq_cons) {

		bd_prod &= MAX_RX_BD(rxq);
		bd_cons &= MAX_RX_BD(rxq);

		cqe_fp = &rxq->cq_ring[sw_cq_cons & MAX_RX_BD(rxq)].fast_path_cqe;

		if (unlikely(CQE_TYPE_SLOW(cqe_fp->type_error_flags &
				ETH_FAST_PATH_RX_CQE_TYPE))) {
			PMD_RX_LOG(ERR,
				"slowpath event during traffic processing");
			break;
		}

		if (unlikely(cqe_fp->type_error_flags &
				ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG)) {
			PMD_RX_LOG(ERR, "flags 0x%x rx packet %u",
					cqe_fp->type_error_flags, sw_cq_cons);
			goto next_rx;
		}

		len = cqe_fp->pkt_len_or_gro_seg_len;
		pad = cqe_fp->placement_offset;

		new_mb = bnx2x_rxmbuf_alloc(rxq->mb_pool);
		if (unlikely(!new_mb)) {
			PMD_RX_LOG(ERR, "mbuf alloc fail fp[%02d]", fp->index);
			goto next_rx;
		}

		rx_mb = rxq->sw_ring[bd_cons];
		rxq->sw_ring[bd_cons] = new_mb;
		rxq->rx_ring[bd_prod] = new_mb->buf_physaddr;

		rx_pref = NEXT_RX_BD(bd_cons) & MAX_RX_BD(rxq);
		rte_prefetch0(rxq->sw_ring[rx_pref]);
		if ((rx_pref & 0x3) == 0) {
			rte_prefetch0(&rxq->rx_ring[rx_pref]);
			rte_prefetch0(&rxq->sw_ring[rx_pref]);
		}

		rx_mb->data_off = pad;
		rx_mb->nb_segs = 1;
		rx_mb->next = NULL;
		rx_mb->pkt_len = rx_mb->data_len = len;
		rx_mb->port = rxq->port_id;
		rx_mb->buf_len = len + pad;
		rte_prefetch1(rte_pktmbuf_mtod(rx_mb, void *));

		/*
		 * If we received a packet with a vlan tag,
		 * attach that information to the packet.
		 */
		if (cqe_fp->pars_flags.flags & PARSING_FLAGS_VLAN) {
			rx_mb->vlan_tci = cqe_fp->vlan_tag;
			rx_mb->ol_flags |= PKT_RX_VLAN_PKT;
		}

		rx_pkts[nb_rx] = rx_mb;
		nb_rx++;

		/* limit spinning on the queue */
		if (unlikely(nb_rx == sc->rx_budget)) {
			PMD_RX_LOG(ERR, "Limit spinning on the queue");
			break;
		}

next_rx:
		bd_cons = NEXT_RX_BD(bd_cons);
		bd_prod = NEXT_RX_BD(bd_prod);
		sw_cq_prod = NEXT_RCQ_IDX(sw_cq_prod);
		sw_cq_cons = NEXT_RCQ_IDX(sw_cq_cons);
	}
	rxq->rx_bd_head = bd_cons;
	rxq->rx_bd_tail = bd_prod;
	rxq->rx_cq_head = sw_cq_cons;
	rxq->rx_cq_tail = sw_cq_prod;
	bnx2x_upd_rx_prod_fast(sc, fp, bd_prod, sw_cq_prod);

	return nb_rx;
}
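/*
 * Note the mix of hints in the loop above: ring entries the driver
 * dereferences within a few instructions use rte_prefetch0 (pull into
 * L1), while packet payload that is read later, often by another
 * stage, uses rte_prefetch1 (stage into L2, leaving L1 free for the
 * driver's own working set). An illustrative pairing under that
 * assumption; the helper name is hypothetical:
 */
#include <stdint.h>
#include <rte_mbuf.h>
#include <rte_prefetch.h>

static inline void
stage_rx(struct rte_mbuf **sw_ring, uint16_t next_idx, struct rte_mbuf *cur)
{
	/* Needed on the next loop iteration: want it in L1. */
	rte_prefetch0(sw_ring[next_idx]);
	/* Payload is consumed later; L2 is close enough. */
	rte_prefetch1(rte_pktmbuf_mtod(cur, void *));
}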
static inline uint16_t fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint8_t *split_packet) { volatile union fm10k_rx_desc *rxdp; struct rte_mbuf **mbufp; uint16_t nb_pkts_recd; int pos; struct fm10k_rx_queue *rxq = rx_queue; uint64_t var; __m128i shuf_msk; __m128i dd_check, eop_check; uint16_t next_dd; next_dd = rxq->next_dd; /* Just the act of getting into the function from the application is * going to cost about 7 cycles */ rxdp = rxq->hw_ring + next_dd; rte_prefetch0(rxdp); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act */ if (rxq->rxrearm_nb > RTE_FM10K_RXQ_REARM_THRESH) fm10k_rxq_rearm(rxq); /* Before we start moving massive data around, check to see if * there is actually a packet available */ if (!(rxdp->d.staterr & FM10K_RXD_STATUS_DD)) return 0; /* Vector RX will process 4 packets at a time, strip the unaligned * tails in case it's not a multiple of 4. */ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_FM10K_DESCS_PER_LOOP); /* 4 packets DD mask */ dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL); /* 4 packets EOP mask */ eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL); /* mask to shuffle from desc. to mbuf */ shuf_msk = _mm_set_epi8( 7, 6, 5, 4, /* octet 4~7, 32bits rss */ 15, 14, /* octet 14~15, low 16 bits vlan_macip */ 13, 12, /* octet 12~13, 16 bits data_len */ 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ 13, 12, /* octet 12~13, low 16 bits pkt_len */ 0xFF, 0xFF, /* skip high 16 bits pkt_type */ 0xFF, 0xFF /* Skip pkt_type field in shuffle operation */ ); /* * Compile-time verify the shuffle mask * NOTE: some field positions already verified above, but duplicated * here for completeness in case of future modifications. */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12); /* Cache is empty -> need to scan the buffer rings, but first move * the next 'n' mbufs into the cache */ mbufp = &rxq->sw_ring[next_dd]; /* A. load 4 packets in one loop * [A*. mask out 4 unused dirty field in desc] * B. copy 4 mbuf point from swring to rx_pkts * C. calc the number of DD bits among the 4 packets * [C*. extract the end-of-packet bit, if requested] * D. fill info. from desc to mbuf */ for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; pos += RTE_FM10K_DESCS_PER_LOOP, rxdp += RTE_FM10K_DESCS_PER_LOOP) { __m128i descs0[RTE_FM10K_DESCS_PER_LOOP]; __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; __m128i zero, staterr, sterr_tmp1, sterr_tmp2; __m128i mbp1; /* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg.
*/ #if defined(RTE_ARCH_X86_64) __m128i mbp2; #endif /* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */ mbp1 = _mm_loadu_si128((__m128i *)&mbufp[pos]); /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); rte_compiler_barrier(); /* B.2 copy 2 64 bit or 4 32 bit mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); #if defined(RTE_ARCH_X86_64) /* B.1 load 2 64 bit mbuf points */ mbp2 = _mm_loadu_si128((__m128i *)&mbufp[pos+2]); #endif descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); rte_compiler_barrier(); /* B.1 load 2 mbuf point */ descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); rte_compiler_barrier(); descs0[0] = _mm_loadu_si128((__m128i *)(rxdp)); #if defined(RTE_ARCH_X86_64) /* B.2 copy 2 mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2); #endif /* avoid compiler reorder optimization */ rte_compiler_barrier(); if (split_packet) { rte_mbuf_prefetch_part2(rx_pkts[pos]); rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); } /* D.1 pkt 3,4 convert format from desc to pktmbuf */ pkt_mb4 = _mm_shuffle_epi8(descs0[3], shuf_msk); pkt_mb3 = _mm_shuffle_epi8(descs0[2], shuf_msk); /* C.1 4=>2 filter staterr info only */ sterr_tmp2 = _mm_unpackhi_epi32(descs0[3], descs0[2]); /* C.1 4=>2 filter staterr info only */ sterr_tmp1 = _mm_unpackhi_epi32(descs0[1], descs0[0]); /* set ol_flags with vlan packet type */ fm10k_desc_to_olflags_v(descs0, &rx_pkts[pos]); /* D.1 pkt 1,2 convert format from desc to pktmbuf */ pkt_mb2 = _mm_shuffle_epi8(descs0[1], shuf_msk); pkt_mb1 = _mm_shuffle_epi8(descs0[0], shuf_msk); /* C.2 get 4 pkts staterr value */ zero = _mm_xor_si128(dd_check, dd_check); staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2); /* D.3 copy final 3,4 data to rx_pkts */ _mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1, pkt_mb4); _mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1, pkt_mb3); /* C* extract and record EOP bit */ if (split_packet) { __m128i eop_shuf_mask = _mm_set_epi8( 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x04, 0x0C, 0x00, 0x08 ); /* and with mask to extract bits, flipping 1-0 */ __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); /* the staterr values are not in order, as the count * of dd bits doesn't care. However, for end of * packet tracking, we do care, so shuffle.
This also * compresses the 32-bit values to 8-bit */ eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask); /* store the resulting 32-bit value */ *(int *)split_packet = _mm_cvtsi128_si32(eop_bits); split_packet += RTE_FM10K_DESCS_PER_LOOP; /* zero-out next pointers */ rx_pkts[pos]->next = NULL; rx_pkts[pos + 1]->next = NULL; rx_pkts[pos + 2]->next = NULL; rx_pkts[pos + 3]->next = NULL; } /* C.3 calc available number of desc */ staterr = _mm_and_si128(staterr, dd_check); staterr = _mm_packs_epi32(staterr, zero); /* D.3 copy final 1,2 data to rx_pkts */ _mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1, pkt_mb2); _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, pkt_mb1); fm10k_desc_to_pktype_v(descs0, &rx_pkts[pos]); /* C.4 calc available number of desc */ var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); nb_pkts_recd += var; if (likely(var != RTE_FM10K_DESCS_PER_LOOP)) break; } /* Update our internal tail pointer */ rxq->next_dd = (uint16_t)(rxq->next_dd + nb_pkts_recd); rxq->next_dd = (uint16_t)(rxq->next_dd & (rxq->nb_desc - 1)); rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); return nb_pkts_recd; }
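/*
 * The tail of the fm10k vector loop above turns four DD status bits into
 * a packet count: descriptors complete in order, so the set bits are
 * contiguous from the lowest lane, and a popcount of the packed staterr
 * word equals the number of finished packets; fewer than 4 ends the scan.
 * A scalar sketch of the same idea, with plain uint32_t status words
 * standing in for the SSE lanes.
 */
#include <stdint.h>

static inline unsigned int
count_done_descs(const uint32_t staterr[4], uint32_t dd_bit)
{
	uint64_t packed = 0;
	unsigned int lane;

	for (lane = 0; lane < 4; lane++)
		packed |= (uint64_t)((staterr[lane] & dd_bit) ? 1 : 0) << lane;
	/* e.g. DD set on lanes 0..2 only: packed == 0x7 -> 3 packets */
	return (unsigned int)__builtin_popcountll(packed);
}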
dpdk_virtio_dev_to_vm_tx_burst(struct dpdk_virtio_writer *p, vr_dpdk_virtioq_t *vq, struct rte_mbuf **pkts, uint32_t count) { struct vring_desc *desc; struct rte_mbuf *buff; /* The virtio_hdr is initialised to 0. */ struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; uint64_t buff_addr = 0; uint64_t buff_hdr_addr = 0; uint32_t head[VR_DPDK_VIRTIO_TX_BURST_SZ]; uint32_t head_idx, packet_success = 0; uint16_t avail_idx, res_cur_idx; uint16_t res_base_idx, res_end_idx; uint16_t free_entries; uint8_t success = 0; vr_uvh_client_t *vru_cl; if (unlikely(vq->vdv_ready_state == VQ_NOT_READY)) return 0; vru_cl = vr_dpdk_virtio_get_vif_client(vq->vdv_vif_idx); if (unlikely(vru_cl == NULL)) return 0; /* * As many data cores may want access to available buffers, * they need to be reserved. */ do { res_base_idx = vq->vdv_last_used_idx_res; avail_idx = *((volatile uint16_t *)&vq->vdv_avail->idx); free_entries = (avail_idx - res_base_idx); /* Check that we have enough buffers. */ if (unlikely(count > free_entries)) count = free_entries; if (unlikely(count == 0)) return 0; res_end_idx = res_base_idx + count; /* vq->vdv_last_used_idx_res is atomically updated. */ /* TODO: Allow to disable cmpset if no concurrency in application. */ success = rte_atomic16_cmpset(&vq->vdv_last_used_idx_res, res_base_idx, res_end_idx); } while (unlikely(success == 0)); res_cur_idx = res_base_idx; RTE_LOG(DEBUG, VROUTER, "%s: Current Index %d | End Index %d\n", __func__, res_cur_idx, res_end_idx); /* Prefetch available ring to retrieve indexes. */ rte_prefetch0(&vq->vdv_avail->ring[res_cur_idx & (vq->vdv_size - 1)]); /* Retrieve all of the head indexes first to avoid caching issues. */ for (head_idx = 0; head_idx < count; head_idx++) head[head_idx] = vq->vdv_avail->ring[(res_cur_idx + head_idx) & (vq->vdv_size - 1)]; /* Prefetch descriptor index. */ rte_prefetch0(&vq->vdv_desc[head[packet_success]]); while (res_cur_idx != res_end_idx) { uint32_t offset = 0, vb_offset = 0; uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0; uint8_t hdr = 0, uncompleted_pkt = 0; /* Get descriptor from available ring */ desc = &vq->vdv_desc[head[packet_success]]; buff = pkts[packet_success]; /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */ buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr); /* Prefetch buffer address. */ rte_prefetch0((void *)(uintptr_t)buff_addr); /* Copy virtio_hdr to packet and increment buffer address */ buff_hdr_addr = buff_addr; /* * If the descriptors are chained the header and data are * placed in separate buffers. */ if (likely(desc->flags & VRING_DESC_F_NEXT) && (desc->len == sizeof(struct virtio_net_hdr))) { /* * TODO: verify that desc->next is sane below. */ desc = &vq->vdv_desc[desc->next]; /* Buffer address translation. */ buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr); } else { vb_offset += sizeof(struct virtio_net_hdr); hdr = 1; } pkt_len = rte_pktmbuf_pkt_len(buff); data_len = rte_pktmbuf_data_len(buff); len_to_cpy = RTE_MIN(data_len, hdr ?
desc->len - sizeof(struct virtio_net_hdr) : desc->len); while (total_copied < pkt_len) { /* Copy mbuf data to buffer */ rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset), rte_pktmbuf_mtod_offset(buff, const void *, offset), len_to_cpy); offset += len_to_cpy; vb_offset += len_to_cpy; total_copied += len_to_cpy; /* The whole packet completes */ if (likely(total_copied == pkt_len)) break; /* The current segment completes */ if (offset == data_len) { buff = buff->next; offset = 0; data_len = rte_pktmbuf_data_len(buff); } /* The current vring descriptor is done */ if (vb_offset == desc->len) { if (desc->flags & VRING_DESC_F_NEXT) { desc = &vq->vdv_desc[desc->next]; buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr); vb_offset = 0; } else { /* Not enough room in the vring buffer */ uncompleted_pkt = 1; break; } } len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset); } /* Update used ring with desc information */ vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].id = head[packet_success]; /* Drop the packet if it is uncompleted */ if (unlikely(uncompleted_pkt == 1)) vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len = sizeof(struct virtio_net_hdr); else vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len = pkt_len + sizeof(struct virtio_net_hdr); res_cur_idx++; packet_success++; /* TODO: in DPDK 2.1 we do not copy the header if (unlikely(uncompleted_pkt == 1)) continue; */ rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, sizeof(struct virtio_net_hdr)); if (likely(res_cur_idx < res_end_idx)) { /* Prefetch descriptor index. */ rte_prefetch0(&vq->vdv_desc[head[packet_success]]); } } rte_compiler_barrier(); /* Wait until it's our turn to add our buffer to the used ring. */ while (unlikely(vq->vdv_last_used_idx != res_base_idx)) rte_pause(); *(volatile uint16_t *)&vq->vdv_used->idx += count; vq->vdv_last_used_idx = res_end_idx; RTE_LOG(DEBUG, VROUTER, "%s: vif %d vq %p last_used_idx %d used->idx %d\n", __func__, vq->vdv_vif_idx, vq, vq->vdv_last_used_idx, vq->vdv_used->idx); /* flush used->idx update before we read avail->flags. */ rte_mb(); /* Kick the guest if necessary. */ if (unlikely(!(vq->vdv_avail->flags & VRING_AVAIL_F_NO_INTERRUPT))) { p->nb_syscalls++; eventfd_write(vq->vdv_callfd, 1); } return count; }
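/*
 * The end of dpdk_virtio_dev_to_vm_tx_burst() above shows the interrupt
 * suppression handshake: the used->idx store must be globally visible
 * (rte_mb) before avail->flags is read, otherwise the guest could set
 * VRING_AVAIL_F_NO_INTERRUPT concurrently and still miss the update.
 * A minimal sketch of that tail, with the vring reduced to bare fields;
 * callfd is assumed to be an eventfd, as in vhost-user.
 */
#include <stdint.h>
#include <sys/eventfd.h>
#include <rte_atomic.h>

#ifndef VRING_AVAIL_F_NO_INTERRUPT
#define VRING_AVAIL_F_NO_INTERRUPT 1	/* guest asks to skip the kick */
#endif

static inline void
publish_used_and_kick(volatile uint16_t *used_idx, uint16_t count,
		      const volatile uint16_t *avail_flags, int callfd)
{
	*used_idx = (uint16_t)(*used_idx + count);	/* publish buffers */
	rte_mb();	/* order the idx store before the flags load */
	if (!(*avail_flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write(callfd, 1);		/* kick the guest */
}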
/** * DPDK callback for RX with scattered packets support. * * @param dpdk_rxq * Generic pointer to RX queue structure. * @param[out] pkts * Array to store received packets. * @param pkts_n * Maximum number of packets in array. * * @return * Number of packets successfully received (<= pkts_n). */ uint16_t mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct rxq *rxq = (struct rxq *)dpdk_rxq; struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp; const unsigned int elts_n = rxq->elts_n; unsigned int elts_head = rxq->elts_head; unsigned int i; unsigned int pkts_ret = 0; int ret; if (unlikely(!rxq->sp)) return mlx5_rx_burst(dpdk_rxq, pkts, pkts_n); if (unlikely(elts == NULL)) /* See RTE_DEV_CMD_SET_MTU. */ return 0; for (i = 0; (i != pkts_n); ++i) { struct rxq_elt_sp *elt = &(*elts)[elts_head]; unsigned int len; unsigned int pkt_buf_len; struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */ struct rte_mbuf **pkt_buf_next = &pkt_buf; unsigned int seg_headroom = RTE_PKTMBUF_HEADROOM; unsigned int j = 0; uint32_t flags; uint16_t vlan_tci; /* Sanity checks. */ assert(elts_head < rxq->elts_n); assert(rxq->elts_head < rxq->elts_n); ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci); if (unlikely(ret < 0)) { struct ibv_wc wc; int wcs_n; DEBUG("rxq=%p, poll_length() failed (ret=%d)", (void *)rxq, ret); /* ibv_poll_cq() must be used in case of failure. */ wcs_n = ibv_poll_cq(rxq->cq, 1, &wc); if (unlikely(wcs_n == 0)) break; if (unlikely(wcs_n < 0)) { DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)", (void *)rxq, wcs_n); break; } assert(wcs_n == 1); if (unlikely(wc.status != IBV_WC_SUCCESS)) { /* Whatever, just repost the offending WR. */ DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work" " completion status (%d): %s", (void *)rxq, wc.wr_id, wc.status, ibv_wc_status_str(wc.status)); #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment dropped packets counter. */ ++rxq->stats.idropped; #endif goto repost; } ret = wc.byte_len; } if (ret == 0) break; assert(ret >= (rxq->crc_present << 2)); len = ret - (rxq->crc_present << 2); pkt_buf_len = len; /* * Replace spent segments with new ones, concatenate and * return them as pkt_buf. */ while (1) { struct ibv_sge *sge = &elt->sges[j]; struct rte_mbuf *seg = elt->bufs[j]; struct rte_mbuf *rep; unsigned int seg_tailroom; assert(seg != NULL); /* * Fetch initial bytes of packet descriptor into a * cacheline while allocating rep. */ rte_prefetch0(seg); rep = __rte_mbuf_raw_alloc(rxq->mp); if (unlikely(rep == NULL)) { /* * Unable to allocate a replacement mbuf, * repost WR. */ DEBUG("rxq=%p: can't allocate a new mbuf", (void *)rxq); if (pkt_buf != NULL) { *pkt_buf_next = NULL; rte_pktmbuf_free(pkt_buf); } /* Increment out of memory counters. */ ++rxq->stats.rx_nombuf; ++rxq->priv->dev->data->rx_mbuf_alloc_failed; goto repost; } #ifndef NDEBUG /* Poison user-modifiable fields in rep. */ NEXT(rep) = (void *)((uintptr_t)-1); SET_DATA_OFF(rep, 0xdead); DATA_LEN(rep) = 0xd00d; PKT_LEN(rep) = 0xdeadd00d; NB_SEGS(rep) = 0x2a; PORT(rep) = 0x2a; rep->ol_flags = -1; #endif assert(rep->buf_len == seg->buf_len); assert(rep->buf_len == rxq->mb_len); /* Reconfigure sge to use rep instead of seg. */ assert(sge->lkey == rxq->mr->lkey); sge->addr = ((uintptr_t)rep->buf_addr + seg_headroom); elt->bufs[j] = rep; ++j; /* Update pkt_buf if it's the first segment, or link * seg to the previous one and update pkt_buf_next. */ *pkt_buf_next = seg; pkt_buf_next = &NEXT(seg); /* Update seg information. 
*/ seg_tailroom = (seg->buf_len - seg_headroom); assert(sge->length == seg_tailroom); SET_DATA_OFF(seg, seg_headroom); if (likely(len <= seg_tailroom)) { /* Last segment. */ DATA_LEN(seg) = len; PKT_LEN(seg) = len; /* Sanity check. */ assert(rte_pktmbuf_headroom(seg) == seg_headroom); assert(rte_pktmbuf_tailroom(seg) == (seg_tailroom - len)); break; } DATA_LEN(seg) = seg_tailroom; PKT_LEN(seg) = seg_tailroom; /* Sanity check. */ assert(rte_pktmbuf_headroom(seg) == seg_headroom); assert(rte_pktmbuf_tailroom(seg) == 0); /* Fix len and clear headroom for next segments. */ len -= seg_tailroom; seg_headroom = 0; } /* Update head and tail segments. */ *pkt_buf_next = NULL; assert(pkt_buf != NULL); assert(j != 0); NB_SEGS(pkt_buf) = j; PORT(pkt_buf) = rxq->port_id; PKT_LEN(pkt_buf) = pkt_buf_len; if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip) { pkt_buf->packet_type = rxq_cq_to_pkt_type(flags); pkt_buf->ol_flags = rxq_cq_to_ol_flags(rxq, flags); #ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) { pkt_buf->ol_flags |= PKT_RX_VLAN_PKT; pkt_buf->vlan_tci = vlan_tci; } #endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ } /* Return packet. */ *(pkts++) = pkt_buf; ++pkts_ret; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment bytes counter. */ rxq->stats.ibytes += pkt_buf_len; #endif repost: ret = rxq->recv(rxq->wq, elt->sges, RTE_DIM(elt->sges)); if (unlikely(ret)) { /* Inability to repost WRs is fatal. */ DEBUG("%p: recv_sg_list(): failed (ret=%d)", (void *)rxq->priv, ret); abort(); } if (++elts_head >= elts_n) elts_head = 0; continue; } if (unlikely(i == 0)) return 0; rxq->elts_head = elts_head; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment packets counter. */ rxq->stats.ipackets += pkts_ret; #endif return pkts_ret; }
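/*
 * The refill discipline in mlx5_rx_burst_sp() above: a received segment is
 * detached from the ring only after a replacement mbuf has been allocated
 * and swapped into the same slot, so the hardware ring never runs short
 * of buffers; on allocation failure the spent segment is simply reposted.
 * A minimal sketch of that swap, using the public rte_pktmbuf_alloc()
 * rather than the internal raw-alloc helper; the function is hypothetical.
 */
#include <rte_mbuf.h>
#include <rte_prefetch.h>

static inline struct rte_mbuf *
swap_ring_slot(struct rte_mbuf **ring, unsigned int slot,
	       struct rte_mempool *mp)
{
	struct rte_mbuf *spent = ring[slot];
	struct rte_mbuf *rep;

	rte_prefetch0(spent);		/* warm the header while allocating */
	rep = rte_pktmbuf_alloc(mp);
	if (rep == NULL)
		return NULL;		/* caller keeps/reposts spent buffer */
	ring[slot] = rep;		/* ring slot stays populated */
	return spent;			/* hand spent segment to the app */
}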
virtio_dev_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool) { struct rte_mbuf m; struct vhost_virtqueue *vq; struct vring_desc *desc; uint64_t buff_addr = 0; uint32_t head[MAX_PKT_BURST]; uint32_t used_idx; uint32_t i; uint16_t free_entries, packet_success = 0; uint16_t avail_idx; vq = dev->virtqueue_tx; avail_idx = *((volatile uint16_t *)&vq->avail->idx); /* If there are no available buffers then return. */ if (vq->last_used_idx == avail_idx) return; LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh); /* Prefetch available ring to retrieve head indexes. */ rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]); /* Get the number of free entries in the ring. */ free_entries = avail_idx - vq->last_used_idx; free_entries = unlikely(free_entries < MAX_PKT_BURST) ? free_entries : MAX_PKT_BURST; LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries); /* Retrieve all of the head indexes first to avoid caching issues. */ for (i = 0; i < free_entries; i++) head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)]; /* Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[packet_success]]); while (packet_success < free_entries) { desc = &vq->desc[head[packet_success]]; /* Prefetch descriptor address. */ rte_prefetch0(desc); if (packet_success < (free_entries - 1)) { /* Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[packet_success+1]]); } /* Update used index buffer information. */ used_idx = vq->last_used_idx & (vq->size - 1); vq->used->ring[used_idx].id = head[packet_success]; vq->used->ring[used_idx].len = 0; /* Discard first buffer as it is the virtio header */ desc = &vq->desc[desc->next]; /* Buffer address translation. */ buff_addr = gpa_to_vva(dev, desc->addr); /* Prefetch buffer address. */ rte_prefetch0((void *)(uintptr_t)buff_addr); /* Setup dummy mbuf. This is copied to a real mbuf if transmitted out of the physical port. */ m.pkt.data_len = desc->len; m.pkt.data = (void *)(uintptr_t)buff_addr; m.pkt.nb_segs = 1; virtio_tx_route(dev, &m, mbuf_pool, 0); vq->last_used_idx++; packet_success++; } rte_compiler_barrier(); vq->used->idx += packet_success; /* Kick guest if required. */ }
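/*
 * Both vhost loops above read every head index out of the avail ring in
 * one pass before touching descriptors, so the shared avail cachelines
 * are visited once rather than once per packet. A sketch of that gather
 * for a power-of-two ring; the 16-bit indexes wrap modulo 2^16 by design,
 * so plain subtraction yields the entry count. Names are illustrative.
 */
#include <stdint.h>

static inline uint16_t
gather_avail_heads(const uint16_t *avail_ring, uint16_t ring_size,
		   uint16_t last_used, uint16_t avail_idx,
		   uint32_t *head, uint16_t max_burst)
{
	uint16_t n = (uint16_t)(avail_idx - last_used);	/* entries ready */
	uint16_t i;

	if (n > max_burst)
		n = max_burst;
	for (i = 0; i < n; i++)
		head[i] = avail_ring[(uint16_t)(last_used + i) &
				     (ring_size - 1)];
	return n;
}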
/* * Notice: * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST * number of DD bits */ static inline uint16_t _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint8_t *split_packet) { volatile union i40e_rx_desc *rxdp; struct i40e_rx_entry *sw_ring; uint16_t nb_pkts_recd; int pos; uint64_t var; __m128i shuf_msk; uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; __m128i crc_adjust = _mm_set_epi16( 0, 0, 0, /* ignore non-length fields */ -rxq->crc_len, /* sub crc on data_len */ 0, /* ignore high-16bits of pkt_len */ -rxq->crc_len, /* sub crc on pkt_len */ 0, 0 /* ignore pkt_type field */ ); /* * compile-time check the above crc_adjust layout is correct. * NOTE: the first field (lowest address) is given last in set_epi16 * call above. */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8); __m128i dd_check, eop_check; /* nb_pkts shall be less than or equal to RTE_I40E_MAX_RX_BURST */ nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST); /* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP); /* Just the act of getting into the function from the application is * going to cost about 7 cycles */ rxdp = rxq->rx_ring + rxq->rx_tail; rte_prefetch0(rxdp); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act */ if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) i40e_rxq_rearm(rxq); /* Before we start moving massive data around, check to see if * there is actually a packet available */ if (!(rxdp->wb.qword1.status_error_len & rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT))) return 0; /* 4 packets DD mask */ dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL); /* 4 packets EOP mask */ eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL); /* mask to shuffle from desc. to mbuf */ shuf_msk = _mm_set_epi8( 7, 6, 5, 4, /* octet 4~7, 32bits rss */ 3, 2, /* octet 2~3, low 16 bits vlan_macip */ 15, 14, /* octet 15~14, 16 bits data_len */ 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ 15, 14, /* octet 15~14, low 16 bits pkt_len */ 0xFF, 0xFF, /* pkt_type set as unknown */ 0xFF, 0xFF /* pkt_type set as unknown */ ); /* * Compile-time verify the shuffle mask * NOTE: some field positions already verified above, but duplicated * here for completeness in case of future modifications. */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12); /* Cache is empty -> need to scan the buffer rings, but first move * the next 'n' mbufs into the cache */ sw_ring = &rxq->sw_ring[rxq->rx_tail]; /* A. load 4 packets in one loop * [A*. mask out 4 unused dirty field in desc] * B. copy 4 mbuf point from swring to rx_pkts * C. calc the number of DD bits among the 4 packets * [C*. extract the end-of-packet bit, if requested] * D. fill info.
from desc to mbuf */ for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; pos += RTE_I40E_DESCS_PER_LOOP, rxdp += RTE_I40E_DESCS_PER_LOOP) { __m128i descs[RTE_I40E_DESCS_PER_LOOP]; __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; __m128i zero, staterr, sterr_tmp1, sterr_tmp2; /* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */ __m128i mbp1; #if defined(RTE_ARCH_X86_64) __m128i mbp2; #endif /* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */ mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); rte_compiler_barrier(); /* B.2 copy 2 64 bit or 4 32 bit mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); #if defined(RTE_ARCH_X86_64) /* B.1 load 2 64 bit mbuf points */ mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); #endif descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); rte_compiler_barrier(); /* B.1 load 2 mbuf point */ descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); rte_compiler_barrier(); descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); #if defined(RTE_ARCH_X86_64) /* B.2 copy 2 mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2); #endif if (split_packet) { rte_mbuf_prefetch_part2(rx_pkts[pos]); rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); } /* avoid compiler reorder optimization */ rte_compiler_barrier(); /* pkt 3,4 shift the pktlen field to be 16-bit aligned*/ const __m128i len3 = _mm_slli_epi32(descs[3], PKTLEN_SHIFT); const __m128i len2 = _mm_slli_epi32(descs[2], PKTLEN_SHIFT); /* merge the now-aligned packet length fields back in */ descs[3] = _mm_blend_epi16(descs[3], len3, 0x80); descs[2] = _mm_blend_epi16(descs[2], len2, 0x80); /* D.1 pkt 3,4 convert format from desc to pktmbuf */ pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk); pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk); /* C.1 4=>2 filter staterr info only */ sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]); /* C.1 4=>2 filter staterr info only */ sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]); desc_to_olflags_v(rxq, descs, &rx_pkts[pos]); /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust); pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust); /* pkt 1,2 shift the pktlen field to be 16-bit aligned*/ const __m128i len1 = _mm_slli_epi32(descs[1], PKTLEN_SHIFT); const __m128i len0 = _mm_slli_epi32(descs[0], PKTLEN_SHIFT); /* merge the now-aligned packet length fields back in */ descs[1] = _mm_blend_epi16(descs[1], len1, 0x80); descs[0] = _mm_blend_epi16(descs[0], len0, 0x80); /* D.1 pkt 1,2 convert format from desc to pktmbuf */ pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk); pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk); /* C.2 get 4 pkts staterr value */ zero = _mm_xor_si128(dd_check, dd_check); staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2); /* D.3 copy final 3,4 data to rx_pkts */ _mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1, pkt_mb4); _mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1, pkt_mb3); /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */ pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust); pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust); /* C* extract and record EOP bit */ if (split_packet) { __m128i eop_shuf_mask = _mm_set_epi8( 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x04, 0x0C, 0x00, 0x08 ); /* and with mask to extract bits, flipping 1-0 */ 
__m128i eop_bits = _mm_andnot_si128(staterr, eop_check); /* the staterr values are not in order, as the count * of dd bits doesn't care. However, for end of * packet tracking, we do care, so shuffle. This also * compresses the 32-bit values to 8-bit */ eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask); /* store the resulting 32-bit value */ *(int *)split_packet = _mm_cvtsi128_si32(eop_bits); split_packet += RTE_I40E_DESCS_PER_LOOP; } /* C.3 calc available number of desc */ staterr = _mm_and_si128(staterr, dd_check); staterr = _mm_packs_epi32(staterr, zero); /* D.3 copy final 1,2 data to rx_pkts */ _mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1, pkt_mb2); _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, pkt_mb1); desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl); /* C.4 calc available number of desc */ var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); nb_pkts_recd += var; if (likely(var != RTE_I40E_DESCS_PER_LOOP)) break; } /* Update our internal tail pointer */ rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd); rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1)); rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); return nb_pkts_recd; }
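/*
 * Scalar equivalent of the crc_adjust step in _recv_raw_pkts_vec() above:
 * when the NIC reports lengths that still include the Ethernet CRC, four
 * bytes must come off both pkt_len and data_len. The vector path folds
 * this into a single _mm_add_epi16 of negative constants covering all
 * four mbufs; the per-mbuf version is shown for reference, assuming
 * crc_len is 4 (or 0 when the hardware strips the CRC).
 */
#include <stdint.h>
#include <rte_mbuf.h>

static inline void
rx_strip_crc_len(struct rte_mbuf *m, uint8_t crc_len)
{
	m->pkt_len -= crc_len;				/* total frame length */
	m->data_len = (uint16_t)(m->data_len - crc_len); /* first segment */
}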