i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
{
    uintptr_t p;
    struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */

    mb_def.nb_segs = 1;
    mb_def.data_off = RTE_PKTMBUF_HEADROOM;
    mb_def.port = rxq->port_id;
    rte_mbuf_refcnt_set(&mb_def, 1);

    /* prevent compiler reordering: rearm_data covers previous fields */
    rte_compiler_barrier();
    p = (uintptr_t)&mb_def.rearm_data;
    rxq->mbuf_initializer = *(uint64_t *)p;
    return 0;
}

int __attribute__((cold))
i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq)
{
    return 0;
}

int __attribute__((cold))
i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
#ifndef RTE_LIBRTE_IEEE1588
    struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
    struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf;

    /* need SSE4.1 support */
    if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1))
        return -1;

#ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE
    /* without Rx ol_flags, no VP flag report */
    if (rxmode->hw_vlan_strip != 0 || rxmode->hw_vlan_extend != 0)
        return -1;
#endif

    /* no fdir support */
    if (fconf->mode != RTE_FDIR_MODE_NONE)
        return -1;

    /* - no csum error report support
     * - no header split support
     */
    if (rxmode->hw_ip_checksum == 1 || rxmode->header_split == 1)
        return -1;

    return 0;
#else
    RTE_SET_USED(dev);
    return -1;
#endif
}
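The setup above packs the per-mbuf constant fields (refcnt, nb_segs, port, data_off) into one 64-bit template read through rearm_data; the compiler barrier keeps the individual field stores from being reordered past that aggregate read. A minimal standalone sketch of the same idea, using a hypothetical struct in place of rte_mbuf and an inline asm barrier in place of rte_compiler_barrier():

#include <stdint.h>
#include <string.h>

/* Hypothetical metadata whose "hot" fields sit contiguously so they can
 * be re-armed with a single 64-bit store. */
struct pkt_meta {
    uint16_t refcnt;
    uint16_t nb_segs;
    uint16_t port;
    uint16_t data_off;
};

/* Build the 64-bit template once at queue-setup time. */
static uint64_t
build_initializer(uint16_t port, uint16_t headroom)
{
    struct pkt_meta tmpl;
    uint64_t init;

    tmpl.refcnt = 1;
    tmpl.nb_segs = 1;
    tmpl.port = port;
    tmpl.data_off = headroom;

    /* Keep the compiler from sinking the field stores below the
     * aggregate read (the role rte_compiler_barrier() plays). */
    __asm__ volatile ("" ::: "memory");
    memcpy(&init, &tmpl, sizeof(init));
    return init;
}

/* Per-packet re-arm: one 64-bit store instead of four field writes. */
static void
rearm_meta(struct pkt_meta *m, uint64_t init)
{
    memcpy(m, &init, sizeof(init));
}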
static inline uint32_t bnxt_tx_avail(struct bnxt_tx_ring_info *txr)
{
    /* Tell compiler to fetch tx indices from memory. */
    rte_compiler_barrier();

    return txr->tx_ring_struct->ring_size -
        ((txr->tx_prod - txr->tx_cons) &
         txr->tx_ring_struct->ring_mask) - 1;
}
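bnxt_tx_avail() derives free slots from producer/consumer indices that wrap modulo a power-of-two ring size, keeping one slot back so a full ring stays distinguishable from an empty one. A self-contained sketch of that arithmetic (struct and names hypothetical):

#include <stdint.h>

struct ring_idx {
    uint32_t size;   /* power of two */
    uint32_t mask;   /* size - 1 */
    uint32_t prod;   /* next slot to fill */
    uint32_t cons;   /* next slot to drain */
};

/* Entries currently in flight. */
static inline uint32_t
ring_used(const struct ring_idx *r)
{
    return (r->prod - r->cons) & r->mask;
}

/* Free slots, reserving one so prod == cons always means "empty". */
static inline uint32_t
ring_free(const struct ring_idx *r)
{
    return r->size - ring_used(r) - 1;
}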
static uint16_t eth_xenvirt_tx(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { struct virtqueue *txvq = tx_queue; struct rte_mbuf *txm; uint16_t nb_used, nb_tx, num, i; int error; uint32_t len[VIRTIO_MBUF_BURST_SZ]; struct rte_mbuf *snd_pkts[VIRTIO_MBUF_BURST_SZ]; struct pmd_internals *pi = txvq->internals; nb_tx = 0; if (unlikely(nb_pkts == 0)) return 0; PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); nb_used = VIRTQUEUE_NUSED(txvq); rte_compiler_barrier(); /* rmb */ num = (uint16_t)(likely(nb_used <= VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ); num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num); for (i = 0; i < num ; i ++) { /* mergable not supported, one segment only */ rte_pktmbuf_free_seg(snd_pkts[i]); } while (nb_tx < nb_pkts) { if (likely(!virtqueue_full(txvq))) { /* TODO drop tx_pkts if it contains multiple segments */ txm = tx_pkts[nb_tx]; error = virtqueue_enqueue_xmit(txvq, txm); if (unlikely(error)) { if (error == ENOSPC) PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0\n"); else if (error == EMSGSIZE) PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1\n"); else PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d\n", error); break; } nb_tx++; } else { PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n"); /* virtqueue_notify not needed in our para-virt solution */ break; } } pi->eth_stats.opackets += nb_tx; return nb_tx; }
/* * This function registers mac along with a * vlan tag to a VMDQ. */ static int link_vmdq(struct virtio_net *dev) { int ret; struct virtio_net_data_ll *dev_ll; dev_ll = ll_root_used; while (dev_ll != NULL) { if ((dev != dev_ll->dev) && ether_addr_cmp(&dev->mac_address, &dev_ll->dev->mac_address)) { RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") WARNING: This device is using an existing MAC address and has not been registered.\n", dev->device_fh); return -1; } dev_ll = dev_ll->next; } /* vlan_tag currently uses the device_id. */ dev->vlan_tag = vlan_tags[dev->device_fh]; dev->vmdq_rx_q = dev->device_fh * (num_queues/num_devices); /* Print out VMDQ registration info. */ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VLAN_TAG %d registered\n", dev->device_fh, dev->mac_address.addr_bytes[0], dev->mac_address.addr_bytes[1], dev->mac_address.addr_bytes[2], dev->mac_address.addr_bytes[3], dev->mac_address.addr_bytes[4], dev->mac_address.addr_bytes[5], dev->vlan_tag); /* Register the MAC address. */ ret = rte_eth_dev_mac_addr_add(ports[0], &dev->mac_address, (uint32_t)dev->device_fh); if (ret) { RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add device MAC address to VMDQ\n", dev->device_fh); return -1; } /* Enable stripping of the vlan tag as we handle routing. */ rte_eth_dev_set_vlan_strip_on_queue(ports[0], dev->vmdq_rx_q, 1); rte_compiler_barrier(); /* Set device as ready for RX. */ dev->ready = DEVICE_READY; return 0; }
static uint16_t eth_xenvirt_rx(void *q, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) { struct virtqueue *rxvq = q; struct rte_mbuf *rxm, *new_mbuf; uint16_t nb_used, num; uint32_t len[VIRTIO_MBUF_BURST_SZ]; uint32_t i; struct pmd_internals *pi = rxvq->internals; nb_used = VIRTQUEUE_NUSED(rxvq); rte_compiler_barrier(); /* rmb */ num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts); num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ); if (unlikely(num == 0)) return 0; num = virtqueue_dequeue_burst(rxvq, rx_pkts, len, num); PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num); for (i = 0; i < num ; i ++) { rxm = rx_pkts[i]; PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]); rxm->next = NULL; rxm->data_off = RTE_PKTMBUF_HEADROOM; rxm->data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr)); rxm->nb_segs = 1; rxm->port = pi->port_id; rxm->pkt_len = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr)); } /* allocate new mbuf for the used descriptor */ while (likely(!virtqueue_full(rxvq))) { new_mbuf = rte_rxmbuf_alloc(rxvq->mpool); if (unlikely(new_mbuf == NULL)) { break; } if (unlikely(virtqueue_enqueue_recv_refill(rxvq, new_mbuf))) { rte_pktmbuf_free_seg(new_mbuf); break; } } pi->eth_stats.ipackets += num; return num; }
static inline uint32_t sw_schedule_parallel_to_cq(struct sw_evdev *sw, struct sw_qid * const qid, uint32_t iq_num, unsigned int count, int keep_order) { uint32_t i; uint32_t cq_idx = qid->cq_next_tx; /* This is the QID ID. The QID ID is static, hence it can be * used to identify the stage of processing in history lists etc */ uint32_t qid_id = qid->id; if (count > MAX_PER_IQ_DEQUEUE) count = MAX_PER_IQ_DEQUEUE; if (keep_order) /* only schedule as many as we have reorder buffer entries */ count = RTE_MIN(count, rte_ring_count(qid->reorder_buffer_freelist)); for (i = 0; i < count; i++) { const struct rte_event *qe = iq_ring_peek(qid->iq[iq_num]); uint32_t cq_check_count = 0; uint32_t cq; /* * for parallel, just send to next available CQ in round-robin * fashion. So scan for an available CQ. If all CQs are full * just return and move on to next QID */ do { if (++cq_check_count > qid->cq_num_mapped_cqs) goto exit; cq = qid->cq_map[cq_idx]; if (++cq_idx == qid->cq_num_mapped_cqs) cq_idx = 0; } while (rte_event_ring_free_count( sw->ports[cq].cq_worker_ring) == 0 || sw->ports[cq].inflights == SW_PORT_HIST_LIST); struct sw_port *p = &sw->ports[cq]; if (sw->cq_ring_space[cq] == 0 || p->inflights == SW_PORT_HIST_LIST) break; sw->cq_ring_space[cq]--; qid->stats.tx_pkts++; const int head = (p->hist_head & (SW_PORT_HIST_LIST-1)); p->hist_list[head].fid = SW_HASH_FLOWID(qe->flow_id); p->hist_list[head].qid = qid_id; if (keep_order) rte_ring_sc_dequeue(qid->reorder_buffer_freelist, (void *)&p->hist_list[head].rob_entry); sw->ports[cq].cq_buf[sw->ports[cq].cq_buf_count++] = *qe; iq_ring_pop(qid->iq[iq_num]); rte_compiler_barrier(); p->inflights++; p->stats.tx_pkts++; p->hist_head++; } exit: qid->cq_next_tx = cq_idx; return i; }
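The parallel scheduler above walks the QID's mapped CQs in round-robin order, skipping ports whose worker ring or history list has no room, and gives up on the QID once every mapped CQ has been checked. A small standalone sketch of that scan (array sizes and names hypothetical):

#include <stdint.h>

#define NUM_CQS 4

/* Pick the next CQ in round-robin order that still has room.
 * Returns the chosen CQ or -1 if every mapped CQ is full.
 * *next_tx is the rotating start point, persisted across calls. */
static int
pick_parallel_cq(const uint32_t space[NUM_CQS], uint32_t *next_tx)
{
    uint32_t idx = *next_tx;
    uint32_t checked;

    for (checked = 0; checked < NUM_CQS; checked++) {
        uint32_t cq = idx;

        if (++idx == NUM_CQS)
            idx = 0;
        if (space[cq] != 0) {
            *next_tx = idx; /* resume after this CQ next time */
            return (int)cq;
        }
    }
    *next_tx = idx;
    return -1; /* all CQs full: caller moves on to the next QID */
}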
uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { uint16_t nb_tx; vmxnet3_tx_queue_t *txq = tx_queue; struct vmxnet3_hw *hw = txq->hw; if (unlikely(txq->stopped)) { PMD_TX_LOG(DEBUG, "Tx queue is stopped."); return 0; } /* Free up the comp_descriptors aggressively */ vmxnet3_tq_tx_complete(txq); nb_tx = 0; while (nb_tx < nb_pkts) { Vmxnet3_GenericDesc *gdesc; vmxnet3_buf_info_t *tbi; uint32_t first2fill, avail, dw2; struct rte_mbuf *txm = tx_pkts[nb_tx]; struct rte_mbuf *m_seg = txm; /* Is this packet execessively fragmented, then drop */ if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) { ++txq->stats.drop_too_many_segs; ++txq->stats.drop_total; rte_pktmbuf_free(txm); ++nb_tx; continue; } /* Is command ring full? */ avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring); if (txm->nb_segs > avail) { ++txq->stats.tx_ring_full; break; } /* use the previous gen bit for the SOP desc */ dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; first2fill = txq->cmd_ring.next2fill; do { /* Remember the transmit buffer for cleanup */ tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill; tbi->m = m_seg; /* NB: the following assumes that VMXNET3 maximum transmit buffer size (16K) is greater than maximum sizeof mbuf segment size. */ gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill; gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg); gdesc->dword[2] = dw2 | m_seg->data_len; gdesc->dword[3] = 0; /* move to the next2fill descriptor */ vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring); /* use the right gen for non-SOP desc */ dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT; } while ((m_seg = m_seg->next) != NULL); /* Update the EOP descriptor */ gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ; /* Add VLAN tag if present */ gdesc = txq->cmd_ring.base + first2fill; if (txm->ol_flags & PKT_TX_VLAN_PKT) { gdesc->txd.ti = 1; gdesc->txd.tci = txm->vlan_tci; } /* TODO: Add transmit checksum offload here */ /* flip the GEN bit on the SOP */ rte_compiler_barrier(); gdesc->dword[2] ^= VMXNET3_TXD_GEN; txq->shared->ctrl.txNumDeferred++; nb_tx++; } PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold); if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) { txq->shared->ctrl.txNumDeferred = 0; /* Notify vSwitch that packets are available. */ VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN), txq->cmd_ring.next2fill); } return nb_tx; }
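vmxnet3 stages every descriptor of a packet with the inverted GEN bit on the SOP entry and, only after a compiler barrier, flips the SOP GEN bit that the device polls, so the device never sees a half-written chain. A minimal sketch of the same handover with a hypothetical descriptor layout (single segment, GEN bit position assumed):

#include <stdint.h>

#define DESC_GEN (1u << 31) /* hypothetical ownership/generation bit */

struct tx_desc {
    uint64_t addr;
    uint32_t len_flags; /* length plus GEN bit */
    uint32_t ext;
};

/* Publish one descriptor: write all fields first, flip ownership last. */
static void
publish_desc(volatile struct tx_desc *d, uint64_t dma_addr, uint32_t len,
             uint32_t cur_gen)
{
    d->addr = dma_addr;
    d->ext = 0;
    /* Stage with the *previous* generation so the device ignores the
     * entry while it is still being written. */
    d->len_flags = (len & ~DESC_GEN) | (cur_gen ? 0 : DESC_GEN);

    /* Compiler barrier: keep the stores above from sinking below the
     * ownership flip (rte_compiler_barrier() in the driver). */
    __asm__ volatile ("" ::: "memory");

    /* Flip GEN: the descriptor now belongs to the device. */
    d->len_flags ^= DESC_GEN;
}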
virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool) { struct rte_mbuf m; struct vhost_virtqueue *vq; struct vring_desc *desc; uint64_t buff_addr = 0; uint32_t head[MAX_PKT_BURST]; uint32_t used_idx; uint32_t i; uint16_t free_entries, packet_success = 0; uint16_t avail_idx; vq = dev->virtqueue_tx; avail_idx = *((volatile uint16_t *)&vq->avail->idx); /* If there are no available buffers then return. */ if (vq->last_used_idx == avail_idx) return; LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh); /* Prefetch available ring to retrieve head indexes. */ rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]); /*get the number of free entries in the ring*/ free_entries = avail_idx - vq->last_used_idx; free_entries = unlikely(free_entries < MAX_PKT_BURST) ? free_entries : MAX_PKT_BURST; LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries); /* Retrieve all of the head indexes first to avoid caching issues. */ for (i = 0; i < free_entries; i++) head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)]; /* Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[packet_success]]); while (packet_success < free_entries) { desc = &vq->desc[head[packet_success]]; /* Prefetch descriptor address. */ rte_prefetch0(desc); if (packet_success < (free_entries - 1)) { /* Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[packet_success+1]]); } /* Update used index buffer information. */ used_idx = vq->last_used_idx & (vq->size - 1); vq->used->ring[used_idx].id = head[packet_success]; vq->used->ring[used_idx].len = 0; /* Discard first buffer as it is the virtio header */ desc = &vq->desc[desc->next]; /* Buffer address translation. */ buff_addr = gpa_to_vva(dev, desc->addr); /* Prefetch buffer address. */ rte_prefetch0((void*)(uintptr_t)buff_addr); /* Setup dummy mbuf. This is copied to a real mbuf if transmitted out the physical port. */ m.pkt.data_len = desc->len; m.pkt.data = (void*)(uintptr_t)buff_addr; m.pkt.nb_segs = 1; virtio_tx_route(dev, &m, mbuf_pool, 0); vq->last_used_idx++; packet_success++; } rte_compiler_barrier(); vq->used->idx += packet_success; /* Kick guest if required. */ }
virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count) { struct vhost_virtqueue *vq; struct vring_desc *desc; struct rte_mbuf *buff; /* The virtio_hdr is initialised to 0. */ struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0,0,0,0,0,0},0}; uint64_t buff_addr = 0; uint64_t buff_hdr_addr = 0; uint32_t head[MAX_PKT_BURST], packet_len = 0; uint32_t head_idx, packet_success = 0; uint16_t avail_idx, res_cur_idx; uint16_t res_base_idx, res_end_idx; uint16_t free_entries; uint8_t success = 0; LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh); vq = dev->virtqueue_rx; count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count; /* As many data cores may want access to available buffers, they need to be reserved. */ do { res_base_idx = vq->last_used_idx_res; avail_idx = *((volatile uint16_t *)&vq->avail->idx); free_entries = (avail_idx - res_base_idx); /*check that we have enough buffers*/ if (unlikely(count > free_entries)) count = free_entries; if (count == 0) return 0; res_end_idx = res_base_idx + count; /* vq->last_used_idx_res is atomically updated. */ success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx, res_end_idx); } while (unlikely(success == 0)); res_cur_idx = res_base_idx; LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx); /* Prefetch available ring to retrieve indexes. */ rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]); /* Retrieve all of the head indexes first to avoid caching issues. */ for (head_idx = 0; head_idx < count; head_idx++) head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)]; /*Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[packet_success]]); while (res_cur_idx != res_end_idx) { /* Get descriptor from available ring */ desc = &vq->desc[head[packet_success]]; /* Prefetch descriptor address. */ rte_prefetch0(desc); buff = pkts[packet_success]; /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */ buff_addr = gpa_to_vva(dev, desc->addr); /* Prefetch buffer address. */ rte_prefetch0((void*)(uintptr_t)buff_addr); { /* Copy virtio_hdr to packet and increment buffer address */ buff_hdr_addr = buff_addr; packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen; /* * If the descriptors are chained the header and data are placed in * separate buffers. */ if (desc->flags & VRING_DESC_F_NEXT) { desc->len = vq->vhost_hlen; desc = &vq->desc[desc->next]; /* Buffer address translation. */ buff_addr = gpa_to_vva(dev, desc->addr); desc->len = rte_pktmbuf_data_len(buff); } else { buff_addr += vq->vhost_hlen; desc->len = packet_len; } } /* Update used ring with desc information */ vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success]; vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len; /* Copy mbuf data to buffer */ rte_memcpy((void *)(uintptr_t)buff_addr, (const void*)buff->pkt.data, rte_pktmbuf_data_len(buff)); res_cur_idx++; packet_success++; /* mergeable is disabled then a header is required per buffer. */ rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void*)&virtio_hdr, vq->vhost_hlen); if (res_cur_idx < res_end_idx) { /* Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[packet_success]]); } } rte_compiler_barrier(); /* Wait until it's our turn to add our buffer to the used ring. */ while (unlikely(vq->last_used_idx != res_base_idx)) rte_pause(); *(volatile uint16_t *)&vq->used->idx += count; vq->last_used_idx = res_end_idx; return count; }
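Because several data cores may enqueue into the same virtqueue, the function first reserves a contiguous window [res_base_idx, res_end_idx) with a 16-bit compare-and-set loop before touching any descriptor. A standalone sketch of that reservation step, with GCC atomic builtins standing in for rte_atomic16_cmpset and the window clamped to what is actually available:

#include <stdint.h>

/* Reserve up to *count slots starting at the shared reservation cursor.
 * Returns the base index of the granted window and updates *count with
 * the number of slots actually reserved (0 if none were available). */
static uint16_t
reserve_window(uint16_t *cursor, const volatile uint16_t *avail_idx,
               uint16_t *count)
{
    uint16_t base, end, free_entries;

    do {
        base = __atomic_load_n(cursor, __ATOMIC_RELAXED);
        /* Unsigned 16-bit subtraction handles index wrap-around. */
        free_entries = (uint16_t)(*avail_idx - base);
        if (*count > free_entries)
            *count = free_entries;
        if (*count == 0)
            return base;
        end = (uint16_t)(base + *count);
    } while (!__atomic_compare_exchange_n(cursor, &base, end, 0,
                                          __ATOMIC_RELAXED,
                                          __ATOMIC_RELAXED));
    return base;
}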
uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) { struct rte_mbuf *m, *prev; struct vhost_virtqueue *vq; struct vring_desc *desc; uint64_t vb_addr = 0; uint32_t head[MAX_PKT_BURST]; uint32_t used_idx; uint32_t i; uint16_t free_entries, entry_success = 0; uint16_t avail_idx; if (unlikely(queue_id != VIRTIO_TXQ)) { LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n"); return 0; } vq = dev->virtqueue[VIRTIO_TXQ]; avail_idx = *((volatile uint16_t *)&vq->avail->idx); /* If there are no available buffers then return. */ if (vq->last_used_idx == avail_idx) return 0; LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__, dev->device_fh); /* Prefetch available ring to retrieve head indexes. */ rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]); /*get the number of free entries in the ring*/ free_entries = (avail_idx - vq->last_used_idx); free_entries = RTE_MIN(free_entries, count); /* Limit to MAX_PKT_BURST. */ free_entries = RTE_MIN(free_entries, MAX_PKT_BURST); LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries); /* Retrieve all of the head indexes first to avoid caching issues. */ for (i = 0; i < free_entries; i++) head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)]; /* Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[entry_success]]); rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]); while (entry_success < free_entries) { uint32_t vb_avail, vb_offset; uint32_t seg_avail, seg_offset; uint32_t cpy_len; uint32_t seg_num = 0; struct rte_mbuf *cur; uint8_t alloc_err = 0; desc = &vq->desc[head[entry_success]]; /* Discard first buffer as it is the virtio header */ if (desc->flags & VRING_DESC_F_NEXT) { desc = &vq->desc[desc->next]; vb_offset = 0; vb_avail = desc->len; } else { vb_offset = vq->vhost_hlen; vb_avail = desc->len - vb_offset; } /* Buffer address translation. */ vb_addr = gpa_to_vva(dev, desc->addr); /* Prefetch buffer address. */ rte_prefetch0((void *)(uintptr_t)vb_addr); used_idx = vq->last_used_idx & (vq->size - 1); if (entry_success < (free_entries - 1)) { /* Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[entry_success+1]]); rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]); } /* Update used index buffer information. */ vq->used->ring[used_idx].id = head[entry_success]; vq->used->ring[used_idx].len = 0; /* Allocate an mbuf and populate the structure. */ m = rte_pktmbuf_alloc(mbuf_pool); if (unlikely(m == NULL)) { RTE_LOG(ERR, VHOST_DATA, "Failed to allocate memory for mbuf.\n"); break; } seg_offset = 0; seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; cpy_len = RTE_MIN(vb_avail, seg_avail); PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0); seg_num++; cur = m; prev = m; while (cpy_len != 0) { rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset), (void *)((uintptr_t)(vb_addr + vb_offset)), cpy_len); seg_offset += cpy_len; vb_offset += cpy_len; vb_avail -= cpy_len; seg_avail -= cpy_len; if (vb_avail != 0) { /* * The segment reachs to its end, * while the virtio buffer in TX vring has * more data to be copied. */ cur->data_len = seg_offset; m->pkt_len += seg_offset; /* Allocate mbuf and populate the structure. 
*/ cur = rte_pktmbuf_alloc(mbuf_pool); if (unlikely(cur == NULL)) { RTE_LOG(ERR, VHOST_DATA, "Failed to " "allocate memory for mbuf.\n"); rte_pktmbuf_free(m); alloc_err = 1; break; } seg_num++; prev->next = cur; prev = cur; seg_offset = 0; seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; } else { if (desc->flags & VRING_DESC_F_NEXT) { /* * There are more virtio buffers in * same vring entry need to be copied. */ if (seg_avail == 0) { /* * The current segment hasn't * room to accomodate more * data. */ cur->data_len = seg_offset; m->pkt_len += seg_offset; /* * Allocate an mbuf and * populate the structure. */ cur = rte_pktmbuf_alloc(mbuf_pool); if (unlikely(cur == NULL)) { RTE_LOG(ERR, VHOST_DATA, "Failed to " "allocate memory " "for mbuf\n"); rte_pktmbuf_free(m); alloc_err = 1; break; } seg_num++; prev->next = cur; prev = cur; seg_offset = 0; seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; } desc = &vq->desc[desc->next]; /* Buffer address translation. */ vb_addr = gpa_to_vva(dev, desc->addr); /* Prefetch buffer address. */ rte_prefetch0((void *)(uintptr_t)vb_addr); vb_offset = 0; vb_avail = desc->len; PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0); } else { /* The whole packet completes. */ cur->data_len = seg_offset; m->pkt_len += seg_offset; vb_avail = 0; } } cpy_len = RTE_MIN(vb_avail, seg_avail); } if (unlikely(alloc_err == 1)) break; m->nb_segs = seg_num; pkts[entry_success] = m; vq->last_used_idx++; entry_success++; } rte_compiler_barrier(); vq->used->idx += entry_success; /* Kick guest if required. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) eventfd_write((int)vq->callfd, 1); return entry_success; }
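The dequeue path above copies a guest buffer chain into an mbuf chain, allocating a fresh segment whenever the current one fills, linking it through prev->next, and accumulating pkt_len on the head mbuf. A self-contained scalar sketch of that segment-filling loop, with fixed-size heap segments standing in for mbufs:

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#define SEG_ROOM 2048 /* usable bytes per segment (hypothetical) */

struct seg {
    struct seg *next;
    uint32_t data_len; /* bytes used in this segment */
    uint32_t pkt_len;  /* total bytes, valid on the head only */
    uint8_t data[SEG_ROOM];
};

/* Copy 'len' bytes from 'src' into a newly built segment chain.
 * Returns the head segment, or NULL if an allocation fails. */
static struct seg *
copy_to_chain(const uint8_t *src, uint32_t len)
{
    struct seg *head = calloc(1, sizeof(*head));
    struct seg *cur = head, *prev = head;
    uint32_t done = 0;

    if (head == NULL)
        return NULL;
    while (done < len) {
        uint32_t room = SEG_ROOM - cur->data_len;
        uint32_t chunk = (len - done < room) ? len - done : room;

        memcpy(cur->data + cur->data_len, src + done, chunk);
        cur->data_len += chunk;
        head->pkt_len += chunk;
        done += chunk;

        if (done < len && cur->data_len == SEG_ROOM) {
            /* Current segment is full: link a fresh one. */
            cur = calloc(1, sizeof(*cur));
            if (cur == NULL) {
                while (head) { /* free the partial chain */
                    struct seg *n = head->next;
                    free(head);
                    head = n;
                }
                return NULL;
            }
            prev->next = cur;
            prev = cur;
        }
    }
    return head;
}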
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count) { struct vhost_virtqueue *vq; struct vring_desc *desc; struct rte_mbuf *buff; /* The virtio_hdr is initialised to 0. */ struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; uint64_t buff_addr = 0; uint64_t buff_hdr_addr = 0; uint32_t head[MAX_PKT_BURST]; uint32_t head_idx, packet_success = 0; uint16_t avail_idx, res_cur_idx; uint16_t res_base_idx, res_end_idx; uint16_t free_entries; uint8_t success = 0; LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh); if (unlikely(queue_id != VIRTIO_RXQ)) { LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n"); return 0; } vq = dev->virtqueue[VIRTIO_RXQ]; count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count; /* * As many data cores may want access to available buffers, * they need to be reserved. */ do { res_base_idx = vq->last_used_idx_res; avail_idx = *((volatile uint16_t *)&vq->avail->idx); free_entries = (avail_idx - res_base_idx); /*check that we have enough buffers*/ if (unlikely(count > free_entries)) count = free_entries; if (count == 0) return 0; res_end_idx = res_base_idx + count; /* vq->last_used_idx_res is atomically updated. */ /* TODO: Allow to disable cmpset if no concurrency in application. */ success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx, res_end_idx); } while (unlikely(success == 0)); res_cur_idx = res_base_idx; LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx); /* Prefetch available ring to retrieve indexes. */ rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]); /* Retrieve all of the head indexes first to avoid caching issues. */ for (head_idx = 0; head_idx < count; head_idx++) head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)]; /*Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[packet_success]]); while (res_cur_idx != res_end_idx) { uint32_t offset = 0, vb_offset = 0; uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0; uint8_t hdr = 0, uncompleted_pkt = 0; /* Get descriptor from available ring */ desc = &vq->desc[head[packet_success]]; buff = pkts[packet_success]; /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */ buff_addr = gpa_to_vva(dev, desc->addr); /* Prefetch buffer address. */ rte_prefetch0((void *)(uintptr_t)buff_addr); /* Copy virtio_hdr to packet and increment buffer address */ buff_hdr_addr = buff_addr; /* * If the descriptors are chained the header and data are * placed in separate buffers. */ if ((desc->flags & VRING_DESC_F_NEXT) && (desc->len == vq->vhost_hlen)) { desc = &vq->desc[desc->next]; /* Buffer address translation. */ buff_addr = gpa_to_vva(dev, desc->addr); } else { vb_offset += vq->vhost_hlen; hdr = 1; } pkt_len = rte_pktmbuf_pkt_len(buff); data_len = rte_pktmbuf_data_len(buff); len_to_cpy = RTE_MIN(data_len, hdr ? 
desc->len - vq->vhost_hlen : desc->len); while (total_copied < pkt_len) { /* Copy mbuf data to buffer */ rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset), (const void *)(rte_pktmbuf_mtod(buff, const char *) + offset), len_to_cpy); PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset), len_to_cpy, 0); offset += len_to_cpy; vb_offset += len_to_cpy; total_copied += len_to_cpy; /* The whole packet completes */ if (total_copied == pkt_len) break; /* The current segment completes */ if (offset == data_len) { buff = buff->next; offset = 0; data_len = rte_pktmbuf_data_len(buff); } /* The current vring descriptor done */ if (vb_offset == desc->len) { if (desc->flags & VRING_DESC_F_NEXT) { desc = &vq->desc[desc->next]; buff_addr = gpa_to_vva(dev, desc->addr); vb_offset = 0; } else { /* Room in vring buffer is not enough */ uncompleted_pkt = 1; break; } } len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset); }; /* Update used ring with desc information */ vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success]; /* Drop the packet if it is uncompleted */ if (unlikely(uncompleted_pkt == 1)) vq->used->ring[res_cur_idx & (vq->size - 1)].len = vq->vhost_hlen; else vq->used->ring[res_cur_idx & (vq->size - 1)].len = pkt_len + vq->vhost_hlen; res_cur_idx++; packet_success++; if (unlikely(uncompleted_pkt == 1)) continue; rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen); PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1); if (res_cur_idx < res_end_idx) { /* Prefetch descriptor index. */ rte_prefetch0(&vq->desc[head[packet_success]]); } } rte_compiler_barrier(); /* Wait until it's our turn to add our buffer to the used ring. */ while (unlikely(vq->last_used_idx != res_base_idx)) rte_pause(); *(volatile uint16_t *)&vq->used->idx += count; vq->last_used_idx = res_end_idx; /* flush used->idx update before we read avail->flags. */ rte_mb(); /* Kick the guest if necessary. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) eventfd_write((int)vq->callfd, 1); return count; }
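After copying, each thread commits its window in reservation order: it spins until last_used_idx equals its own res_base_idx, adds its count to the shared used index, then advances last_used_idx to res_end_idx so the next reserver can proceed; the compiler barrier keeps the ring writes from moving past that commit. A compact sketch of the commit step, with C11 atomics standing in for the volatile/barrier idiom (names hypothetical):

#include <stdatomic.h>
#include <stdint.h>

struct used_commit {
    _Atomic uint16_t last_used_idx; /* next window allowed to commit */
    _Atomic uint16_t used_idx;      /* index visible to the guest */
};

/* Commit a previously reserved window [base, base + count). */
static void
commit_window(struct used_commit *c, uint16_t base, uint16_t count)
{
    /* Wait until all earlier reservations have been committed. */
    while (atomic_load_explicit(&c->last_used_idx,
                                memory_order_acquire) != base)
        ; /* rte_pause() in the original */

    /* Release ordering makes the ring entries written before this call
     * visible no later than the index update itself. */
    atomic_fetch_add_explicit(&c->used_idx, count, memory_order_release);

    /* Let the next window in line commit. */
    atomic_store_explicit(&c->last_used_idx, (uint16_t)(base + count),
                          memory_order_release);
}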
/* * Notice: * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST * numbers of DD bits */ static inline uint16_t _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint8_t *split_packet) { volatile union i40e_rx_desc *rxdp; struct i40e_rx_entry *sw_ring; uint16_t nb_pkts_recd; int pos; uint64_t var; __m128i shuf_msk; uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; __m128i crc_adjust = _mm_set_epi16( 0, 0, 0, /* ignore non-length fields */ -rxq->crc_len, /* sub crc on data_len */ 0, /* ignore high-16bits of pkt_len */ -rxq->crc_len, /* sub crc on pkt_len */ 0, 0 /* ignore pkt_type field */ ); /* * compile-time check the above crc_adjust layout is correct. * NOTE: the first field (lowest address) is given last in set_epi16 * call above. */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8); __m128i dd_check, eop_check; /* nb_pkts shall be less equal than RTE_I40E_MAX_RX_BURST */ nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST); /* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP); /* Just the act of getting into the function from the application is * going to cost about 7 cycles */ rxdp = rxq->rx_ring + rxq->rx_tail; rte_prefetch0(rxdp); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act */ if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) i40e_rxq_rearm(rxq); /* Before we start moving massive data around, check to see if * there is actually a packet available */ if (!(rxdp->wb.qword1.status_error_len & rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT))) return 0; /* 4 packets DD mask */ dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL); /* 4 packets EOP mask */ eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL); /* mask to shuffle from desc. to mbuf */ shuf_msk = _mm_set_epi8( 7, 6, 5, 4, /* octet 4~7, 32bits rss */ 3, 2, /* octet 2~3, low 16 bits vlan_macip */ 15, 14, /* octet 15~14, 16 bits data_len */ 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ 15, 14, /* octet 15~14, low 16 bits pkt_len */ 0xFF, 0xFF, /* pkt_type set as unknown */ 0xFF, 0xFF /*pkt_type set as unknown */ ); /* * Compile-time verify the shuffle mask * NOTE: some field positions already verified above, but duplicated * here for completeness in case of future modifications. */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12); /* Cache is empty -> need to scan the buffer rings, but first move * the next 'n' mbufs into the cache */ sw_ring = &rxq->sw_ring[rxq->rx_tail]; /* A. load 4 packet in one loop * [A*. mask out 4 unused dirty field in desc] * B. copy 4 mbuf point from swring to rx_pkts * C. calc the number of DD bits among the 4 packets * [C*. extract the end-of-packet bit, if requested] * D. fill info. 
from desc to mbuf */ for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; pos += RTE_I40E_DESCS_PER_LOOP, rxdp += RTE_I40E_DESCS_PER_LOOP) { __m128i descs[RTE_I40E_DESCS_PER_LOOP]; __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; __m128i zero, staterr, sterr_tmp1, sterr_tmp2; /* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */ __m128i mbp1; #if defined(RTE_ARCH_X86_64) __m128i mbp2; #endif /* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */ mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); rte_compiler_barrier(); /* B.2 copy 2 64 bit or 4 32 bit mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); #if defined(RTE_ARCH_X86_64) /* B.1 load 2 64 bit mbuf points */ mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); #endif descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); rte_compiler_barrier(); /* B.1 load 2 mbuf point */ descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); rte_compiler_barrier(); descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); #if defined(RTE_ARCH_X86_64) /* B.2 copy 2 mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2); #endif if (split_packet) { rte_mbuf_prefetch_part2(rx_pkts[pos]); rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); } /* avoid compiler reorder optimization */ rte_compiler_barrier(); /* pkt 3,4 shift the pktlen field to be 16-bit aligned*/ const __m128i len3 = _mm_slli_epi32(descs[3], PKTLEN_SHIFT); const __m128i len2 = _mm_slli_epi32(descs[2], PKTLEN_SHIFT); /* merge the now-aligned packet length fields back in */ descs[3] = _mm_blend_epi16(descs[3], len3, 0x80); descs[2] = _mm_blend_epi16(descs[2], len2, 0x80); /* D.1 pkt 3,4 convert format from desc to pktmbuf */ pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk); pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk); /* C.1 4=>2 filter staterr info only */ sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]); /* C.1 4=>2 filter staterr info only */ sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]); desc_to_olflags_v(rxq, descs, &rx_pkts[pos]); /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust); pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust); /* pkt 1,2 shift the pktlen field to be 16-bit aligned*/ const __m128i len1 = _mm_slli_epi32(descs[1], PKTLEN_SHIFT); const __m128i len0 = _mm_slli_epi32(descs[0], PKTLEN_SHIFT); /* merge the now-aligned packet length fields back in */ descs[1] = _mm_blend_epi16(descs[1], len1, 0x80); descs[0] = _mm_blend_epi16(descs[0], len0, 0x80); /* D.1 pkt 1,2 convert format from desc to pktmbuf */ pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk); pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk); /* C.2 get 4 pkts staterr value */ zero = _mm_xor_si128(dd_check, dd_check); staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2); /* D.3 copy final 3,4 data to rx_pkts */ _mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1, pkt_mb4); _mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1, pkt_mb3); /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */ pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust); pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust); /* C* extract and record EOP bit */ if (split_packet) { __m128i eop_shuf_mask = _mm_set_epi8( 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x04, 0x0C, 0x00, 0x08 ); /* and with mask to extract bits, flipping 1-0 */ 
__m128i eop_bits = _mm_andnot_si128(staterr, eop_check); /* the staterr values are not in order, as the count * count of dd bits doesn't care. However, for end of * packet tracking, we do care, so shuffle. This also * compresses the 32-bit values to 8-bit */ eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask); /* store the resulting 32-bit value */ *(int *)split_packet = _mm_cvtsi128_si32(eop_bits); split_packet += RTE_I40E_DESCS_PER_LOOP; } /* C.3 calc available number of desc */ staterr = _mm_and_si128(staterr, dd_check); staterr = _mm_packs_epi32(staterr, zero); /* D.3 copy final 1,2 data to rx_pkts */ _mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1, pkt_mb2); _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, pkt_mb1); desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl); /* C.4 calc avaialbe number of desc */ var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); nb_pkts_recd += var; if (likely(var != RTE_I40E_DESCS_PER_LOOP)) break; } /* Update our internal tail pointer */ rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd); rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1)); rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); return nb_pkts_recd; }
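The SSE loop packs the four DD status bits into one register, masks them with dd_check, and popcounts the result to learn how many of the four descriptors are complete, stopping the scan at the first partial group. A scalar sketch of the same early-exit accounting over 4-descriptor groups (status word layout and DD bit position assumed):

#include <stdint.h>

#define DESCS_PER_LOOP 4
#define STAT_DD 0x1u /* descriptor-done bit (hypothetical position) */

/* Count received descriptors in groups of four, stopping at the first
 * group that is not completely done, as the vector loop does. */
static uint16_t
count_done(const uint32_t *status, uint16_t nb_desc)
{
    uint16_t recd = 0;
    uint16_t pos;

    for (pos = 0; pos + DESCS_PER_LOOP <= nb_desc;
         pos += DESCS_PER_LOOP) {
        unsigned int done = 0, i;

        /* Scalar equivalent of the dd_check mask + popcount. */
        for (i = 0; i < DESCS_PER_LOOP; i++) {
            if (!(status[pos + i] & STAT_DD))
                break;
            done++;
        }

        recd += done;
        if (done != DESCS_PER_LOOP)
            break; /* partial group: later descriptors not ready */
    }
    return recd;
}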
/* * vPMD raw receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP) * * Notice: * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST * numbers of DD bit * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two * - don't support ol_flags for rss and csum err */ static inline uint16_t _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint8_t *split_packet) { volatile union ixgbe_adv_rx_desc *rxdp; struct ixgbe_rx_entry *sw_ring; uint16_t nb_pkts_recd; int pos; uint64_t var; __m128i shuf_msk; __m128i crc_adjust = _mm_set_epi16( 0, 0, 0, /* ignore non-length fields */ -rxq->crc_len, /* sub crc on data_len */ 0, /* ignore high-16bits of pkt_len */ -rxq->crc_len, /* sub crc on pkt_len */ 0, 0 /* ignore pkt_type field */ ); __m128i dd_check, eop_check; /* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */ nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST); /* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP); /* Just the act of getting into the function from the application is * going to cost about 7 cycles */ rxdp = rxq->rx_ring + rxq->rx_tail; _mm_prefetch((const void *)rxdp, _MM_HINT_T0); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act */ if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH) ixgbe_rxq_rearm(rxq); /* Before we start moving massive data around, check to see if * there is actually a packet available */ if (!(rxdp->wb.upper.status_error & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) return 0; /* 4 packets DD mask */ dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL); /* 4 packets EOP mask */ eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL); /* mask to shuffle from desc. to mbuf */ shuf_msk = _mm_set_epi8( 7, 6, 5, 4, /* octet 4~7, 32bits rss */ 15, 14, /* octet 14~15, low 16 bits vlan_macip */ 13, 12, /* octet 12~13, 16 bits data_len */ 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ 13, 12, /* octet 12~13, low 16 bits pkt_len */ 0xFF, 0xFF, /* skip 32 bit pkt_type */ 0xFF, 0xFF ); /* Cache is empty -> need to scan the buffer rings, but first move * the next 'n' mbufs into the cache */ sw_ring = &rxq->sw_ring[rxq->rx_tail]; /* A. load 4 packet in one loop * [A*. mask out 4 unused dirty field in desc] * B. copy 4 mbuf point from swring to rx_pkts * C. calc the number of DD bits among the 4 packets * [C*. extract the end-of-packet bit, if requested] * D. fill info. from desc to mbuf */ for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; pos += RTE_IXGBE_DESCS_PER_LOOP, rxdp += RTE_IXGBE_DESCS_PER_LOOP) { __m128i descs[RTE_IXGBE_DESCS_PER_LOOP]; __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; __m128i zero, staterr, sterr_tmp1, sterr_tmp2; __m128i mbp1, mbp2; /* two mbuf pointer in one XMM reg. 
*/ /* B.1 load 1 mbuf point */ mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); /* B.2 copy 2 mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); /* B.1 load 1 mbuf point */ mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); /* B.1 load 2 mbuf point */ descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); /* B.2 copy 2 mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2); if (split_packet) { rte_mbuf_prefetch_part2(rx_pkts[pos]); rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); } /* avoid compiler reorder optimization */ rte_compiler_barrier(); /* D.1 pkt 3,4 convert format from desc to pktmbuf */ pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk); pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk); /* D.1 pkt 1,2 convert format from desc to pktmbuf */ pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk); pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk); /* C.1 4=>2 filter staterr info only */ sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]); /* C.1 4=>2 filter staterr info only */ sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]); /* set ol_flags with vlan packet type */ desc_to_olflags_v(descs, &rx_pkts[pos]); /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust); pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust); /* C.2 get 4 pkts staterr value */ zero = _mm_xor_si128(dd_check, dd_check); staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2); /* D.3 copy final 3,4 data to rx_pkts */ _mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1, pkt_mb4); _mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1, pkt_mb3); /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */ pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust); pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust); /* C* extract and record EOP bit */ if (split_packet) { __m128i eop_shuf_mask = _mm_set_epi8( 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x04, 0x0C, 0x00, 0x08 ); /* and with mask to extract bits, flipping 1-0 */ __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); /* the staterr values are not in order, as the count * count of dd bits doesn't care. However, for end of * packet tracking, we do care, so shuffle. 
This also * compresses the 32-bit values to 8-bit */ eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask); /* store the resulting 32-bit value */ *(int *)split_packet = _mm_cvtsi128_si32(eop_bits); split_packet += RTE_IXGBE_DESCS_PER_LOOP; /* zero-out next pointers */ rx_pkts[pos]->next = NULL; rx_pkts[pos + 1]->next = NULL; rx_pkts[pos + 2]->next = NULL; rx_pkts[pos + 3]->next = NULL; } /* C.3 calc available number of desc */ staterr = _mm_and_si128(staterr, dd_check); staterr = _mm_packs_epi32(staterr, zero); /* D.3 copy final 1,2 data to rx_pkts */ _mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1, pkt_mb2); _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, pkt_mb1); /* C.4 calc avaialbe number of desc */ var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); nb_pkts_recd += var; if (likely(var != RTE_IXGBE_DESCS_PER_LOOP)) break; } /* Update our internal tail pointer */ rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd); rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1)); rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); return nb_pkts_recd; }
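When split_packet is non-NULL, the vector path also extracts the end-of-packet bit for each of the four descriptors, inverts it (the andnot), and stores one byte per descriptor so a later pass can stitch multi-segment packets together. A scalar sketch of that bookkeeping (EOP bit position assumed):

#include <stdint.h>

#define STAT_EOP 0x2u /* end-of-packet status bit (hypothetical) */

/* For each descriptor, record a non-zero byte when the packet is NOT yet
 * complete (mirrors the andnot + shuffle in the vector code), so a
 * follow-up pass knows which mbufs must be chained together. */
static void
record_split_flags(const uint32_t *status, uint8_t *split_packet,
                   unsigned int n)
{
    unsigned int i;

    for (i = 0; i < n; i++)
        split_packet[i] = !(status[i] & STAT_EOP);
}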
/* * Notice: * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST * numbers of DD bits */ static inline uint16_t _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint8_t *split_packet) { volatile union i40e_rx_desc *rxdp; struct i40e_rx_entry *sw_ring; uint16_t nb_pkts_recd; int pos; uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; /* mask to shuffle from desc. to mbuf */ uint8x16_t shuf_msk = { 0xFF, 0xFF, /* pkt_type set as unknown */ 0xFF, 0xFF, /* pkt_type set as unknown */ 14, 15, /* octet 15~14, low 16 bits pkt_len */ 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ 14, 15, /* octet 15~14, 16 bits data_len */ 2, 3, /* octet 2~3, low 16 bits vlan_macip */ 4, 5, 6, 7 /* octet 4~7, 32bits rss */ }; uint8x16_t eop_check = { 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; uint16x8_t crc_adjust = { 0, 0, /* ignore pkt_type field */ rxq->crc_len, /* sub crc on pkt_len */ 0, /* ignore high-16bits of pkt_len */ rxq->crc_len, /* sub crc on data_len */ 0, 0, 0 /* ignore non-length fields */ }; /* nb_pkts shall be less equal than RTE_I40E_MAX_RX_BURST */ nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST); /* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP); /* Just the act of getting into the function from the application is * going to cost about 7 cycles */ rxdp = rxq->rx_ring + rxq->rx_tail; rte_prefetch_non_temporal(rxdp); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act */ if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) i40e_rxq_rearm(rxq); /* Before we start moving massive data around, check to see if * there is actually a packet available */ if (!(rxdp->wb.qword1.status_error_len & rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT))) return 0; /* Cache is empty -> need to scan the buffer rings, but first move * the next 'n' mbufs into the cache */ sw_ring = &rxq->sw_ring[rxq->rx_tail]; /* A. load 4 packet in one loop * [A*. mask out 4 unused dirty field in desc] * B. copy 4 mbuf point from swring to rx_pkts * C. calc the number of DD bits among the 4 packets * [C*. extract the end-of-packet bit, if requested] * D. fill info. 
from desc to mbuf */ for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; pos += RTE_I40E_DESCS_PER_LOOP, rxdp += RTE_I40E_DESCS_PER_LOOP) { uint64x2_t descs[RTE_I40E_DESCS_PER_LOOP]; uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; uint16x8x2_t sterr_tmp1, sterr_tmp2; uint64x2_t mbp1, mbp2; uint16x8_t staterr; uint16x8_t tmp; uint64_t stat; int32x4_t len_shl = {0, 0, 0, PKTLEN_SHIFT}; /* B.1 load 1 mbuf point */ mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]); /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs[3] = vld1q_u64((uint64_t *)(rxdp + 3)); rte_rmb(); /* B.2 copy 2 mbuf point into rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1); /* B.1 load 1 mbuf point */ mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]); descs[2] = vld1q_u64((uint64_t *)(rxdp + 2)); /* B.1 load 2 mbuf point */ descs[1] = vld1q_u64((uint64_t *)(rxdp + 1)); descs[0] = vld1q_u64((uint64_t *)(rxdp)); /* B.2 copy 2 mbuf point into rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2); if (split_packet) { rte_mbuf_prefetch_part2(rx_pkts[pos]); rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); } /* avoid compiler reorder optimization */ rte_compiler_barrier(); /* pkt 3,4 shift the pktlen field to be 16-bit aligned*/ uint32x4_t len3 = vshlq_u32(vreinterpretq_u32_u64(descs[3]), len_shl); descs[3] = vreinterpretq_u64_u32(len3); uint32x4_t len2 = vshlq_u32(vreinterpretq_u32_u64(descs[2]), len_shl); descs[2] = vreinterpretq_u64_u32(len2); /* D.1 pkt 3,4 convert format from desc to pktmbuf */ pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk); pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk); /* C.1 4=>2 filter staterr info only */ sterr_tmp2 = vzipq_u16(vreinterpretq_u16_u64(descs[1]), vreinterpretq_u16_u64(descs[3])); /* C.1 4=>2 filter staterr info only */ sterr_tmp1 = vzipq_u16(vreinterpretq_u16_u64(descs[0]), vreinterpretq_u16_u64(descs[2])); /* C.2 get 4 pkts staterr value */ staterr = vzipq_u16(sterr_tmp1.val[1], sterr_tmp2.val[1]).val[0]; desc_to_olflags_v(rxq, descs, &rx_pkts[pos]); /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust); pkt_mb4 = vreinterpretq_u8_u16(tmp); tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust); pkt_mb3 = vreinterpretq_u8_u16(tmp); /* pkt 1,2 shift the pktlen field to be 16-bit aligned*/ uint32x4_t len1 = vshlq_u32(vreinterpretq_u32_u64(descs[1]), len_shl); descs[1] = vreinterpretq_u64_u32(len1); uint32x4_t len0 = vshlq_u32(vreinterpretq_u32_u64(descs[0]), len_shl); descs[0] = vreinterpretq_u64_u32(len0); /* D.1 pkt 1,2 convert format from desc to pktmbuf */ pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk); pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk); /* D.3 copy final 3,4 data to rx_pkts */ vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1, pkt_mb4); vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1, pkt_mb3); /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */ tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust); pkt_mb2 = vreinterpretq_u8_u16(tmp); tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust); pkt_mb1 = vreinterpretq_u8_u16(tmp); /* C* extract and record EOP bit */ if (split_packet) { uint8x16_t eop_shuf_mask = { 0x00, 0x02, 0x04, 0x06, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; uint8x16_t eop_bits; /* and with mask to extract bits, flipping 1-0 */ eop_bits = 
vmvnq_u8(vreinterpretq_u8_u16(staterr)); eop_bits = vandq_u8(eop_bits, eop_check); /* the staterr values are not in order, as the count * count of dd bits doesn't care. However, for end of * packet tracking, we do care, so shuffle. This also * compresses the 32-bit values to 8-bit */ eop_bits = vqtbl1q_u8(eop_bits, eop_shuf_mask); /* store the resulting 32-bit value */ vst1q_lane_u32((uint32_t *)split_packet, vreinterpretq_u32_u8(eop_bits), 0); split_packet += RTE_I40E_DESCS_PER_LOOP; /* zero-out next pointers */ rx_pkts[pos]->next = NULL; rx_pkts[pos + 1]->next = NULL; rx_pkts[pos + 2]->next = NULL; rx_pkts[pos + 3]->next = NULL; } staterr = vshlq_n_u16(staterr, I40E_UINT16_BIT - 1); staterr = vreinterpretq_u16_s16( vshrq_n_s16(vreinterpretq_s16_u16(staterr), I40E_UINT16_BIT - 1)); stat = ~vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0); rte_prefetch_non_temporal(rxdp + RTE_I40E_DESCS_PER_LOOP); /* D.3 copy final 1,2 data to rx_pkts */ vst1q_u8((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1, pkt_mb2); vst1q_u8((void *)&rx_pkts[pos]->rx_descriptor_fields1, pkt_mb1); desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl); /* C.4 calc avaialbe number of desc */ if (unlikely(stat == 0)) { nb_pkts_recd += RTE_I40E_DESCS_PER_LOOP; } else { nb_pkts_recd += __builtin_ctzl(stat) / I40E_UINT16_BIT; break; } } /* Update our internal tail pointer */ rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd); rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1)); rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); return nb_pkts_recd; }
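The NEON variant turns each descriptor's DD status into an all-ones or all-zeros 16-bit lane, complements the packed 64-bit value, and uses count-trailing-zeros divided by 16 to find how many leading descriptors are done. A scalar sketch of that counting trick, with the lane packing simulated by plain shifts:

#include <stdint.h>

#define LANE_BITS 16
#define DESCS_PER_LOOP 4

/* Pack "done" flags into 16-bit lanes and count the leading run of done
 * descriptors with ctz, as the NEON path does after vshrq/vgetq_lane. */
static unsigned int
count_done_ctz(const int done[DESCS_PER_LOOP])
{
    uint64_t stat = 0;
    unsigned int i;

    for (i = 0; i < DESCS_PER_LOOP; i++)
        if (done[i])
            stat |= 0xFFFFull << (i * LANE_BITS);

    stat = ~stat;               /* zeros now mark completed lanes */
    if (stat == 0)
        return DESCS_PER_LOOP;  /* all four descriptors done */
    return __builtin_ctzll(stat) / LANE_BITS;
}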
/* * Write scattered channel packet to TX bufring. * * The offset of this channel packet is written as a 64bits value * immediately after this channel packet. * * The write goes through three stages: * 1. Reserve space in ring buffer for the new data. * Writer atomically moves priv_write_index. * 2. Copy the new data into the ring. * 3. Update the tail of the ring (visible to host) that indicates * next read location. Writer updates write_index */ int vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen, bool *need_sig) { struct vmbus_bufring *vbr = tbr->vbr; uint32_t ring_size = tbr->dsize; uint32_t old_windex, next_windex, windex, total; uint64_t save_windex; int i; total = 0; for (i = 0; i < iovlen; i++) total += iov[i].iov_len; total += sizeof(save_windex); /* Reserve space in ring */ do { uint32_t avail; /* Get current free location */ old_windex = tbr->windex; /* Prevent compiler reordering this with calculation */ rte_compiler_barrier(); avail = vmbus_br_availwrite(tbr, old_windex); /* If not enough space in ring, then tell caller. */ if (avail <= total) return -EAGAIN; next_windex = vmbus_br_idxinc(old_windex, total, ring_size); /* Atomic update of next write_index for other threads */ } while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex)); /* Space from old..new is now reserved */ windex = old_windex; for (i = 0; i < iovlen; i++) { windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len); } /* Set the offset of the current channel packet. */ save_windex = ((uint64_t)old_windex) << 32; windex = vmbus_txbr_copyto(tbr, windex, &save_windex, sizeof(save_windex)); /* The region reserved should match region used */ RTE_ASSERT(windex == next_windex); /* Ensure that data is available before updating host index */ rte_smp_wmb(); /* Checkin for our reservation. wait for our turn to update host */ while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex)) rte_pause(); /* If host had read all data before this, then need to signal */ *need_sig |= vmbus_txbr_need_signal(tbr, old_windex); return 0; }
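The transmit bufring write is a three-stage protocol: reserve space by CAS-advancing a private write index, copy the payload into the reserved region, then wait for your turn and CAS the public index forward so the host only ever sees fully written data (rte_smp_wmb() orders the copy before the publish). A condensed sketch of the same protocol with GCC builtins; the byte copy and host signalling are omitted, and the layout is hypothetical:

#include <stdint.h>

struct txring {
    uint32_t size;        /* ring size in bytes */
    uint32_t priv_windex; /* reservation cursor (writers only) */
    uint32_t pub_windex;  /* index the host reads */
    uint32_t rindex;      /* host read index */
};

/* Reserve 'len' bytes; returns the start offset or -1 if the ring is full. */
static int64_t
ring_reserve(struct txring *r, uint32_t len)
{
    uint32_t old, next, avail;

    do {
        old = __atomic_load_n(&r->priv_windex, __ATOMIC_RELAXED);
        avail = (r->rindex - old - 1 + r->size) % r->size;
        if (avail <= len)
            return -1; /* caller retries later (EAGAIN) */
        next = (old + len) % r->size;
    } while (!__atomic_compare_exchange_n(&r->priv_windex, &old, next,
                                          0, __ATOMIC_RELAXED,
                                          __ATOMIC_RELAXED));
    return old;
}

/* Publish a completed reservation, in the order reservations were made. */
static void
ring_commit(struct txring *r, uint32_t old, uint32_t next)
{
    uint32_t expected;

    /* Make the copied payload visible before moving the public index
     * (the rte_smp_wmb() in the original). */
    __atomic_thread_fence(__ATOMIC_RELEASE);
    do {
        expected = old;
    } while (!__atomic_compare_exchange_n(&r->pub_windex, &expected, next,
                                          0, __ATOMIC_RELAXED,
                                          __ATOMIC_RELAXED));
}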
fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq) { uintptr_t p; struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ mb_def.nb_segs = 1; /* data_off will be ajusted after new mbuf allocated for 512-byte * alignment. */ mb_def.data_off = RTE_PKTMBUF_HEADROOM; mb_def.port = rxq->port_id; rte_mbuf_refcnt_set(&mb_def, 1); /* prevent compiler reordering: rearm_data covers previous fields */ rte_compiler_barrier(); p = (uintptr_t)&mb_def.rearm_data; rxq->mbuf_initializer = *(uint64_t *)p; return 0; } static inline void fm10k_rxq_rearm(struct fm10k_rx_queue *rxq) { int i; uint16_t rx_id; volatile union fm10k_rx_desc *rxdp; struct rte_mbuf **mb_alloc = &rxq->sw_ring[rxq->rxrearm_start]; struct rte_mbuf *mb0, *mb1; __m128i head_off = _mm_set_epi64x( RTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1, RTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1); __m128i dma_addr0, dma_addr1; /* Rx buffer need to be aligned with 512 byte */ const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX - FM10K_RX_DATABUF_ALIGN + 1); rxdp = rxq->hw_ring + rxq->rxrearm_start; /* Pull 'n' more MBUFs into the software ring */ if (rte_mempool_get_bulk(rxq->mp, (void *)mb_alloc, RTE_FM10K_RXQ_REARM_THRESH) < 0) { dma_addr0 = _mm_setzero_si128(); /* Clean up all the HW/SW ring content */ for (i = 0; i < RTE_FM10K_RXQ_REARM_THRESH; i++) { mb_alloc[i] = &rxq->fake_mbuf; _mm_store_si128((__m128i *)&rxdp[i].q, dma_addr0); } rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += RTE_FM10K_RXQ_REARM_THRESH; return; } /* Initialize the mbufs in vector, process 2 mbufs in one loop */ for (i = 0; i < RTE_FM10K_RXQ_REARM_THRESH; i += 2, mb_alloc += 2) { __m128i vaddr0, vaddr1; uintptr_t p0, p1; mb0 = mb_alloc[0]; mb1 = mb_alloc[1]; /* Flush mbuf with pkt template. * Data to be rearmed is 6 bytes long. * Though, RX will overwrite ol_flags that are coming next * anyway. So overwrite whole 8 bytes with one load: * 6 bytes of rearm_data plus first 2 bytes of ol_flags. */ p0 = (uintptr_t)&mb0->rearm_data; *(uint64_t *)p0 = rxq->mbuf_initializer; p1 = (uintptr_t)&mb1->rearm_data; *(uint64_t *)p1 = rxq->mbuf_initializer; /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); /* convert pa to dma_addr hdr/data */ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); /* add headroom to pa values */ dma_addr0 = _mm_add_epi64(dma_addr0, head_off); dma_addr1 = _mm_add_epi64(dma_addr1, head_off); /* Do 512 byte alignment to satisfy HW requirement, in the * meanwhile, set Header Buffer Address to zero. */ dma_addr0 = _mm_and_si128(dma_addr0, hba_msk); dma_addr1 = _mm_and_si128(dma_addr1, hba_msk); /* flush desc with pa dma_addr */ _mm_store_si128((__m128i *)&rxdp++->q, dma_addr0); _mm_store_si128((__m128i *)&rxdp++->q, dma_addr1); /* enforce 512B alignment on default Rx virtual addresses */ mb0->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb0->buf_addr + RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN) - (char *)mb0->buf_addr); mb1->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb1->buf_addr + RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN) - (char *)mb1->buf_addr); } rxq->rxrearm_start += RTE_FM10K_RXQ_REARM_THRESH; if (rxq->rxrearm_start >= rxq->nb_desc) rxq->rxrearm_start = 0; rxq->rxrearm_nb -= RTE_FM10K_RXQ_REARM_THRESH; rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? 
(rxq->nb_desc - 1) : (rxq->rxrearm_start - 1)); /* Update the tail pointer on the NIC */ FM10K_PCI_REG_WRITE(rxq->tail_ptr, rx_id); }
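fm10k requires Rx DMA addresses aligned to 512 bytes, so the rearm path rounds the buffer address plus headroom up to the next 512-byte boundary (the head_off add followed by the hba_msk AND) and sets data_off to the distance from buf_addr to that aligned point. A scalar sketch of the same arithmetic, simplified to use a single base address for both the DMA value and the offset:

#include <stdint.h>

#define DATABUF_ALIGN 512u
#define HEADROOM      128u /* RTE_PKTMBUF_HEADROOM default */

/* Round (base + headroom) up to the next 512-byte boundary and return
 * both the address to program and the resulting data offset. */
static uint64_t
align_rx_buffer(uint64_t base, uint16_t *data_off)
{
    uint64_t aligned;

    /* add headroom + (align - 1), then mask low bits: equivalent to the
     * vector head_off add followed by the hba_msk AND */
    aligned = (base + HEADROOM + DATABUF_ALIGN - 1) &
              ~((uint64_t)DATABUF_ALIGN - 1);

    *data_off = (uint16_t)(aligned - base);
    return aligned;
}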
static inline uint16_t fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint8_t *split_packet) { volatile union fm10k_rx_desc *rxdp; struct rte_mbuf **mbufp; uint16_t nb_pkts_recd; int pos; struct fm10k_rx_queue *rxq = rx_queue; uint64_t var; __m128i shuf_msk; __m128i dd_check, eop_check; uint16_t next_dd; next_dd = rxq->next_dd; /* Just the act of getting into the function from the application is * going to cost about 7 cycles */ rxdp = rxq->hw_ring + next_dd; rte_prefetch0(rxdp); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act */ if (rxq->rxrearm_nb > RTE_FM10K_RXQ_REARM_THRESH) fm10k_rxq_rearm(rxq); /* Before we start moving massive data around, check to see if * there is actually a packet available */ if (!(rxdp->d.staterr & FM10K_RXD_STATUS_DD)) return 0; /* Vector RX will process 4 packets at a time, strip the unaligned * tails in case it's not a multiple of 4. */ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_FM10K_DESCS_PER_LOOP); /* 4 packets DD mask */ dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL); /* 4 packets EOP mask */ eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL); /* mask to shuffle from desc. to mbuf */ shuf_msk = _mm_set_epi8( 7, 6, 5, 4, /* octet 4~7, 32bits rss */ 15, 14, /* octet 14~15, low 16 bits vlan_macip */ 13, 12, /* octet 12~13, 16 bits data_len */ 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ 13, 12, /* octet 12~13, low 16 bits pkt_len */ 0xFF, 0xFF, /* skip high 16 bits pkt_type */ 0xFF, 0xFF /* Skip pkt_type field in shuffle operation */ ); /* * Compile-time verify the shuffle mask * NOTE: some field positions already verified above, but duplicated * here for completeness in case of future modifications. */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) != offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12); /* Cache is empty -> need to scan the buffer rings, but first move * the next 'n' mbufs into the cache */ mbufp = &rxq->sw_ring[next_dd]; /* A. load 4 packets in one loop * [A*. mask out 4 unused dirty fields in desc] * B. copy 4 mbuf pointers from sw_ring to rx_pkts * C. calc the number of DD bits among the 4 packets * [C*. extract the end-of-packet bit, if requested] * D. fill info. from desc to mbuf */ for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; pos += RTE_FM10K_DESCS_PER_LOOP, rxdp += RTE_FM10K_DESCS_PER_LOOP) { __m128i descs0[RTE_FM10K_DESCS_PER_LOOP]; __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; __m128i zero, staterr, sterr_tmp1, sterr_tmp2; __m128i mbp1; /* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. 
*/ #if defined(RTE_ARCH_X86_64) __m128i mbp2; #endif /* B.1 load 2 (64 bit) or 4 (32 bit) mbuf pointers */ mbp1 = _mm_loadu_si128((__m128i *)&mbufp[pos]); /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); rte_compiler_barrier(); /* B.2 copy 2 64 bit or 4 32 bit mbuf pointers into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); #if defined(RTE_ARCH_X86_64) /* B.1 load 2 64 bit mbuf pointers */ mbp2 = _mm_loadu_si128((__m128i *)&mbufp[pos+2]); #endif descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); rte_compiler_barrier(); /* A.1 load pkt 1 desc */ descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); rte_compiler_barrier(); descs0[0] = _mm_loadu_si128((__m128i *)(rxdp)); #if defined(RTE_ARCH_X86_64) /* B.2 copy 2 mbuf pointers into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2); #endif /* avoid compiler reorder optimization */ rte_compiler_barrier(); if (split_packet) { rte_mbuf_prefetch_part2(rx_pkts[pos]); rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); } /* D.1 pkt 3,4 convert format from desc to pktmbuf */ pkt_mb4 = _mm_shuffle_epi8(descs0[3], shuf_msk); pkt_mb3 = _mm_shuffle_epi8(descs0[2], shuf_msk); /* C.1 4=>2 filter staterr info only */ sterr_tmp2 = _mm_unpackhi_epi32(descs0[3], descs0[2]); /* C.1 4=>2 filter staterr info only */ sterr_tmp1 = _mm_unpackhi_epi32(descs0[1], descs0[0]); /* set ol_flags with vlan packet type */ fm10k_desc_to_olflags_v(descs0, &rx_pkts[pos]); /* D.1 pkt 1,2 convert format from desc to pktmbuf */ pkt_mb2 = _mm_shuffle_epi8(descs0[1], shuf_msk); pkt_mb1 = _mm_shuffle_epi8(descs0[0], shuf_msk); /* C.2 get 4 pkts staterr value */ zero = _mm_xor_si128(dd_check, dd_check); staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2); /* D.3 copy final 3,4 data to rx_pkts */ _mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1, pkt_mb4); _mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1, pkt_mb3); /* C* extract and record EOP bit */ if (split_packet) { __m128i eop_shuf_mask = _mm_set_epi8( 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x04, 0x0C, 0x00, 0x08 ); /* and with mask to extract bits, flipping 1-0 */ __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); /* the staterr values are not in order, as the count * of dd bits doesn't care. However, for end of * packet tracking, we do care, so shuffle. 
This also * compresses the 32-bit values to 8-bit */ eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask); /* store the resulting 32-bit value */ *(int *)split_packet = _mm_cvtsi128_si32(eop_bits); split_packet += RTE_FM10K_DESCS_PER_LOOP; /* zero-out next pointers */ rx_pkts[pos]->next = NULL; rx_pkts[pos + 1]->next = NULL; rx_pkts[pos + 2]->next = NULL; rx_pkts[pos + 3]->next = NULL; } /* C.3 calc available number of desc */ staterr = _mm_and_si128(staterr, dd_check); staterr = _mm_packs_epi32(staterr, zero); /* D.3 copy final 1,2 data to rx_pkts */ _mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1, pkt_mb2); _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, pkt_mb1); fm10k_desc_to_pktype_v(descs0, &rx_pkts[pos]); /* C.4 calc available number of desc */ var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); nb_pkts_recd += var; if (likely(var != RTE_FM10K_DESCS_PER_LOOP)) break; } /* Update our internal tail pointer */ rxq->next_dd = (uint16_t)(rxq->next_dd + nb_pkts_recd); rxq->next_dd = (uint16_t)(rxq->next_dd & (rxq->nb_desc - 1)); rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); return nb_pkts_recd; }
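/*
 * Editor's note: the following is an illustrative sketch, not part of the
 * fm10k driver. It is a scalar analogue of the SSE accounting above: the
 * vector path masks the DD bits of four descriptors, packs them and uses
 * __builtin_popcountll() to learn how many packets in the group completed;
 * the equivalent scalar logic simply stops at the first descriptor whose DD
 * bit is still clear. The helper name is hypothetical.
 */
static inline uint16_t
fm10k_count_dd_scalar_sketch(volatile union fm10k_rx_desc *rxdp)
{
	uint16_t n;

	/* Descriptors complete in order, so the first clear DD bit ends the
	 * group; this mirrors the early break on var != 4 in the loop above. */
	for (n = 0; n < RTE_FM10K_DESCS_PER_LOOP; n++)
		if (!(rxdp[n].d.staterr & FM10K_RXD_STATUS_DD))
			break;
	return n;
}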
dpdk_virtio_dev_to_vm_tx_burst(struct dpdk_virtio_writer *p, vr_dpdk_virtioq_t *vq, struct rte_mbuf **pkts, uint32_t count) { struct vring_desc *desc; struct rte_mbuf *buff; /* The virtio_hdr is initialised to 0. */ struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; uint64_t buff_addr = 0; uint64_t buff_hdr_addr = 0; uint32_t head[VR_DPDK_VIRTIO_TX_BURST_SZ]; uint32_t head_idx, packet_success = 0; uint16_t avail_idx, res_cur_idx; uint16_t res_base_idx, res_end_idx; uint16_t free_entries; uint8_t success = 0; vr_uvh_client_t *vru_cl; if (unlikely(vq->vdv_ready_state == VQ_NOT_READY)) return 0; vru_cl = vr_dpdk_virtio_get_vif_client(vq->vdv_vif_idx); if (unlikely(vru_cl == NULL)) return 0; /* * As many data cores may want access to available buffers, * they need to be reserved. */ do { res_base_idx = vq->vdv_last_used_idx_res; avail_idx = *((volatile uint16_t *)&vq->vdv_avail->idx); free_entries = (avail_idx - res_base_idx); /*check that we have enough buffers*/ if (unlikely(count > free_entries)) count = free_entries; if (unlikely(count == 0)) return 0; res_end_idx = res_base_idx + count; /* vq->vdv_last_used_idx_res is atomically updated. */ /* TODO: Allow to disable cmpset if no concurrency in application. */ success = rte_atomic16_cmpset(&vq->vdv_last_used_idx_res, res_base_idx, res_end_idx); } while (unlikely(success == 0)); res_cur_idx = res_base_idx; RTE_LOG(DEBUG, VROUTER, "%s: Current Index %d| End Index %d\n", __func__, res_cur_idx, res_end_idx); /* Prefetch available ring to retrieve indexes. */ rte_prefetch0(&vq->vdv_avail->ring[res_cur_idx & (vq->vdv_size - 1)]); /* Retrieve all of the head indexes first to avoid caching issues. */ for (head_idx = 0; head_idx < count; head_idx++) head[head_idx] = vq->vdv_avail->ring[(res_cur_idx + head_idx) & (vq->vdv_size - 1)]; /* Prefetch descriptor index. */ rte_prefetch0(&vq->vdv_desc[head[packet_success]]); while (res_cur_idx != res_end_idx) { uint32_t offset = 0, vb_offset = 0; uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0; uint8_t hdr = 0, uncompleted_pkt = 0; /* Get descriptor from available ring */ desc = &vq->vdv_desc[head[packet_success]]; buff = pkts[packet_success]; /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */ buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr); /* Prefetch buffer address. */ rte_prefetch0((void *)(uintptr_t)buff_addr); /* Copy virtio_hdr to packet and increment buffer address */ buff_hdr_addr = buff_addr; /* * If the descriptors are chained the header and data are * placed in separate buffers. */ if (likely(desc->flags & VRING_DESC_F_NEXT) && (desc->len == sizeof(struct virtio_net_hdr))) { /* * TODO: verify that desc->next is sane below. */ desc = &vq->vdv_desc[desc->next]; /* Buffer address translation. */ buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr); } else { vb_offset += sizeof(struct virtio_net_hdr); hdr = 1; } pkt_len = rte_pktmbuf_pkt_len(buff); data_len = rte_pktmbuf_data_len(buff); len_to_cpy = RTE_MIN(data_len, hdr ? 
desc->len - sizeof(struct virtio_net_hdr) : desc->len); while (total_copied < pkt_len) { /* Copy mbuf data to buffer */ rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset), rte_pktmbuf_mtod_offset(buff, const void *, offset), len_to_cpy); offset += len_to_cpy; vb_offset += len_to_cpy; total_copied += len_to_cpy; /* The whole packet completes */ if (likely(total_copied == pkt_len)) break; /* The current segment completes */ if (offset == data_len) { buff = buff->next; offset = 0; data_len = rte_pktmbuf_data_len(buff); } /* The current vring descriptor done */ if (vb_offset == desc->len) { if (desc->flags & VRING_DESC_F_NEXT) { desc = &vq->vdv_desc[desc->next]; buff_addr = (uintptr_t)vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr); vb_offset = 0; } else { /* Room in vring buffer is not enough */ uncompleted_pkt = 1; break; } } len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset); }; /* Update used ring with desc information */ vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].id = head[packet_success]; /* Drop the packet if it is uncompleted */ if (unlikely(uncompleted_pkt == 1)) vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len = sizeof(struct virtio_net_hdr); else vq->vdv_used->ring[res_cur_idx & (vq->vdv_size - 1)].len = pkt_len + sizeof(struct virtio_net_hdr); res_cur_idx++; packet_success++; /* TODO: in DPDK 2.1 we do not copy the header if (unlikely(uncompleted_pkt == 1)) continue; */ rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, sizeof(struct virtio_net_hdr)); if (likely(res_cur_idx < res_end_idx)) { /* Prefetch descriptor index. */ rte_prefetch0(&vq->vdv_desc[head[packet_success]]); } } rte_compiler_barrier(); /* Wait until it's our turn to add our buffer to the used ring. */ while (unlikely(vq->vdv_last_used_idx != res_base_idx)) rte_pause(); *(volatile uint16_t *)&vq->vdv_used->idx += count; vq->vdv_last_used_idx = res_end_idx; RTE_LOG(DEBUG, VROUTER, "%s: vif %d vq %p last_used_idx %d used->idx %d\n", __func__, vq->vdv_vif_idx, vq, vq->vdv_last_used_idx, vq->vdv_used->idx); /* flush used->idx update before we read avail->flags. */ rte_mb(); /* Kick the guest if necessary. */ if (unlikely(!(vq->vdv_avail->flags & VRING_AVAIL_F_NO_INTERRUPT))) { p->nb_syscalls++; eventfd_write(vq->vdv_callfd, 1); } return count; }
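/*
 * Editor's note: illustrative sketch only (function and parameter names are
 * hypothetical; only rte_atomic16_cmpset() from <rte_atomic.h> is an existing
 * DPDK call). It isolates the reservation step used above: because several
 * lcores may transmit into one virtqueue, a contiguous range of used ring
 * slots is claimed with a 16-bit compare-and-set before any copying is done,
 * and the entries are published in order afterwards.
 */
static inline uint16_t
vq_reserve_slots_sketch(volatile uint16_t *last_used_idx_res,
		uint16_t avail_idx, uint16_t count, uint16_t *res_base_idx)
{
	uint16_t base, free_entries;

	do {
		base = *last_used_idx_res;
		free_entries = (uint16_t)(avail_idx - base);
		if (count > free_entries)
			count = free_entries;
		if (count == 0)
			return 0;
		/* Retry if another core moved the reservation index first. */
	} while (rte_atomic16_cmpset(last_used_idx_res, base,
			(uint16_t)(base + count)) == 0);

	*res_base_idx = base;	/* caller fills slots [base, base + count) */
	return count;
}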
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq) { uintptr_t p; struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ mb_def.nb_segs = 1; mb_def.data_off = RTE_PKTMBUF_HEADROOM; mb_def.port = rxq->port_id; rte_mbuf_refcnt_set(&mb_def, 1); /* prevent compiler reordering: rearm_data covers previous fields */ rte_compiler_barrier(); p = (uintptr_t)&mb_def.rearm_data; rxq->mbuf_initializer = *(uint64_t *)p; return 0; } int __attribute__((cold)) ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq) { if (txq->sw_ring_v == NULL) return -1; /* leave the first one for overflow */ txq->sw_ring_v = txq->sw_ring_v + 1; txq->ops = &vec_txq_ops; return 0; } int __attribute__((cold)) ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) { #ifndef RTE_LIBRTE_IEEE1588 struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf; #ifndef RTE_IXGBE_RX_OLFLAGS_ENABLE /* without rx ol_flags, no VP flag report */ if (rxmode->hw_vlan_strip != 0 || rxmode->hw_vlan_extend != 0) return -1; #endif /* no fdir support */ if (fconf->mode != RTE_FDIR_MODE_NONE) return -1; /* * - no csum error report support * - no header split support */ if (rxmode->hw_ip_checksum == 1 || rxmode->header_split == 1) return -1; return 0; #else RTE_SET_USED(dev); return -1; #endif }
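/*
 * Editor's note: illustrative sketch, not driver code. It shows how a rearm
 * or allocation path can consume the 64-bit mbuf_initializer prepared by
 * ixgbe_rxq_vec_setup() above: one 8-byte store resets the fields grouped
 * under rearm_data (refcnt, nb_segs, port, data_off) to the template values.
 * The real vector drivers achieve the same effect with paired 128-bit stores;
 * the helper name here is hypothetical.
 */
static inline void
ixgbe_rearm_mbuf_sketch(struct rte_mbuf *m, uint64_t mbuf_initializer)
{
	uintptr_t p = (uintptr_t)&m->rearm_data;

	*(uint64_t *)p = mbuf_initializer;
}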
virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count) { struct vhost_virtqueue *vq; uint32_t pkt_idx = 0, entry_success = 0; uint16_t avail_idx; uint16_t res_base_idx, res_cur_idx; uint8_t success = 0; LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n", dev->device_fh); if (unlikely(queue_id != VIRTIO_RXQ)) { LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n"); } vq = dev->virtqueue[VIRTIO_RXQ]; count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); if (count == 0) return 0; for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen; do { /* * As many data cores may want access to available * buffers, they need to be reserved. */ uint32_t secure_len = 0; uint32_t vec_idx = 0; res_base_idx = vq->last_used_idx_res; res_cur_idx = res_base_idx; do { avail_idx = *((volatile uint16_t *)&vq->avail->idx); if (unlikely(res_cur_idx == avail_idx)) { LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Failed " "to get enough desc from " "vring\n", dev->device_fh); return pkt_idx; } else { update_secure_len(vq, res_cur_idx, &secure_len, &vec_idx); res_cur_idx++; } } while (pkt_len > secure_len); /* vq->last_used_idx_res is atomically updated. */ success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx, res_cur_idx); } while (success == 0); entry_success = copy_from_mbuf_to_vring(dev, res_base_idx, res_cur_idx, pkts[pkt_idx]); rte_compiler_barrier(); /* * Wait until it's our turn to add our buffer * to the used ring. */ while (unlikely(vq->last_used_idx != res_base_idx)) rte_pause(); *(volatile uint16_t *)&vq->used->idx += entry_success; vq->last_used_idx = res_cur_idx; /* flush used->idx update before we read avail->flags. */ rte_mb(); /* Kick the guest if necessary. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) eventfd_write((int)vq->callfd, 1); } return count; }
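/*
 * Editor's note: illustrative sketch distilling the in-order publish step
 * shared by the two vhost transmit paths above (the function name is
 * hypothetical; the fields and the rte_ and eventfd calls are the ones
 * already used there). A core that reserved [res_base_idx, res_cur_idx)
 * waits for earlier reservations to be published, exposes its entries, and
 * only after a full barrier reads avail->flags to decide whether to kick
 * the guest.
 */
static inline void
vhost_publish_used_sketch(struct vhost_virtqueue *vq, uint16_t res_base_idx,
		uint16_t res_cur_idx, uint16_t entry_success)
{
	/* Wait until it's our turn to add our buffers to the used ring. */
	while (unlikely(vq->last_used_idx != res_base_idx))
		rte_pause();

	*(volatile uint16_t *)&vq->used->idx += entry_success;
	vq->last_used_idx = res_cur_idx;

	/* Flush the used->idx update before reading avail->flags. */
	rte_mb();

	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write((int)vq->callfd, 1);
}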