uint16_t rx_pkt_hw(struct rte_mbuf **rx_mbuf, struct task_base *ptask)
{
	START_EMPTY_MEASSURE();
#ifdef BRAS_RX_BULK
	uint16_t nb_rx = rte_eth_rx_burst(ptask->rx_params_hw.rx_port,
					  ptask->rx_params_hw.rx_queue,
					  rx_mbuf + ptask->rx_params_hw.nb_rxbulk,
					  MAX_PKT_BURST - ptask->rx_params_hw.nb_rxbulk);
	if (likely(nb_rx > 0)) {
		ptask->rx_params_hw.nb_rxbulk += nb_rx;
		if (ptask->rx_params_hw.nb_rxbulk == MAX_PKT_BURST) {
			ptask->rx_params_hw.nb_rxbulk = 0;
			return MAX_PKT_BURST;
		}
		else {
			/* Don't increment EMPTY cycles. */
			return 0;
		}
	}
#else
	uint16_t nb_rx = rte_eth_rx_burst(ptask->rx_params_hw.rx_port,
					  ptask->rx_params_hw.rx_queue,
					  rx_mbuf, MAX_PKT_BURST);
	if (likely(nb_rx > 0)) {
		return nb_rx;
	}
#endif
	INCR_EMPTY_CYCLES(ptask->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
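/*
 * A minimal, self-contained sketch of the bulk-accumulation idea used by the
 * BRAS_RX_BULK path above: keep appending partial bursts into one array and
 * only report a full burst once MAX_PKT_BURST mbufs have been collected.
 * The names accumulate_burst and acc_cnt are illustrative only and are not
 * part of the original task_base structure.
 */
static inline uint16_t
accumulate_burst(uint16_t port, uint16_t queue, struct rte_mbuf **mbufs,
		 uint16_t *acc_cnt)
{
	uint16_t nb_rx = rte_eth_rx_burst(port, queue, mbufs + *acc_cnt,
					  MAX_PKT_BURST - *acc_cnt);

	*acc_cnt += nb_rx;
	if (*acc_cnt == MAX_PKT_BURST) {
		*acc_cnt = 0;
		return MAX_PKT_BURST;	/* full burst ready for the caller */
	}
	return 0;			/* keep accumulating */
}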
/*
 * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding
 * buffers to the RX queue before disabling RX on the device.
 */
static inline void
unlink_vmdq(struct virtio_net *dev)
{
	unsigned i = 0;
	unsigned rx_count;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];

	if (dev->ready == DEVICE_READY) {
		/* clear MAC and VLAN settings */
		rte_eth_dev_mac_addr_remove(ports[0], &dev->mac_address);
		for (i = 0; i < 6; i++)
			dev->mac_address.addr_bytes[i] = 0;

		dev->vlan_tag = 0;

		/* Clear out the receive buffers */
		rx_count = rte_eth_rx_burst(ports[0],
				(uint16_t)dev->vmdq_rx_q,
				pkts_burst, MAX_PKT_BURST);

		while (rx_count) {
			for (i = 0; i < rx_count; i++)
				rte_pktmbuf_free(pkts_burst[i]);

			rx_count = rte_eth_rx_burst(ports[0],
					(uint16_t)dev->vmdq_rx_q,
					pkts_burst, MAX_PKT_BURST);
		}

		dev->ready = DEVICE_NOT_READY;
	}
}
/**
 * Removes cloud filter. Ensures that nothing is adding buffers to the RX
 * queue before disabling RX on the device.
 */
void
vxlan_unlink(struct vhost_dev *vdev)
{
	unsigned i = 0, rx_count;
	int ret;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	struct rte_eth_tunnel_filter_conf tunnel_filter_conf;

	if (vdev->ready == DEVICE_RX) {
		memset(&tunnel_filter_conf, 0,
			sizeof(struct rte_eth_tunnel_filter_conf));

		ether_addr_copy(&ports_eth_addr[0], &tunnel_filter_conf.outer_mac);
		ether_addr_copy(&vdev->mac_address, &tunnel_filter_conf.inner_mac);
		tunnel_filter_conf.tenant_id = tenant_id_conf[vdev->rx_q];
		tunnel_filter_conf.filter_type = tep_filter_type[filter_idx];

		if (tep_filter_type[filter_idx] == RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID)
			tunnel_filter_conf.inner_vlan = INNER_VLAN_ID;

		tunnel_filter_conf.queue_id = vdev->rx_q;
		tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;

		ret = rte_eth_dev_filter_ctrl(ports[0],
				RTE_ETH_FILTER_TUNNEL,
				RTE_ETH_FILTER_DELETE,
				&tunnel_filter_conf);
		if (ret) {
			RTE_LOG(ERR, VHOST_DATA,
				"%d Failed to remove device MAC address from cloud filter\n",
				vdev->rx_q);
			return;
		}
		for (i = 0; i < ETHER_ADDR_LEN; i++)
			vdev->mac_address.addr_bytes[i] = 0;

		/* Clear out the receive buffers */
		rx_count = rte_eth_rx_burst(ports[0],
				(uint16_t)vdev->rx_q,
				pkts_burst, MAX_PKT_BURST);

		while (rx_count) {
			for (i = 0; i < rx_count; i++)
				rte_pktmbuf_free(pkts_burst[i]);

			rx_count = rte_eth_rx_burst(ports[0],
					(uint16_t)vdev->rx_q,
					pkts_burst, MAX_PKT_BURST);
		}

		vdev->ready = DEVICE_MAC_LEARNING;
	}
}
/*
 * Receive burst of packets from physical port.
 */
static void
receive_from_port(unsigned vportid)
{
	int j = 0;
	uint16_t rx_count = 0;
	struct rte_mbuf *buf[PKT_BURST_SIZE] = {0};

	/* read a port */
	rx_count = rte_eth_rx_burst(ports->id[vportid & PORT_MASK], 0,
			buf, PKT_BURST_SIZE);

	/* Now process the NIC packets read */
	if (likely(rx_count > 0)) {
		vport_stats[vportid].rx += rx_count;

		/* Prefetch first packets */
		for (j = 0; j < PREFETCH_OFFSET && j < rx_count; j++) {
			rte_prefetch0(rte_pktmbuf_mtod(buf[j], void *));
		}

		/* Prefetch and forward already prefetched packets */
		for (j = 0; j < (rx_count - PREFETCH_OFFSET); j++) {
			rte_prefetch0(rte_pktmbuf_mtod(buf[j + PREFETCH_OFFSET],
					void *));
			switch_packet(buf[j], vportid);
		}

		/* Forward remaining prefetched packets */
		for (; j < rx_count; j++) {
			switch_packet(buf[j], vportid);
		}
	}
}
static int
test_send_basic_packets(void)
{
	struct rte_mbuf bufs[RING_SIZE];
	struct rte_mbuf *pbufs[RING_SIZE];
	int i;

	printf("Testing ring pmd RX/TX\n");

	for (i = 0; i < RING_SIZE/2; i++)
		pbufs[i] = &bufs[i];

	if (rte_eth_tx_burst(TX_PORT, 0, pbufs, RING_SIZE/2) < RING_SIZE/2) {
		printf("Failed to transmit packet burst\n");
		return -1;
	}

	if (rte_eth_rx_burst(RX_PORT, 0, pbufs, RING_SIZE) != RING_SIZE/2) {
		printf("Failed to receive packet burst\n");
		return -1;
	}

	for (i = 0; i < RING_SIZE/2; i++)
		if (pbufs[i] != &bufs[i]) {
			printf("Error: received data does not match that transmitted\n");
			return -1;
		}

	return 0;
}
/*
 * Main thread that does the work, reading from INPUT_PORT
 * and writing to OUTPUT_PORT
 */
static __attribute__((noreturn)) void
lcore_main(void)
{
	uint8_t port = 0;

	if (rte_eth_dev_socket_id(port) > 0 &&
			rte_eth_dev_socket_id(port) != (int)rte_socket_id())
		printf("WARNING, port %u is on remote NUMA node to "
				"polling thread.\n\tPerformance will "
				"not be optimal.\n", port);

	printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
			rte_lcore_id());

	for (;;) {
		struct rte_mbuf *bufs[BURST_SIZE];
		const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
				bufs, BURST_SIZE);
		uint16_t buf;

		if (unlikely(nb_rx == 0))
			continue;

		for (buf = 0; buf < nb_rx; buf++) {
			struct rte_mbuf *mbuf = bufs[buf];
			unsigned int len = rte_pktmbuf_data_len(mbuf);

			rte_pktmbuf_dump(stdout, mbuf, len);
			rte_pktmbuf_free(mbuf);
		}
	}
}
uint16_t rx_pkt_hw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint8_t last_read_portid;
	uint16_t nb_rx;

	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] +
		(RTE_ALIGN_CEIL(tbase->ws_mbuf->idx[0].prod, 2) & WS_MBUF_MASK);

	last_read_portid = tbase->rx_params_hw.last_read_portid;
	nb_rx = rte_eth_rx_burst(tbase->rx_params_hw.rx_pq[last_read_portid].port,
				 tbase->rx_params_hw.rx_pq[last_read_portid].queue,
				 *mbufs, MAX_PKT_BURST);

	++tbase->rx_params_hw.last_read_portid;
	if (unlikely(tbase->rx_params_hw.last_read_portid ==
			tbase->rx_params_hw.nb_rxports)) {
		tbase->rx_params_hw.last_read_portid = 0;
	}

	if (likely(nb_rx > 0)) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	}

	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
static void
l2sw_main_process(struct lcore_env *env)
{
	struct rte_mbuf *pkt_burst[MAX_PKT_BURST];
	uint8_t n_ports = rte_eth_dev_count();
	unsigned lcore_id = rte_lcore_id();
	uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc;
	const uint64_t drain_tsc =
		(rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;

	//RTE_LOG(INFO, MARIO, "[%u] Starting main processing.\n", lcore_id);

	prev_tsc = 0;
	timer_tsc = 0;

	while (1) {
		cur_tsc = rte_rdtsc();
		diff_tsc = cur_tsc - prev_tsc;

		if (unlikely(diff_tsc > drain_tsc)) {
			uint8_t port_id;
			for (port_id = 0; port_id < n_ports; port_id++) {
				if (env->tx_mbufs[port_id].len == 0)
					continue;
				l2sw_send_burst(env, port_id,
						env->tx_mbufs[port_id].len);
				env->tx_mbufs[port_id].len = 0;
			}

			/* if timer is enabled */
			if (timer_period > 0) {
				/* advance the timer */
				timer_tsc += diff_tsc;
				/* if timer has reached its timeout */
				if (unlikely(timer_tsc >= (uint64_t) timer_period)) {
					/* do this only on master core */
					if (lcore_id == rte_get_master_lcore()) {
						//print_stats(env);
						/* reset the timer */
						timer_tsc = 0;
					}
				}
			}

			prev_tsc = cur_tsc;
		}

		/* RX */
		uint8_t port_id;
		for (port_id = 0; port_id < n_ports; port_id++) {
			unsigned n_rx = rte_eth_rx_burst(port_id, lcore_id,
					pkt_burst, MAX_PKT_BURST);
			if (n_rx != 0) {
				//RTE_LOG(INFO, MARIO, "[%u-%u] %u packet(s) came.\n",
				//	lcore_id, port_id, n_rx);
				__sync_fetch_and_add(&port_statistics[port_id].rx,
						n_rx);
			}
			ether_in(env, pkt_burst, n_rx, port_id);
		}
	}
	return;
}
void
app_main_loop_rx(void)
{
	uint32_t i;
	int ret;

	RTE_LOG(INFO, USER1, "Core %u is doing RX\n", rte_lcore_id());

	for (i = 0; ; i = ((i + 1) & (app.n_ports - 1))) {
		uint16_t n_mbufs;

		n_mbufs = rte_eth_rx_burst(
			app.ports[i],
			0,
			app.mbuf_rx.array,
			app.burst_size_rx_read);

		if (n_mbufs == 0)
			continue;

		do {
			ret = rte_ring_sp_enqueue_bulk(
				app.rings_rx[i],
				(void **) app.mbuf_rx.array,
				n_mbufs);
		} while (ret < 0);
	}
}
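/*
 * The retry loop above relies on the older rte_ring API, where the bulk
 * enqueue returned a negative value on failure. A sketch of the same
 * "spin until the whole burst is enqueued" behaviour, assuming the newer
 * API (DPDK 17.05 and later) in which rte_ring_sp_enqueue_bulk() returns
 * the number of objects enqueued (0 or n) and reports remaining free space;
 * enqueue_burst_blocking is an illustrative name, not part of the example.
 */
static inline void
enqueue_burst_blocking(struct rte_ring *ring, struct rte_mbuf **mbufs,
		       unsigned int n_mbufs)
{
	unsigned int free_space;

	/* either all n_mbufs are enqueued, or none are */
	while (rte_ring_sp_enqueue_bulk(ring, (void **)mbufs, n_mbufs,
					&free_space) == 0)
		rte_pause();	/* back off briefly before retrying */
}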
/**
 * @brief RX routine
 */
void DPDKAdapter::rxRoutine()
{
	uint8_t pkt = 0;
	uint8_t rxPktCount = 0;
	uint8_t devId = 0;
	uint8_t lcoreId = rte_lcore_id();

	LcoreInfo& coreInfo = cores[lcoreId];

	for (PortList_t::iterator itor = coreInfo.rxPortList.begin();
	     itor != coreInfo.rxPortList.end(); itor++) {
		devId = *itor;
		DeviceInfo& devInfo = devices[devId];

		struct rte_eth_dev *dev = &rte_eth_devices[devId];
		if (!dev || !dev->data->dev_started) {
			continue;
		}

		rxPktCount = rte_eth_rx_burst(devId, 0, devInfo.rxBurstBuf,
				DPDK_RX_MAX_PKT_BURST);

		if (isRxStarted(devId)) {
			saveToBuf(devId, devInfo.rxBurstBuf, rxPktCount);
		}

		for (pkt = 0; pkt < rxPktCount; pkt++) {
			rte_pktmbuf_free(devInfo.rxBurstBuf[pkt]);
		}
	}
}
int pcap_next_ex(pcap_t *p, struct pcap_pkthdr **pkt_header,
		 const u_char **pkt_data)
{
	struct rte_mbuf *mbuf = NULL;
	int len = 0;

	if (p == NULL || pkt_header == NULL || pkt_data == NULL ||
	    p->deviceId < 0 || p->deviceId >= RTE_MAX_ETHPORTS) {
		snprintf(errbuf_g, PCAP_ERRBUF_SIZE, "Invalid parameter");
		return DPDKPCAP_FAILURE;
	}

	debug("Receiving a packet on port %d\n", p->deviceId);

	/* Busy-wait until a single packet arrives on queue 0. */
	while (!rte_eth_rx_burst(p->deviceId, 0, &mbuf, 1)) {
	}

	len = rte_pktmbuf_pkt_len(mbuf);

	pktHeader_g.len = len;
	*pkt_header = &pktHeader_g;

	rte_memcpy((void *)data_g, rte_pktmbuf_mtod(mbuf, void *), len);
	*pkt_data = data_g;

	rte_pktmbuf_free(mbuf);
	return 1;
}
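/*
 * The loop above spins forever if no packet ever arrives, unlike libpcap's
 * pcap_next_ex(), which can time out. A sketch of a bounded wait built on
 * the TSC; the timeout_ms parameter and rx_one_with_timeout() helper are
 * hypothetical and not part of the shim above.
 */
static int
rx_one_with_timeout(uint16_t port_id, struct rte_mbuf **mbuf,
		    unsigned int timeout_ms)
{
	uint64_t deadline = rte_get_tsc_cycles() +
		(rte_get_tsc_hz() / 1000) * timeout_ms;

	while (rte_eth_rx_burst(port_id, 0, mbuf, 1) == 0) {
		if (rte_get_tsc_cycles() > deadline)
			return 0;	/* timed out, no packet */
	}
	return 1;			/* one packet received */
}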
static int
rte_port_ethdev_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
{
	struct rte_port_ethdev_reader *p =
		(struct rte_port_ethdev_reader *) port;

	return rte_eth_rx_burst(p->port_id, p->queue_id, pkts, n_pkts);
}
/*
 * Softnic packet forward
 */
static void
softnic_fwd(struct fwd_stream *fs)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	uint16_t nb_rx;
	uint16_t nb_tx;
	uint32_t retry;
#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	uint64_t start_tsc;
	uint64_t end_tsc;
	uint64_t core_cycles;
#endif

#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	start_tsc = rte_rdtsc();
#endif

	/* Packets Receive */
	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
			pkts_burst, nb_pkt_per_burst);
	fs->rx_packets += nb_rx;

#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
	fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
#endif

	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
			pkts_burst, nb_rx);

	/* Retry if necessary */
	if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
		retry = 0;
		while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
			rte_delay_us(burst_tx_delay_time);
			nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
					&pkts_burst[nb_tx], nb_rx - nb_tx);
		}
	}
	fs->tx_packets += nb_tx;

#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
	fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
#endif

	if (unlikely(nb_tx < nb_rx)) {
		fs->fwd_dropped += (nb_rx - nb_tx);
		do {
			rte_pktmbuf_free(pkts_burst[nb_tx]);
		} while (++nb_tx < nb_rx);
	}

#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	end_tsc = rte_rdtsc();
	core_cycles = (end_tsc - start_tsc);
	fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
#endif
}
static int
rte_port_ethdev_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
{
	struct rte_port_ethdev_reader *p =
		(struct rte_port_ethdev_reader *) port;
	uint16_t rx_pkt_cnt;

	rx_pkt_cnt = rte_eth_rx_burst(p->port_id, p->queue_id, pkts, n_pkts);
	RTE_PORT_ETHDEV_READER_STATS_PKTS_IN_ADD(p, rx_pkt_cnt);
	return rx_pkt_cnt;
}
/*
 * Polls receive queues added to the event adapter and enqueues received
 * packets to the event device.
 *
 * The receive code enqueues initially to a temporary buffer, the
 * temporary buffer is drained anytime it holds >= BATCH_SIZE packets.
 *
 * If there isn't space available in the temporary buffer, packets from the
 * Rx queue aren't dequeued from the eth device, this back pressures the
 * eth device, in virtual device environments this back pressure is relayed to
 * the hypervisor's switching layer where adjustments can be made to deal with
 * it.
 */
static inline uint32_t
eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
{
	uint32_t num_queue;
	uint16_t n;
	uint32_t nb_rx = 0;
	struct rte_mbuf *mbufs[BATCH_SIZE];
	struct rte_eth_event_enqueue_buffer *buf;
	uint32_t wrr_pos;
	uint32_t max_nb_rx;

	wrr_pos = rx_adapter->wrr_pos;
	max_nb_rx = rx_adapter->max_nb_rx;
	buf = &rx_adapter->event_enqueue_buffer;
	struct rte_event_eth_rx_adapter_stats *stats = &rx_adapter->stats;

	/* Iterate through a WRR sequence */
	for (num_queue = 0; num_queue < rx_adapter->wrr_len; num_queue++) {
		unsigned int poll_idx = rx_adapter->wrr_sched[wrr_pos];
		uint16_t qid = rx_adapter->eth_rx_poll[poll_idx].eth_rx_qid;
		uint8_t d = rx_adapter->eth_rx_poll[poll_idx].eth_dev_id;

		/* Don't do a batch dequeue from the rx queue if there isn't
		 * enough space in the enqueue buffer.
		 */
		if (buf->count >= BATCH_SIZE)
			flush_event_buffer(rx_adapter);
		if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count))
			break;

		stats->rx_poll_count++;
		n = rte_eth_rx_burst(d, qid, mbufs, BATCH_SIZE);

		if (n) {
			stats->rx_packets += n;
			/* The check before rte_eth_rx_burst() ensures that
			 * all n mbufs can be buffered
			 */
			fill_event_buffer(rx_adapter, d, qid, mbufs, n);
			nb_rx += n;
			if (nb_rx > max_nb_rx) {
				rx_adapter->wrr_pos =
					(wrr_pos + 1) % rx_adapter->wrr_len;
				return nb_rx;
			}
		}

		if (++wrr_pos == rx_adapter->wrr_len)
			wrr_pos = 0;
	}

	return nb_rx;
}
static int
test_stats_reset(void)
{
	struct rte_eth_stats stats;
	struct rte_mbuf buf, *pbuf = &buf;

	printf("Testing ring PMD stats reset\n");

	rte_eth_stats_reset(RXTX_PORT);

	/* check stats of RXTX port, should all be zero */
	rte_eth_stats_get(RXTX_PORT, &stats);
	if (stats.ipackets != 0 || stats.opackets != 0 ||
			stats.ibytes != 0 || stats.obytes != 0 ||
			stats.ierrors != 0 || stats.oerrors != 0) {
		printf("Error: RXTX port stats are not zero\n");
		return -1;
	}

	/* send and receive 1 packet and check for stats update */
	if (rte_eth_tx_burst(RXTX_PORT, 0, &pbuf, 1) != 1) {
		printf("Error sending packet to RXTX port\n");
		return -1;
	}

	if (rte_eth_rx_burst(RXTX_PORT, 0, &pbuf, 1) != 1) {
		printf("Error receiving packet from RXTX port\n");
		return -1;
	}

	rte_eth_stats_get(RXTX_PORT, &stats);
	if (stats.ipackets != 1 || stats.opackets != 1 ||
			stats.ibytes != 0 || stats.obytes != 0 ||
			stats.ierrors != 0 || stats.oerrors != 0) {
		printf("Error: RXTX port stats are not as expected\n");
		return -1;
	}

	rte_eth_stats_reset(RXTX_PORT);

	/* check stats of RXTX port, should all be zero */
	rte_eth_stats_get(RXTX_PORT, &stats);
	if (stats.ipackets != 0 || stats.opackets != 0 ||
			stats.ibytes != 0 || stats.obytes != 0 ||
			stats.ierrors != 0 || stats.oerrors != 0) {
		printf("Error: RXTX port stats are not zero\n");
		return -1;
	}

	return 0;
}
/*
 * The lcore main. This is the main thread that does the work, reading from
 * an input port and writing to an output port.
 */
static __attribute__((noreturn)) void
lcore_main(void)
{
	const uint8_t nb_ports = rte_eth_dev_count();
	uint8_t port;

	/*
	 * Check that the port is on the same NUMA node as the polling thread
	 * for best performance.
	 */
	for (port = 0; port < nb_ports; port++)
		if (rte_eth_dev_socket_id(port) > 0 &&
				rte_eth_dev_socket_id(port) !=
						(int)rte_socket_id())
			printf("WARNING, port %u is on remote NUMA node to "
					"polling thread.\n\tPerformance will "
					"not be optimal.\n", port);

	printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
			rte_lcore_id());

	/* Run until the application is quit or killed. */
	for (;;) {
		/*
		 * Receive packets on a port and forward them on the paired
		 * port. The mapping is 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2, etc.
		 */
		for (port = 0; port < nb_ports; port++) {

			/* Get burst of RX packets, from first port of pair. */
			struct rte_mbuf *bufs[BURST_SIZE];
			const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
					bufs, BURST_SIZE);

			if (unlikely(nb_rx == 0))
				continue;

			/* Send burst of TX packets, to second port of pair. */
			const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
					bufs, nb_rx);

			/* Free any unsent packets. */
			if (unlikely(nb_tx < nb_rx)) {
				uint16_t buf;
				for (buf = nb_tx; buf < nb_rx; buf++)
					rte_pktmbuf_free(bufs[buf]);
			}
		}
	}
}
// FIXME: link speed is hardcoded to 10gbit (but not really relevant for this use case where you should have only one packet anyways)
// this is only optimized for latency measurements/timestamping, not packet capture
// packet capturing would benefit from running the whole rx thread in C to avoid gc/jit pauses
uint16_t receive_with_timestamps_software(uint8_t port_id, uint16_t queue_id,
		struct rte_mbuf* rx_pkts[], uint16_t nb_pkts,
		uint64_t timestamps[]) {
	uint32_t cycles_per_byte = rte_get_tsc_hz() / 10000000.0 / 0.8;
	while (is_running()) {
		uint64_t tsc = read_rdtsc();
		uint16_t rx = rte_eth_rx_burst(port_id, queue_id, rx_pkts, nb_pkts);
		uint16_t prev_pkt_size = 0;
		for (int i = 0; i < rx; i++) {
			timestamps[i] = tsc + prev_pkt_size * cycles_per_byte;
			prev_pkt_size = rx_pkts[i]->pkt_len + 24;
		}
		if (rx > 0) {
			return rx;
		}
	}
	return 0;
}
/**
 * This thread receives mbufs from the port and assigns them an internal
 * sequence number to keep track of their order of arrival through an
 * mbuf structure.
 * The mbufs are then passed to the worker threads via the rx_to_workers
 * ring.
 */
static int
rx_thread(struct rte_ring *ring_out)
{
	const uint8_t nb_ports = rte_eth_dev_count();
	uint32_t seqn = 0;
	uint16_t i, ret = 0;
	uint16_t nb_rx_pkts;
	uint8_t port_id;
	struct rte_mbuf *pkts[MAX_PKTS_BURST];

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			rte_lcore_id());

	while (!quit_signal) {

		for (port_id = 0; port_id < nb_ports; port_id++) {
			if ((portmask & (1 << port_id)) != 0) {

				/* receive packets */
				nb_rx_pkts = rte_eth_rx_burst(port_id, 0,
						pkts, MAX_PKTS_BURST);
				if (nb_rx_pkts == 0) {
					LOG_DEBUG(REORDERAPP,
						"%s():Received zero packets\n",
						__func__);
					continue;
				}
				app_stats.rx.rx_pkts += nb_rx_pkts;

				/* mark sequence number */
				for (i = 0; i < nb_rx_pkts; )
					pkts[i++]->seqn = seqn++;

				/* enqueue to rx_to_workers ring */
				ret = rte_ring_enqueue_burst(ring_out,
						(void *) pkts, nb_rx_pkts);
				app_stats.rx.enqueue_pkts += ret;
				if (unlikely(ret < nb_rx_pkts)) {
					app_stats.rx.enqueue_failed_pkts +=
						(nb_rx_pkts - ret);
					pktmbuf_free_bulk(&pkts[ret],
						nb_rx_pkts - ret);
				}
			}
		}
	}
	return 0;
}
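/*
 * pktmbuf_free_bulk() above is a helper defined elsewhere in this example,
 * not a DPDK library call. A minimal sketch consistent with how it is
 * invoked here (free the tail of a burst that could not be enqueued):
 */
static inline void
pktmbuf_free_bulk(struct rte_mbuf *mbuf_table[], unsigned n)
{
	unsigned int i;

	for (i = 0; i < n; i++)
		rte_pktmbuf_free(mbuf_table[i]);
}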
/*
 * The lcore main. This is the main thread that does the work, reading from
 * an input port and flooding to the other ports.
 */
static __attribute__((noreturn)) void
lcore_main(void)
{
	const uint8_t nb_ports = rte_eth_dev_count();
	uint8_t port;
	int i;

	/*
	 * Check that the port is on the same NUMA node as the polling thread
	 * for best performance.
	 */
	for (port = 0; port < nb_ports; port++)
		if (rte_eth_dev_socket_id(port) > 0 &&
				rte_eth_dev_socket_id(port) !=
						(int)rte_socket_id())
			printf("WARNING, port %u is on remote NUMA node to "
					"polling thread.\n\tPerformance will "
					"not be optimal.\n", port);

	printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
			rte_lcore_id());

	/* Run until the application is quit or killed. */
	for (;;) {
		/*
		 * Packets received on a port are flooded to the other ports.
		 */
		for (port = 0; port < nb_ports; port++) {
			/* Get burst of RX packets, from first port of pair. */
			struct rte_mbuf *bufs[BURST_SIZE];
			const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
					bufs, BURST_SIZE);

			if (unlikely(nb_rx == 0))
				continue;

			for (i = 0; i < nb_rx; i++) {
				/* rte_pktmbuf_dump(stdout, bufs[i], 16); */
				flood_forward(bufs[i], port, nb_ports);
			}
		}

		/* Send out packets from TX queues */
		send_timeout_burst();
	}
}
/*
 * Get mbuf off of interface, copy it into memory provided by the
 * TCP/IP stack. TODO: share TCP/IP stack mbufs with DPDK mbufs to avoid
 * data copy.
 */
int
rumpcomp_virtif_recv(struct virtif_user *viu,
	void *data, size_t dlen, size_t *rcvp)
{
	void *cookie = rumpuser_component_unschedule();
	uint8_t *p = data;
	struct rte_mbuf *m, *m0;
	struct rte_pktmbuf *mp;
	int nb_rx, rv;

	for (;;) {
		nb_rx = rte_eth_rx_burst(IF_PORTID, 0, &m, 1);

		if (nb_rx) {
			assert(nb_rx == 1);

			mp = &m->pkt;
			if (mp->pkt_len > dlen) {
				/* for now, just drop packets we can't handle */
				printf("warning: virtif recv packet too big "
				    "%d vs. %zu\n", mp->pkt_len, dlen);
				rte_pktmbuf_free(m);
				continue;
			}
			*rcvp = mp->pkt_len;
			m0 = m;
			do {
				mp = &m->pkt;
				memcpy(p, mp->data, mp->data_len);
				p += mp->data_len;
			} while ((m = mp->next) != NULL);
			rte_pktmbuf_free(m0);
			rv = 0;
			break;
		} else {
			usleep(10000); /* XXX: don't 100% busyloop */
		}
	}

	rumpuser_component_schedule(cookie);
	return rv;
}
int pollRxRings(void *arg)
{
	int i, count = rte_eth_dev_count();
	struct rte_mbuf* rxPkts[32];

	while (!stopRxPoll_) {
		for (i = 0; i < count; i++) {
			int n = rte_eth_rx_burst(i, 0, // Queue#
					rxPkts,
					sizeof(rxPkts)/sizeof(rxPkts[0]));
			pkts += n;
			for (int j = 0; j < n; j++)
				rte_pktmbuf_free(rxPkts[j]);
		}
	}

	qDebug("DPDK Rx polling stopped");

	return 0;
}
/**
 * A call to rx_sync_ring will try to fill a Netmap RX ring with as many
 * packets as it can hold coming from its dpdk port.
 */
static inline int
rx_sync_ring(struct netmap_ring *ring, uint8_t port, uint16_t ring_number,
	uint16_t max_burst)
{
	int32_t i, n_rx;
	uint16_t burst_size;
	uint32_t cur_slot, n_free_slots;
	struct rte_mbuf *rx_mbufs[COMPAT_NETMAP_MAX_BURST];

	n_free_slots = ring->num_slots - (ring->avail + ring->reserved);
	n_free_slots = RTE_MIN(n_free_slots, max_burst);
	cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1);

	while (n_free_slots) {
		burst_size = (uint16_t)RTE_MIN(n_free_slots, RTE_DIM(rx_mbufs));

		/* receive up to burst_size packets from the NIC's queue */
		n_rx = rte_eth_rx_burst(port, ring_number, rx_mbufs,
				burst_size);

		if (n_rx == 0)
			return 0;
		if (unlikely(n_rx < 0))
			return -1;

		/* Put those n_rx packets in the Netmap structures */
		for (i = 0; i < n_rx; i++) {
			mbuf_to_slot(rx_mbufs[i], ring, cur_slot);
			rte_pktmbuf_free(rx_mbufs[i]);
			cur_slot = NETMAP_RING_NEXT(ring, cur_slot);
		}

		/* Update the Netmap ring structure to reflect the change */
		ring->avail += n_rx;
		n_free_slots -= n_rx;
	}

	return 0;
}
/*
 * Main thread that does the work, reading from INPUT_PORT
 * and writing to OUTPUT_PORT
 */
static __attribute__((noreturn)) int
lcore_main(void *arg)
{
	const uintptr_t core_num = (uintptr_t)arg;
	const unsigned num_cores = rte_lcore_count();
	uint16_t startQueue = (uint16_t)(core_num * (NUM_QUEUES/num_cores));
	uint16_t endQueue = (uint16_t)(startQueue + (NUM_QUEUES/num_cores));
	uint16_t q, i, p;

	printf("Core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_num,
	       rte_lcore_id(), startQueue, endQueue - 1);

	for (;;) {
		struct rte_mbuf *buf[32];
		const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);

		for (p = 0; p < num_ports; p++) {
			const uint8_t src = ports[p];
			const uint8_t dst = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */

			if ((src == INVALID_PORT_ID) || (dst == INVALID_PORT_ID))
				continue;

			for (q = startQueue; q < endQueue; q++) {
				const uint16_t rxCount = rte_eth_rx_burst(src,
						q, buf, buf_size);

				if (rxCount == 0)
					continue;
				rxPackets[q] += rxCount;

				const uint16_t txCount = rte_eth_tx_burst(dst,
						(uint16_t)core_num, buf, rxCount);

				if (txCount != rxCount) {
					for (i = txCount; i < rxCount; i++)
						rte_pktmbuf_free(buf[i]);
				}
			}
		}
	}
}
static uint64_t
measure_txonly(struct lcore_conf *conf,
	       struct rte_mbuf *pkts_burst[],
	       uint64_t total_pkts)
{
	unsigned i, portid, nb_rx, nb_tx;
	uint64_t diff_tsc, cur_tsc;

	printf("do tx measure\n");
	diff_tsc = 0;
	while (likely(!stop)) {
		for (i = 0; i < conf->nb_ports; i++) {
			portid = conf->portlist[i];
			nb_rx = rte_eth_rx_burst((uint8_t) portid, 0,
					pkts_burst, MAX_PKT_BURST);
			if (unlikely(nb_rx == 0)) {
				idle++;
				continue;
			}

			count += nb_rx;

			cur_tsc = rte_rdtsc();
			nb_tx = rte_eth_tx_burst(portid, 0, pkts_burst, nb_rx);
			if (unlikely(nb_tx < nb_rx)) {
				drop += (nb_rx - nb_tx);
				do {
					rte_pktmbuf_free(pkts_burst[nb_tx]);
				} while (++nb_tx < nb_rx);
			}
			diff_tsc += rte_rdtsc() - cur_tsc;
		}

		if (unlikely(count >= total_pkts))
			break;
	}
	return diff_tsc;
}
static void *
receiver(void *arg)
{
	struct virtif_user *viu = arg;

	/* step 1: this newly created host thread needs a rump kernel context */
	rumpuser_component_kthread();

	/* step 2: deliver packets until interface is decommissioned */
	while (!viu->viu_dying) {
		/* we have cached frames. schedule + deliver */
		if (viu->viu_nbufpkts > 0) {
			rumpuser_component_schedule(NULL);
			while (viu->viu_nbufpkts > 0) {
				deliverframe(viu);
			}
			rumpuser_component_unschedule();
		}

		/* none cached. ok, try to get some */
		if (viu->viu_nbufpkts == 0) {
			viu->viu_nbufpkts = rte_eth_rx_burst(IF_PORTID,
			    0, viu->viu_m_pkts, MAX_PKT_BURST);
			viu->viu_bufidx = 0;
		}
		if (viu->viu_nbufpkts == 0) {
			/*
			 * For now, don't ultrabusyloop.
			 * I don't have an overabundance of
			 * spare cores in my vm.
			 */
			usleep(10000);
		}
	}

	return NULL;
}
void
poll_receiver(struct receiver_t *receiver)
{
	const uint16_t port = receiver->in_port;
	struct rte_mbuf **pkts_burst = receiver->burst_buffer;

	uint64_t start_a = rte_get_tsc_cycles();

	uint64_t nb_rx = rte_eth_rx_burst((uint8_t) port, 0,
			pkts_burst, BURST_SIZE);

	if (nb_rx > 0) {
		receiver->time_b += rte_get_tsc_cycles() - start_a;
	}

	receiver->pkts_received += nb_rx;

	if (nb_rx != 0) {
		receiver->nb_polls++;
	}
	receiver->nb_rec += nb_rx;

	for (unsigned h_index = 0; h_index < receiver->nb_handler; ++h_index) {
		/* handover packet to handler. */
		receiver->handler[h_index](receiver->args[h_index],
				pkts_burst, nb_rx);
	}

	for (unsigned p_index = 0; p_index < nb_rx; ++p_index) {
		// rte_pktmbuf_free(pkts_burst[p_index]);
		// if (rte_mbuf_refcnt_read(pkts_burst[p_index]) > 1) {
		//	rte_mbuf_refcnt_update(pkts_burst[p_index], -1);
		// } else {
			rte_pktmbuf_free(pkts_burst[p_index]);
		// }
	}

	if (nb_rx > 0) {
		receiver->time_a += rte_get_tsc_cycles() - start_a;
		receiver->nb_measurements += nb_rx;
	}
}
/**
 * Receive packets from the ethernet driver and queue them to the worker
 * queues. This function is called from the I/O (Input) thread.
 */
static inline void
app_lcore_io_rx(struct app_lcore_params_io *lp,
		uint32_t n_workers,
		uint32_t bsz_rd,
		uint32_t bsz_wr)
{
	struct rte_mbuf *mbuf_1_0, *mbuf_1_1, *mbuf_2_0, *mbuf_2_1;
	uint32_t i, fifoness;

	fifoness = app.fifoness;
	for (i = 0; i < lp->rx.n_nic_queues; i++) {
		uint8_t portid = lp->rx.nic_queues[i].port;
		uint8_t queue = lp->rx.nic_queues[i].queue;
		uint32_t n_mbufs, j;

		if (unlikely(lp->rx.nic_queues[i].enabled != true)) {
			continue;
		}
		n_mbufs = rte_eth_rx_burst(portid, queue, lp->rx.mbuf_in.array,
				(uint16_t) bsz_rd);
		if (unlikely(n_mbufs == 0)) {
			continue;
		}

#if APP_STATS
		lp->rx.nic_queues_iters[i]++;
		lp->rx.nic_queues_count[i] += n_mbufs;
		if (unlikely(lp->rx.nic_queues_iters[i] == APP_STATS)) {
			struct rte_eth_stats stats;
			unsigned lcore = rte_lcore_id();

			rte_eth_stats_get(portid, &stats);
			printf("I/O RX %u in (NIC port %u): NIC drop ratio = %.2f avg burst size = %.2f\n",
				lcore,
				(unsigned) portid,
				(double) stats.ierrors / (double) (stats.ierrors + stats.ipackets),
				((double) lp->rx.nic_queues_count[i]) / ((double) lp->rx.nic_queues_iters[i]));
			lp->rx.nic_queues_iters[i] = 0;
			lp->rx.nic_queues_count[i] = 0;
		}
#endif

#if APP_IO_RX_DROP_ALL_PACKETS
		for (j = 0; j < n_mbufs; j++) {
			struct rte_mbuf *pkt = lp->rx.mbuf_in.array[j];
			rte_pktmbuf_free(pkt);
		}
		continue;
#endif

		mbuf_1_0 = lp->rx.mbuf_in.array[0];
		mbuf_1_1 = lp->rx.mbuf_in.array[1];
		mbuf_2_0 = lp->rx.mbuf_in.array[2];
		mbuf_2_1 = lp->rx.mbuf_in.array[3];
		APP_IO_RX_PREFETCH0(mbuf_2_0);
		APP_IO_RX_PREFETCH0(mbuf_2_1);

		for (j = 0; j + 3 < n_mbufs; j += 2) {
			struct rte_mbuf *mbuf_0_0, *mbuf_0_1;
			uint32_t worker_0, worker_1;

			mbuf_0_0 = mbuf_1_0;
			mbuf_0_1 = mbuf_1_1;

			mbuf_1_0 = mbuf_2_0;
			mbuf_1_1 = mbuf_2_1;

			mbuf_2_0 = lp->rx.mbuf_in.array[j+4];
			mbuf_2_1 = lp->rx.mbuf_in.array[j+5];
			APP_IO_RX_PREFETCH0(mbuf_2_0);
			APP_IO_RX_PREFETCH0(mbuf_2_1);

			switch (fifoness) {
			case FIFONESS_FLOW:
#ifdef __SSE4_2__
				worker_0 = rte_hash_crc(rte_pktmbuf_mtod(mbuf_0_0, void *),
						sizeof(ETHER_HDR) + 2,
						portid) % n_workers;
				worker_1 = rte_hash_crc(rte_pktmbuf_mtod(mbuf_0_1, void *),
						sizeof(ETHER_HDR) + 2,
						portid) % n_workers;
#else
				worker_0 = CityHash64WithSeed(rte_pktmbuf_mtod(mbuf_0_0, void *),
						sizeof(ETHER_HDR) + 2,
						portid) % n_workers;
				worker_1 = CityHash64WithSeed(rte_pktmbuf_mtod(mbuf_0_1, void *),
						sizeof(ETHER_HDR) + 2,
						portid) % n_workers;
#endif /* __SSE4_2__ */
				break;
			case FIFONESS_PORT:
				worker_0 = worker_1 = portid % n_workers;
				break;
			case FIFONESS_NONE:
			default:
				worker_0 = j % n_workers;
				worker_1 = (j + 1) % n_workers;
				break;
			}
			app_lcore_io_rx_buffer_to_send(lp, worker_0, mbuf_0_0, bsz_wr);
			app_lcore_io_rx_buffer_to_send(lp, worker_1, mbuf_0_1, bsz_wr);
		}

		/*
		 * Handle the last 1, 2 (when n_mbufs is even) or
		 * 3 (when n_mbufs is odd) packets
		 */
		for ( ; j < n_mbufs; j += 1) {
			struct rte_mbuf *mbuf;
			uint32_t worker;

			mbuf = mbuf_1_0;
			mbuf_1_0 = mbuf_1_1;
			mbuf_1_1 = mbuf_2_0;
			mbuf_2_0 = mbuf_2_1;
			APP_IO_RX_PREFETCH0(mbuf_1_0);

			switch (fifoness) {
			case FIFONESS_FLOW:
#ifdef __SSE4_2__
				worker = rte_hash_crc(rte_pktmbuf_mtod(mbuf, void *),
						sizeof(ETHER_HDR) + 2,
						portid) % n_workers;
#else
				worker = CityHash64WithSeed(rte_pktmbuf_mtod(mbuf, void *),
						sizeof(ETHER_HDR) + 2,
						portid) % n_workers;
#endif /* __SSE4_2__ */
				break;
			case FIFONESS_PORT:
				worker = portid % n_workers;
				break;
			case FIFONESS_NONE:
			default:
				worker = j % n_workers;
				break;
			}
			app_lcore_io_rx_buffer_to_send(lp, worker, mbuf, bsz_wr);
		}
	}
}
/*
 * Forwarding of packets in MAC mode.
 * Change the source and the destination Ethernet addresses of packets
 * before forwarding them.
 */
static void
pkt_burst_mac_forward(struct fwd_stream *fs)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	struct rte_port *txp;
	struct rte_mbuf *mb;
	struct ether_hdr *eth_hdr;
	uint16_t nb_rx;
	uint16_t nb_tx;
	uint16_t i;
#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	uint64_t start_tsc;
	uint64_t end_tsc;
	uint64_t core_cycles;
#endif

#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	start_tsc = rte_rdtsc();
#endif

	/*
	 * Receive a burst of packets and forward them.
	 */
	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
			nb_pkt_per_burst);
	if (unlikely(nb_rx == 0))
		return;

#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
	fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
#endif
	fs->rx_packets += nb_rx;
	txp = &ports[fs->tx_port];
	for (i = 0; i < nb_rx; i++) {
		mb = pkts_burst[i];
		eth_hdr = (struct ether_hdr *) mb->pkt.data;
		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],
				&eth_hdr->d_addr);
		ether_addr_copy(&ports[fs->tx_port].eth_addr,
				&eth_hdr->s_addr);
		mb->ol_flags = txp->tx_ol_flags;
		mb->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
		mb->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
		mb->pkt.vlan_macip.f.vlan_tci = txp->tx_vlan_id;
	}
	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
	fs->tx_packets += nb_tx;
#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
	fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
#endif
	if (unlikely(nb_tx < nb_rx)) {
		fs->fwd_dropped += (nb_rx - nb_tx);
		do {
			rte_pktmbuf_free(pkts_burst[nb_tx]);
		} while (++nb_tx < nb_rx);
	}
#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	end_tsc = rte_rdtsc();
	core_cycles = (end_tsc - start_tsc);
	fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
#endif
}
/*
 * Receive a burst of packets, lookup for ICMP echo requests, and, if any,
 * send back ICMP echo replies.
 */
static void
reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	struct rte_mbuf *pkt;
	struct ether_hdr *eth_h;
	struct vlan_hdr *vlan_h;
	struct arp_hdr *arp_h;
	struct ipv4_hdr *ip_h;
	struct icmp_hdr *icmp_h;
	struct ether_addr eth_addr;
	uint32_t ip_addr;
	uint16_t nb_rx;
	uint16_t nb_tx;
	uint16_t nb_replies;
	uint16_t eth_type;
	uint16_t vlan_id;
	uint16_t arp_op;
	uint16_t arp_pro;
	uint8_t i;
	int l2_len;
#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	uint64_t start_tsc;
	uint64_t end_tsc;
	uint64_t core_cycles;
#endif

#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	start_tsc = rte_rdtsc();
#endif

	/*
	 * First, receive a burst of packets.
	 */
	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
			nb_pkt_per_burst);
	if (unlikely(nb_rx == 0))
		return;

#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
	fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
#endif
	fs->rx_packets += nb_rx;
	nb_replies = 0;
	for (i = 0; i < nb_rx; i++) {
		pkt = pkts_burst[i];
		eth_h = (struct ether_hdr *) pkt->pkt.data;
		eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type);
		l2_len = sizeof(struct ether_hdr);
		if (verbose_level > 0) {
			printf("\nPort %d pkt-len=%u nb-segs=%u\n",
				fs->rx_port, pkt->pkt.pkt_len,
				pkt->pkt.nb_segs);
			ether_addr_dump(" ETH: src=", &eth_h->s_addr);
			ether_addr_dump(" dst=", &eth_h->d_addr);
		}
		if (eth_type == ETHER_TYPE_VLAN) {
			vlan_h = (struct vlan_hdr *)
				((char *)eth_h + sizeof(struct ether_hdr));
			l2_len += sizeof(struct vlan_hdr);
			eth_type = rte_be_to_cpu_16(vlan_h->eth_proto);
			if (verbose_level > 0) {
				vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci)
					& 0xFFF;
				printf(" [vlan id=%u]", vlan_id);
			}
		}
		if (verbose_level > 0) {
			printf(" type=0x%04x\n", eth_type);
		}

		/* Reply to ARP requests */
		if (eth_type == ETHER_TYPE_ARP) {
			arp_h = (struct arp_hdr *) ((char *)eth_h + l2_len);
			arp_op = RTE_BE_TO_CPU_16(arp_h->arp_op);
			arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_pro);
			if (verbose_level > 0) {
				printf(" ARP: hrd=%d proto=0x%04x hln=%d "
					"pln=%d op=%u (%s)\n",
					RTE_BE_TO_CPU_16(arp_h->arp_hrd),
					arp_pro, arp_h->arp_hln,
					arp_h->arp_pln, arp_op,
					arp_op_name(arp_op));
			}
			if ((RTE_BE_TO_CPU_16(arp_h->arp_hrd) != ARP_HRD_ETHER) ||
			    (arp_pro != ETHER_TYPE_IPv4) ||
			    (arp_h->arp_hln != 6) ||
			    (arp_h->arp_pln != 4)) {
				rte_pktmbuf_free(pkt);
				if (verbose_level > 0)
					printf("\n");
				continue;
			}
			if (verbose_level > 0) {
				memcpy(&eth_addr,
					arp_h->arp_data.arp_ip.arp_sha, 6);
				ether_addr_dump(" sha=", &eth_addr);
				memcpy(&ip_addr,
					arp_h->arp_data.arp_ip.arp_sip, 4);
				ipv4_addr_dump(" sip=", ip_addr);
				printf("\n");
				memcpy(&eth_addr,
					arp_h->arp_data.arp_ip.arp_tha, 6);
				ether_addr_dump(" tha=", &eth_addr);
				memcpy(&ip_addr,
					arp_h->arp_data.arp_ip.arp_tip, 4);
				ipv4_addr_dump(" tip=", ip_addr);
				printf("\n");
			}
			if (arp_op != ARP_OP_REQUEST) {
				rte_pktmbuf_free(pkt);
				continue;
			}

			/*
			 * Build ARP reply.
			 */

			/* Use source MAC address as destination MAC address. */
			ether_addr_copy(&eth_h->s_addr, &eth_h->d_addr);
			/* Set source MAC address with MAC address of TX port */
			ether_addr_copy(&ports[fs->tx_port].eth_addr,
					&eth_h->s_addr);

			arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
			memcpy(&eth_addr, arp_h->arp_data.arp_ip.arp_tha, 6);
			memcpy(arp_h->arp_data.arp_ip.arp_tha,
				arp_h->arp_data.arp_ip.arp_sha, 6);
			memcpy(arp_h->arp_data.arp_ip.arp_sha,
				&eth_h->s_addr, 6);

			/* Swap IP addresses in ARP payload */
			memcpy(&ip_addr, arp_h->arp_data.arp_ip.arp_sip, 4);
			memcpy(arp_h->arp_data.arp_ip.arp_sip,
				arp_h->arp_data.arp_ip.arp_tip, 4);
			memcpy(arp_h->arp_data.arp_ip.arp_tip, &ip_addr, 4);
			pkts_burst[nb_replies++] = pkt;
			continue;
		}

		if (eth_type != ETHER_TYPE_IPv4) {
			rte_pktmbuf_free(pkt);
			continue;
		}
		ip_h = (struct ipv4_hdr *) ((char *)eth_h + l2_len);
		if (verbose_level > 0) {
			ipv4_addr_dump(" IPV4: src=", ip_h->src_addr);
			ipv4_addr_dump(" dst=", ip_h->dst_addr);
			printf(" proto=%d (%s)\n",
				ip_h->next_proto_id,
				ip_proto_name(ip_h->next_proto_id));
		}

		/*
		 * Check if packet is a ICMP echo request.
		 */
		icmp_h = (struct icmp_hdr *) ((char *)ip_h +
				sizeof(struct ipv4_hdr));
		if (! ((ip_h->next_proto_id == IPPROTO_ICMP) &&
		       (icmp_h->icmp_type == IP_ICMP_ECHO_REQUEST) &&
		       (icmp_h->icmp_code == 0))) {
			rte_pktmbuf_free(pkt);
			continue;
		}

		if (verbose_level > 0)
			printf(" ICMP: echo request seq id=%d\n",
				rte_be_to_cpu_16(icmp_h->icmp_seq_nb));

		/*
		 * Prepare ICMP echo reply to be sent back.
		 * - switch ethernet source and destination addresses,
		 * - switch IPv4 source and destination addresses,
		 * - set IP_ICMP_ECHO_REPLY in ICMP header.
		 * No need to re-compute the IP header checksum.
		 * Reset ICMP checksum.
		 */
		ether_addr_copy(&eth_h->s_addr, &eth_addr);
		ether_addr_copy(&eth_h->d_addr, &eth_h->s_addr);
		ether_addr_copy(&eth_addr, &eth_h->d_addr);
		ip_addr = ip_h->src_addr;
		ip_h->src_addr = ip_h->dst_addr;
		ip_h->dst_addr = ip_addr;
		icmp_h->icmp_type = IP_ICMP_ECHO_REPLY;
		icmp_h->icmp_cksum = 0;
		pkts_burst[nb_replies++] = pkt;
	}

	/* Send back ICMP echo replies, if any. */
	if (nb_replies > 0) {
		nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
				pkts_burst, nb_replies);
		fs->tx_packets += nb_tx;
#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
		fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
#endif
		if (unlikely(nb_tx < nb_replies)) {
			fs->fwd_dropped += (nb_replies - nb_tx);
			do {
				rte_pktmbuf_free(pkts_burst[nb_tx]);
			} while (++nb_tx < nb_replies);
		}
	}

#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
	end_tsc = rte_rdtsc();
	core_cycles = (end_tsc - start_tsc);
	fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
#endif
}