/* Perform a sanity test of the distributor with a large number of packets, * where we allocate a new set of mbufs for each burst. The workers then * free the mbufs. This ensures that we don't have any packet leaks in the * library. */ static int sanity_test_with_worker_shutdown(struct rte_distributor *d, struct rte_mempool *p) { struct rte_mbuf *bufs[BURST]; unsigned i; printf("=== Sanity test of worker shutdown ===\n"); clear_packet_count(); if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) { printf("line %d: Error getting mbufs from pool\n", __LINE__); return -1; } /* now set all hash values in all buffers to zero, so all pkts go to the * one worker thread */ for (i = 0; i < BURST; i++) bufs[i]->pkt.hash.rss = 0; rte_distributor_process(d, bufs, BURST); /* at this point, we will have processed some packets and have a full * backlog for the other ones at worker 0. */ /* get more buffers to queue up, again setting them to the same flow */ if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) { printf("line %d: Error getting mbufs from pool\n", __LINE__); return -1; } for (i = 0; i < BURST; i++) bufs[i]->pkt.hash.rss = 0; /* get worker zero to quit */ zero_quit = 1; rte_distributor_process(d, bufs, BURST); /* flush the distributor */ rte_distributor_flush(d); if (total_packet_count() != BURST * 2) { printf("Line %d: Error, not all packets flushed. " "Expected %u, got %u\n", __LINE__, BURST * 2, total_packet_count()); return -1; } for (i = 0; i < rte_lcore_count() - 1; i++) printf("Worker %u handled %u packets\n", i, worker_stats[i].handled_packets); printf("Sanity test with worker shutdown passed\n\n"); return 0; }
/* Perform a sanity test of the distributor with a large number of packets, * where we allocate a new set of mbufs for each burst. The workers then * free the mbufs. This ensures that we don't have any packet leaks in the * library. */ static int sanity_test_with_mbuf_alloc(struct rte_distributor *d, struct rte_mempool *p) { unsigned i; struct rte_mbuf *bufs[BURST]; printf("=== Sanity test with mbuf alloc/free ===\n"); clear_packet_count(); for (i = 0; i < ((1<<ITER_POWER)); i += BURST) { unsigned j; while (rte_mempool_get_bulk(p, (void *)bufs, BURST) < 0) rte_distributor_process(d, NULL, 0); for (j = 0; j < BURST; j++) { bufs[j]->pkt.hash.rss = (i+j) << 1; bufs[j]->refcnt = 1; } rte_distributor_process(d, bufs, BURST); } rte_distributor_flush(d); if (total_packet_count() < (1<<ITER_POWER)) { printf("Line %u: Packet count is incorrect, %u, expected %u\n", __LINE__, total_packet_count(), (1<<ITER_POWER)); return -1; } printf("Sanity test with mbuf alloc/free passed\n\n"); return 0; }
/* allocate and build ops (no free) */ static int pmd_cyclecount_build_ops(struct pmd_cyclecount_state *state, uint32_t iter_ops_needed, uint16_t test_burst_size) { uint32_t cur_iter_op; uint32_t imix_idx = 0; for (cur_iter_op = 0; cur_iter_op < iter_ops_needed; cur_iter_op += test_burst_size) { uint32_t burst_size = RTE_MIN( iter_ops_needed - cur_iter_op, test_burst_size); struct rte_crypto_op **ops = &state->ctx->ops[cur_iter_op]; /* Allocate objects containing crypto operations and mbufs */ if (rte_mempool_get_bulk(state->ctx->pool, (void **)ops, burst_size) != 0) { RTE_LOG(ERR, USER1, "Failed to allocate more crypto operations " "from the crypto operation pool.\n" "Consider increasing the pool size " "with --pool-sz\n"); return -1; } /* Setup crypto op, attach mbuf etc */ (state->ctx->populate_ops)(ops, state->ctx->src_buf_offset, state->ctx->dst_buf_offset, burst_size, state->ctx->sess, state->opts, state->ctx->test_vector, iv_offset, &imix_idx); } return 0; }
/*
 * Refill RTE_I40E_RXQ_REARM_THRESH receive descriptors, starting at
 * rxq->rxrearm_start, with mbufs taken in bulk from rxq->mp, then advance
 * the rearm bookkeeping and write the new tail index to the NIC.
 *
 * If the pool cannot supply the whole batch nothing is rearmed; the
 * port's rx_mbuf_alloc_failed counter is increased by the batch size.
 */
static inline void
i40e_rxq_rearm(struct i40e_rx_queue *rxq)
{
	int i;
	uint16_t rx_id;
	volatile union i40e_rx_desc *rxdp;
	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;
	uint64x2_t dma_addr0, dma_addr1;
	uint64x2_t zero = vdupq_n_u64(0);
	uint64_t paddr;

	rxdp = rxq->rx_ring + rxq->rxrearm_start;

	/* Pull 'n' more MBUFs into the software ring */
	if (unlikely(rte_mempool_get_bulk(rxq->mp,
					  (void *)rxep,
					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
		/*
		 * Once the number of descriptors awaiting rearm would reach
		 * the ring size, point the next RTE_I40E_DESCS_PER_LOOP
		 * software entries at the queue's fake mbuf and zero the
		 * matching hardware descriptors.
		 */
		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
		    rxq->nb_rx_desc) {
			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
				rxep[i].mbuf = &rxq->fake_mbuf;
				vst1q_u64((uint64_t *)&rxdp[i].read, zero);
			}
		}
		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_I40E_RXQ_REARM_THRESH;
		return;
	}

	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
		mb0 = rxep[0].mbuf;
		mb1 = rxep[1].mbuf;

		/* buffer IO address past the headroom, duplicated in both lanes */
		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr0 = vdupq_n_u64(paddr);

		/* flush desc with pa dma_addr */
		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);

		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr1 = vdupq_n_u64(paddr);
		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
	}

	/* advance the rearm window, wrapping at the end of the ring */
	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
		rxq->rxrearm_start = 0;

	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;

	/* tail is the index of the last descriptor that was just rearmed */
	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

	/* Update the tail pointer on the NIC */
	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
}
/**
 * Bulk allocate raw element from mempool and return as comp operations
 *
 * @param mempool
 *   Compress operation mempool
 * @param ops
 *   Array to place allocated operations
 * @param nb_ops
 *   Number of operations to allocate
 * @return
 *   - nb_ops: Success, all requested operations were allocated
 *   - 0: Not enough entries in the mempool; no ops are retrieved.
 */
static inline int
rte_comp_op_raw_bulk_alloc(struct rte_mempool *mempool,
		struct rte_comp_op **ops, uint16_t nb_ops)
{
	/* the bulk get reports 0 on success; map that onto a count for callers */
	if (rte_mempool_get_bulk(mempool, (void **)ops, nb_ops) != 0)
		return 0;

	return nb_ops;
}
/*
 * Source-port receive: fill pkts[] with n_pkts buffers taken in bulk from
 * the port's mempool.
 *
 * Returns n_pkts when the bulk get succeeds and 0 when it does not.
 */
static int
rte_port_source_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
{
	struct rte_port_source *src = (struct rte_port_source *) port;
	int rc = rte_mempool_get_bulk(src->mempool, (void **) pkts, n_pkts);

	return (rc == 0) ? (int) n_pkts : 0;
}
/* benchmark alloc-build-free of ops */ static inline int pmd_cyclecount_bench_ops(struct pmd_cyclecount_state *state, uint32_t cur_op, uint16_t test_burst_size) { uint32_t iter_ops_left = state->opts->total_ops - cur_op; uint32_t iter_ops_needed = RTE_MIN(state->opts->nb_descriptors, iter_ops_left); uint32_t cur_iter_op; uint32_t imix_idx = 0; for (cur_iter_op = 0; cur_iter_op < iter_ops_needed; cur_iter_op += test_burst_size) { uint32_t burst_size = RTE_MIN(state->opts->total_ops - cur_op, test_burst_size); struct rte_crypto_op **ops = &state->ctx->ops[cur_iter_op]; /* Allocate objects containing crypto operations and mbufs */ if (rte_mempool_get_bulk(state->ctx->pool, (void **)ops, burst_size) != 0) { RTE_LOG(ERR, USER1, "Failed to allocate more crypto operations " "from the crypto operation pool.\n" "Consider increasing the pool size " "with --pool-sz\n"); return -1; } /* Setup crypto op, attach mbuf etc */ (state->ctx->populate_ops)(ops, state->ctx->src_buf_offset, state->ctx->dst_buf_offset, burst_size, state->ctx->sess, state->opts, state->ctx->test_vector, iv_offset, &imix_idx); #ifdef CPERF_LINEARIZATION_ENABLE /* Check if source mbufs require coalescing */ if (state->linearize) { uint8_t i; for (i = 0; i < burst_size; i++) { struct rte_mbuf *src = ops[i]->sym->m_src; rte_pktmbuf_linearize(src); } } #endif /* CPERF_LINEARIZATION_ENABLE */ rte_mempool_put_bulk(state->ctx->pool, (void **)ops, burst_size); } return 0; }
/* Test that the flush function is able to move packets between workers when * one worker shuts down.. */ static int test_flush_with_worker_shutdown(struct worker_params *wp, struct rte_mempool *p) { struct rte_distributor *d = wp->dist; struct rte_mbuf *bufs[BURST]; unsigned i; printf("=== Test flush fn with worker shutdown (%s) ===\n", wp->name); clear_packet_count(); if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) { printf("line %d: Error getting mbufs from pool\n", __LINE__); return -1; } /* now set all hash values in all buffers to zero, so all pkts go to the * one worker thread */ for (i = 0; i < BURST; i++) bufs[i]->hash.usr = 0; rte_distributor_process(d, bufs, BURST); /* at this point, we will have processed some packets and have a full * backlog for the other ones at worker 0. */ /* get worker zero to quit */ zero_quit = 1; /* flush the distributor */ rte_distributor_flush(d); rte_delay_us(10000); zero_quit = 0; for (i = 0; i < rte_lcore_count() - 1; i++) printf("Worker %u handled %u packets\n", i, worker_stats[i].handled_packets); if (total_packet_count() != BURST) { printf("Line %d: Error, not all packets flushed. " "Expected %u, got %u\n", __LINE__, BURST, total_packet_count()); return -1; } printf("Flush test with worker shutdown passed\n\n"); return 0; }
/*
 * Top the caller's mbuf cache back up to MAX_PKT_BURST entries.
 *
 * The missing (MAX_PKT_BURST - *n_new_mbufs) buffers are fetched in one bulk
 * get into the front of mbufs[] and each is passed to init_mbuf_seg().
 *
 * Returns 0 if the cache is (now) full, -1 if the mempool could not supply
 * the missing buffers; *n_new_mbufs is only updated on success.
 */
static int refill_mbufs(uint32_t *n_new_mbufs, struct rte_mempool *mempool, struct rte_mbuf **mbufs)
{
	const uint32_t missing = MAX_PKT_BURST - *n_new_mbufs;
	uint32_t k;

	if (missing == 0)
		return 0;

	if (rte_mempool_get_bulk(mempool, (void **)mbufs, missing) < 0) {
		plogx_err("4Mempool alloc failed for %d mbufs\n", missing);
		return -1;
	}

	for (k = 0; k < missing; ++k)
		init_mbuf_seg(mbufs[k]);

	*n_new_mbufs = MAX_PKT_BURST;
	return 0;
}
/* * This basic performance test just repeatedly sends in 32 packets at a time * to the distributor and verifies at the end that we got them all in the worker * threads and finally how long per packet the processing took. */ static inline int perf_test(struct rte_distributor *d, struct rte_mempool *p) { unsigned int i; uint64_t start, end; struct rte_mbuf *bufs[BURST]; clear_packet_count(); if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) { printf("Error getting mbufs from pool\n"); return -1; } /* ensure we have different hash value for each pkt */ for (i = 0; i < BURST; i++) bufs[i]->hash.usr = i; start = rte_rdtsc(); for (i = 0; i < (1<<ITER_POWER); i++) rte_distributor_process(d, bufs, BURST); end = rte_rdtsc(); do { usleep(100); rte_distributor_process(d, NULL, 0); } while (total_packet_count() < (BURST << ITER_POWER)); rte_distributor_clear_returns(d); printf("Time per burst: %"PRIu64"\n", (end - start) >> ITER_POWER); printf("Time per packet: %"PRIu64"\n\n", ((end - start) >> ITER_POWER)/BURST); rte_mempool_put_bulk(p, (void *)bufs, BURST); for (i = 0; i < rte_lcore_count() - 1; i++) printf("Worker %u handled %u packets\n", i, worker_stats[i].handled_packets); printf("Total packets: %u (%x)\n", total_packet_count(), total_packet_count()); printf("=== Perf test done ===\n\n"); return 0; }
/*
 * Randomize the order in which objects sit in a mempool.
 *
 * Up to nb_mbuf objects are taken out of the pool one at a time and then
 * put back in random order. The random pick always selects among the
 * objects still held, so every pick succeeds and the index stays within
 * [0, got).
 *
 * The function is best-effort: it does nothing when nb_mbuf is 0 or when
 * the scratch array cannot be allocated.
 */
static void shuffle_mempool(struct rte_mempool* mempool, uint32_t nb_mbuf)
{
	struct rte_mbuf **pkts;
	uint32_t got = 0;

	if (nb_mbuf == 0)
		return;

	pkts = prox_zmalloc(nb_mbuf * sizeof(*pkts), rte_socket_id());
	if (pkts == NULL)
		return;

	/* never take more objects than the scratch array can hold */
	while (got < nb_mbuf &&
	       rte_mempool_get_bulk(mempool, (void**)(pkts + got), 1) == 0)
		++got;

	while (got) {
		uint32_t idx = (uint32_t)rand() % got;

		rte_mempool_put_bulk(mempool, (void**)&pkts[idx], 1);
		/* fill the hole with the last held object to keep [0, got) dense */
		--got;
		pkts[idx] = pkts[got];
	}

	prox_free(pkts);
}
/* Useful function which ensures that all worker functions terminate */ static void quit_workers(struct rte_distributor *d, struct rte_mempool *p) { const unsigned num_workers = rte_lcore_count() - 1; unsigned i; struct rte_mbuf *bufs[RTE_MAX_LCORE]; rte_mempool_get_bulk(p, (void *)bufs, num_workers); quit = 1; for (i = 0; i < num_workers; i++) bufs[i]->hash.usr = i << 1; rte_distributor_process(d, bufs, num_workers); rte_mempool_put_bulk(p, (void *)bufs, num_workers); rte_distributor_process(d, NULL, 0); rte_eal_mp_wait_lcore(); quit = 0; worker_idx = 0; }
void alloc_mbufs(struct rte_mempool* mp, struct rte_mbuf* bufs[], uint32_t len, uint16_t pkt_len) { // this is essentially rte_pktmbuf_alloc_bulk() // but the loop is optimized to directly set the pkt/data len flags as well // since most allocs directly do this (packet generators) rte_mempool_get_bulk(mp, (void **)bufs, len); uint32_t i = 0; switch (len % 4) { while (i != len) { case 0: rte_mbuf_refcnt_set(bufs[i], 1); rte_pktmbuf_reset(bufs[i]); bufs[i]->pkt_len = pkt_len; bufs[i]->data_len = pkt_len; i++; // fall through case 3: rte_mbuf_refcnt_set(bufs[i], 1); rte_pktmbuf_reset(bufs[i]); bufs[i]->pkt_len = pkt_len; bufs[i]->data_len = pkt_len; i++; // fall through case 2: rte_mbuf_refcnt_set(bufs[i], 1); rte_pktmbuf_reset(bufs[i]); bufs[i]->pkt_len = pkt_len; bufs[i]->data_len = pkt_len; i++; // fall through case 1: rte_mbuf_refcnt_set(bufs[i], 1); rte_pktmbuf_reset(bufs[i]); bufs[i]->pkt_len = pkt_len; bufs[i]->data_len = pkt_len; i++; } } }
/* Perform a sanity test of the distributor with a large number of packets, * where we allocate a new set of mbufs for each burst. The workers then * free the mbufs. This ensures that we don't have any packet leaks in the * library. */ static int sanity_test_with_mbuf_alloc(struct worker_params *wp, struct rte_mempool *p) { struct rte_distributor *d = wp->dist; unsigned i; struct rte_mbuf *bufs[BURST]; printf("=== Sanity test with mbuf alloc/free (%s) ===\n", wp->name); clear_packet_count(); for (i = 0; i < ((1<<ITER_POWER)); i += BURST) { unsigned j; while (rte_mempool_get_bulk(p, (void *)bufs, BURST) < 0) rte_distributor_process(d, NULL, 0); for (j = 0; j < BURST; j++) { bufs[j]->hash.usr = (i+j) << 1; rte_mbuf_refcnt_set(bufs[j], 1); } rte_distributor_process(d, bufs, BURST); } rte_distributor_flush(d); rte_delay_us(10000); if (total_packet_count() < (1<<ITER_POWER)) { printf("Line %u: Packet count is incorrect, %u, expected %u\n", __LINE__, total_packet_count(), (1<<ITER_POWER)); return -1; } printf("Sanity test with mbuf alloc/free passed\n\n"); return 0; }
void send_loop(void) { RTE_LOG(INFO, APP, "send_loop()\n"); char pkt[PKT_SIZE] = {0}; int nreceived; int retval = 0; (void) retval; #ifdef CALC_CHECKSUM unsigned int kk = 0; #endif srand(time(NULL)); //Initializate packet contents int i; for(i = 0; i < PKT_SIZE; i++) pkt[i] = rand()%256; #if ALLOC_METHOD == ALLOC_APP struct rte_mempool * packets_pool = rte_mempool_lookup("ovs_mp_1500_0_262144"); //struct rte_mempool * packets_pool = rte_mempool_lookup("packets"); //Create mempool //struct rte_mempool * packets_pool = rte_mempool_create( // "packets", // NUM_PKTS, // MBUF_SIZE, // CACHE_SIZE, //This is the size of the mempool cache // sizeof(struct rte_pktmbuf_pool_private), // rte_pktmbuf_pool_init, // NULL, // rte_pktmbuf_init, // NULL, // rte_socket_id(), // 0 /*NO_FLAGS*/); if(packets_pool == NULL) { RTE_LOG(INFO, APP, "rte_errno: %s\n", rte_strerror(rte_errno)); rte_exit(EXIT_FAILURE, "Cannot find memory pool\n"); } RTE_LOG(INFO, APP, "There are %d free packets in the pool\n", rte_mempool_count(packets_pool)); #endif #ifdef USE_BURST struct rte_mbuf * packets_array[BURST_SIZE] = {0}; struct rte_mbuf * packets_array_rx[BURST_SIZE] = {0}; int ntosend; int n; (void) n; /* prealloc packets */ do { n = rte_mempool_get_bulk(packets_pool, (void **) packets_array, BURST_SIZE); } while(n != 0 && !stop); ntosend = BURST_SIZE; #else struct rte_mbuf * mbuf; /* prealloc packet */ do { mbuf = rte_pktmbuf_alloc(packets_pool); } while(mbuf == NULL); #endif RTE_LOG(INFO, APP, "Starting sender loop\n"); signal (SIGINT, crtl_c_handler); stop = 0; while(likely(!stop)) { while(pause_); #ifdef USE_BURST #if ALLOC_METHOD == ALLOC_OVS //Try to get BURS_SIZE free slots ntosend = rte_ring_dequeue_burst(alloc_q, (void **) packets_array, BURST_SIZE); #elif ALLOC_METHOD == ALLOC_APP //do //{ // n = rte_mempool_get_bulk(packets_pool, (void **) packets_array, BURST_SIZE); //} while(n != 0 && !stop); //ntosend = BURST_SIZE; #else #error "No implemented" #endif //Copy data to the buffers for(i 
= 0; i < ntosend; i++) { rte_memcpy(packets_array[i]->buf_addr, pkt, PKT_SIZE); //fill_packet(packets_array[i]->pkt.data); packets_array[i]->next = NULL; packets_array[i]->pkt_len = PKT_SIZE; packets_array[i]->data_len = PKT_SIZE; #ifdef CALC_CHECKSUM for(i = 0; i < ntosend; i++) for(kk = 0; kk < 8; kk++) checksum += ((uint64_t *)packets_array[i]->buf_addr)[kk]; #endif } //Enqueue data (try until all the allocated packets are enqueue) i = 0; while(i < ntosend && !stop) { i += rte_ring_enqueue_burst(tx_ring, (void **) &packets_array[i], ntosend - i); /* also dequeue some packets */ nreceived= rte_ring_dequeue_burst(rx_ring, (void **) packets_array_rx, BURST_SIZE); rx += nreceived; /* update statistics */ } #else // [NO] USE_BURST #if ALLOC_METHOD == ALLOC_OVS //Method 1 //Read a buffer to be used as a buffer for a packet retval = rte_ring_dequeue(alloc_q, (void **)&mbuf); if(retval != 0) { #ifdef CALC_ALLOC_STATS //stats.alloc_fails++; #endif continue; } #elif ALLOC_METHOD == ALLOC_APP //Method 2 //mbuf = rte_pktmbuf_alloc(packets_pool); //if(mbuf == NULL) //{ //#ifdef CALC_ALLOC_STATS // stats.alloc_fails++; //#endif // continue; //} #else #error "ALLOC_METHOD has a non valid value" #endif #if DELAY_CYCLES > 0 //This loop increases mumber of packets per second (don't ask me why) unsigned long long j = 0; for(j = 0; j < DELAY_CYCLES; j++) asm(""); #endif //Copy packet to the correct buffer rte_memcpy(mbuf->buf_addr, pkt, PKT_SIZE); //fill_packet(mbuf->pkt.data); //mbuf->pkt.next = NULL; //mbuf->pkt.pkt_len = PKT_SIZE; //mbuf->pkt.data_len = PKT_SIZE; (void) pkt; mbuf->next = NULL; mbuf->pkt_len = PKT_SIZE; mbuf->data_len = PKT_SIZE; #ifdef CALC_CHECKSUM for(kk = 0; kk < 8; kk++) checksum += ((uint64_t *)mbuf->buf_addr)[kk]; #endif //this method avoids dropping packets: //Simple tries until the packet is inserted in the queue tryagain: retval = rte_ring_enqueue(tx_ring, (void *) mbuf); if(retval == -ENOBUFS && !stop) { #ifdef CALC_TX_TRIES //stats.tx_retries++; 
#endif goto tryagain; } #ifdef CALC_TX_STATS //stats.tx++; #endif #endif //USE_BURST } #ifdef CALC_CHECKSUM printf("Checksum was %" PRIu64 "\n", checksum); #endif }
/* do basic sanity testing of the distributor. This test tests the following: * - send 32 packets through distributor with the same tag and ensure they * all go to the one worker * - send 32 packets through the distributor with two different tags and * verify that they go equally to two different workers. * - send 32 packets with different tags through the distributors and * just verify we get all packets back. * - send 1024 packets through the distributor, gathering the returned packets * as we go. Then verify that we correctly got all 1024 pointers back again, * not necessarily in the same order (as different flows). */ static int sanity_test(struct worker_params *wp, struct rte_mempool *p) { struct rte_distributor *db = wp->dist; struct rte_mbuf *bufs[BURST]; struct rte_mbuf *returns[BURST*2]; unsigned int i, count; unsigned int retries; printf("=== Basic distributor sanity tests ===\n"); clear_packet_count(); if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) { printf("line %d: Error getting mbufs from pool\n", __LINE__); return -1; } /* now set all hash values in all buffers to zero, so all pkts go to the * one worker thread */ for (i = 0; i < BURST; i++) bufs[i]->hash.usr = 0; rte_distributor_process(db, bufs, BURST); count = 0; do { rte_distributor_flush(db); count += rte_distributor_returned_pkts(db, returns, BURST*2); } while (count < BURST); if (total_packet_count() != BURST) { printf("Line %d: Error, not all packets flushed. 
" "Expected %u, got %u\n", __LINE__, BURST, total_packet_count()); return -1; } for (i = 0; i < rte_lcore_count() - 1; i++) printf("Worker %u handled %u packets\n", i, worker_stats[i].handled_packets); printf("Sanity test with all zero hashes done.\n"); /* pick two flows and check they go correctly */ if (rte_lcore_count() >= 3) { clear_packet_count(); for (i = 0; i < BURST; i++) bufs[i]->hash.usr = (i & 1) << 8; rte_distributor_process(db, bufs, BURST); count = 0; do { rte_distributor_flush(db); count += rte_distributor_returned_pkts(db, returns, BURST*2); } while (count < BURST); if (total_packet_count() != BURST) { printf("Line %d: Error, not all packets flushed. " "Expected %u, got %u\n", __LINE__, BURST, total_packet_count()); return -1; } for (i = 0; i < rte_lcore_count() - 1; i++) printf("Worker %u handled %u packets\n", i, worker_stats[i].handled_packets); printf("Sanity test with two hash values done\n"); } /* give a different hash value to each packet, * so load gets distributed */ clear_packet_count(); for (i = 0; i < BURST; i++) bufs[i]->hash.usr = i+1; rte_distributor_process(db, bufs, BURST); count = 0; do { rte_distributor_flush(db); count += rte_distributor_returned_pkts(db, returns, BURST*2); } while (count < BURST); if (total_packet_count() != BURST) { printf("Line %d: Error, not all packets flushed. 
" "Expected %u, got %u\n", __LINE__, BURST, total_packet_count()); return -1; } for (i = 0; i < rte_lcore_count() - 1; i++) printf("Worker %u handled %u packets\n", i, worker_stats[i].handled_packets); printf("Sanity test with non-zero hashes done\n"); rte_mempool_put_bulk(p, (void *)bufs, BURST); /* sanity test with BIG_BATCH packets to ensure they all arrived back * from the returned packets function */ clear_packet_count(); struct rte_mbuf *many_bufs[BIG_BATCH], *return_bufs[BIG_BATCH]; unsigned num_returned = 0; /* flush out any remaining packets */ rte_distributor_flush(db); rte_distributor_clear_returns(db); if (rte_mempool_get_bulk(p, (void *)many_bufs, BIG_BATCH) != 0) { printf("line %d: Error getting mbufs from pool\n", __LINE__); return -1; } for (i = 0; i < BIG_BATCH; i++) many_bufs[i]->hash.usr = i << 2; printf("=== testing big burst (%s) ===\n", wp->name); for (i = 0; i < BIG_BATCH/BURST; i++) { rte_distributor_process(db, &many_bufs[i*BURST], BURST); count = rte_distributor_returned_pkts(db, &return_bufs[num_returned], BIG_BATCH - num_returned); num_returned += count; } rte_distributor_flush(db); count = rte_distributor_returned_pkts(db, &return_bufs[num_returned], BIG_BATCH - num_returned); num_returned += count; retries = 0; do { rte_distributor_flush(db); count = rte_distributor_returned_pkts(db, &return_bufs[num_returned], BIG_BATCH - num_returned); num_returned += count; retries++; } while ((num_returned < BIG_BATCH) && (retries < 100)); if (num_returned != BIG_BATCH) { printf("line %d: Missing packets, expected %d\n", __LINE__, num_returned); return -1; } /* big check - make sure all packets made it back!! 
*/ for (i = 0; i < BIG_BATCH; i++) { unsigned j; struct rte_mbuf *src = many_bufs[i]; for (j = 0; j < BIG_BATCH; j++) { if (return_bufs[j] == src) break; } if (j == BIG_BATCH) { printf("Error: could not find source packet #%u\n", i); return -1; } } printf("Sanity test of returned packets done\n"); rte_mempool_put_bulk(p, (void *)many_bufs, BIG_BATCH); printf("\n"); return 0; }
/*
 * Refill RTE_IXGBE_RXQ_REARM_THRESH receive descriptors, starting at
 * rxq->rxrearm_start, with mbufs taken in bulk from rxq->mb_pool, then
 * advance the rearm bookkeeping and write the new tail index to the NIC.
 *
 * If the pool cannot supply the whole batch nothing is rearmed.
 * NOTE(review): no failure statistic is updated on that path - confirm
 * whether an allocation-failure counter should be bumped here.
 */
static inline void
ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
{
	int i;
	uint16_t rx_id;
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct igb_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;
	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
			RTE_PKTMBUF_HEADROOM);
	__m128i dma_addr0, dma_addr1;

	rxdp = rxq->rx_ring + rxq->rxrearm_start;

	/* Pull 'n' more MBUFs into the software ring */
	if (rte_mempool_get_bulk(rxq->mb_pool,
				 (void *)rxep,
				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
		/*
		 * Once the number of descriptors awaiting rearm would reach
		 * the ring size, point the next RTE_IXGBE_DESCS_PER_LOOP
		 * software entries at the queue's fake mbuf and zero the
		 * matching hardware descriptors.
		 */
		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
		    rxq->nb_rx_desc) {
			dma_addr0 = _mm_setzero_si128();
			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
				rxep[i].mbuf = &rxq->fake_mbuf;
				_mm_store_si128((__m128i *)&rxdp[i].read,
						dma_addr0);
			}
		}
		return;
	}

	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
		__m128i vaddr0, vaddr1;

		mb0 = rxep[0].mbuf;
		mb1 = rxep[1].mbuf;

		/* flush mbuf with pkt template */
		mb0->rearm_data[0] = rxq->mbuf_initializer;
		mb1->rearm_data[0] = rxq->mbuf_initializer;

		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
		vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
		vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));

		/* convert pa to dma_addr hdr/data */
		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);

		/* add headroom to pa values */
		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);

		/* flush desc with pa dma_addr */
		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
	}

	/* advance the rearm window, wrapping at the end of the ring */
	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
		rxq->rxrearm_start = 0;

	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;

	/* tail is the index of the last descriptor that was just rearmed */
	rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ?
			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

	/* Update the tail pointer on the NIC */
	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
}
/*
 * Receive burst for packets that may span several descriptors.
 *
 * At most min(nb_pkts, q->alloc_thresh) descriptors are examined. Segments
 * are chained onto the first segment of their packet, and a packet is
 * stored in rx_pkts[] once the descriptor carrying the EOP status bit has
 * been seen. A packet still incomplete at the end of the call is carried
 * over through q->pkt_first_seg / q->pkt_last_seg.
 *
 * Once next_dd passes next_trigger (or wraps around the ring), alloc_thresh
 * fresh mbufs are pulled from q->mp to replenish the ring.
 *
 * Returns the number of complete packets stored in rx_pkts[], or 0 when the
 * replenishing allocation fails.
 */
uint16_t
fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
				uint16_t nb_pkts)
{
	struct rte_mbuf *mbuf;
	union fm10k_rx_desc desc;
	struct fm10k_rx_queue *q = rx_queue;
	uint16_t count = 0;
	uint16_t nb_rcv, nb_seg;
	int alloc = 0;
	uint16_t next_dd;
	struct rte_mbuf *first_seg = q->pkt_first_seg;
	struct rte_mbuf *last_seg = q->pkt_last_seg;
	int ret;

	next_dd = q->next_dd;
	nb_rcv = 0;

	nb_seg = RTE_MIN(nb_pkts, q->alloc_thresh);
	for (count = 0; count < nb_seg; count++) {
		mbuf = q->sw_ring[next_dd];
		desc = q->hw_ring[next_dd];
		/* stop at the first descriptor without the DD status bit */
		if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
			break;
#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
		dump_rxd(&desc);
#endif

		/* wrapping around the ring also forces a replenish below */
		if (++next_dd == q->nb_desc) {
			next_dd = 0;
			alloc = 1;
		}

		/* Prefetch next mbuf while processing current one. */
		rte_prefetch0(q->sw_ring[next_dd]);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((next_dd & 0x3) == 0) {
			rte_prefetch0(&q->hw_ring[next_dd]);
			rte_prefetch0(&q->sw_ring[next_dd]);
		}

		/* Fill data length */
		rte_pktmbuf_data_len(mbuf) = desc.w.length;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet and
		 * initialize its context.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (!first_seg) {
			first_seg = mbuf;
			first_seg->pkt_len = desc.w.length;
		} else {
			first_seg->pkt_len =
					(uint16_t)(first_seg->pkt_len +
					rte_pktmbuf_data_len(mbuf));
			first_seg->nb_segs++;
			last_seg->next = mbuf;
		}

		/*
		 * If this is not the last buffer of the received packet,
		 * update the pointer to the last mbuf of the current scattered
		 * packet and continue to parse the RX ring.
		 */
		if (!(desc.d.staterr & FM10K_RXD_STATUS_EOP)) {
			last_seg = mbuf;
			continue;
		}

		first_seg->ol_flags = 0;
#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
		rx_desc_to_ol_flags(first_seg, &desc);
#endif
		first_seg->hash.rss = desc.d.rss;

		/* Prefetch data of first segment, if configured to do so. */
		rte_packet_prefetch((char *)first_seg->buf_addr +
			first_seg->data_off);

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rcv++] = first_seg;

		/*
		 * Setup receipt context for a new packet.
		 */
		first_seg = NULL;
	}

	q->next_dd = next_dd;

	if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
		ret = rte_mempool_get_bulk(q->mp,
					(void **)&q->sw_ring[q->next_alloc],
					q->alloc_thresh);

		if (unlikely(ret != 0)) {
			uint8_t port = q->port_id;
			PMD_RX_LOG(ERR, "Failed to alloc mbuf");
			/*
			 * Need to restore next_dd if we cannot allocate new
			 * buffers to replenish the old ones.
			 */
			/*
			 * NOTE(review): segments may already have been chained
			 * above, and pkt_first_seg/pkt_last_seg are not saved
			 * on this path - confirm that re-reading the same
			 * descriptors on the next call is safe.
			 */
			q->next_dd = (q->next_dd + q->nb_desc - count) %
								q->nb_desc;
			rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
			return 0;
		}

		for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
			mbuf = q->sw_ring[q->next_alloc];

			/* setup static mbuf fields */
			fm10k_pktmbuf_reset(mbuf, q->port_id);

			/* write descriptor */
			desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			q->hw_ring[q->next_alloc] = desc;
		}
		/* hand the refilled descriptors to the hardware */
		FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
		q->next_trigger += q->alloc_thresh;
		if (q->next_trigger >= q->nb_desc) {
			q->next_trigger = q->alloc_thresh - 1;
			q->next_alloc = 0;
		}
	}

	/* remember a partially received packet for the next call */
	q->pkt_first_seg = first_seg;
	q->pkt_last_seg = last_seg;

	return nb_rcv;
}
/*
 * Receive burst for single-descriptor packets.
 *
 * At most min(nb_pkts, q->alloc_thresh) descriptors are examined; every
 * descriptor with the DD status bit yields one mbuf in rx_pkts[], with its
 * lengths, ol_flags and RSS hash filled from the descriptor.
 *
 * Once next_dd passes next_trigger (or wraps around the ring), alloc_thresh
 * fresh mbufs are pulled from q->mp to replenish the ring.
 *
 * Returns the number of packets stored in rx_pkts[], or 0 when the
 * replenishing allocation fails (next_dd is rewound in that case).
 */
uint16_t
fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
	uint16_t nb_pkts)
{
	struct rte_mbuf *mbuf;
	union fm10k_rx_desc desc;
	struct fm10k_rx_queue *q = rx_queue;
	uint16_t count = 0;
	int alloc = 0;
	uint16_t next_dd;
	int ret;

	next_dd = q->next_dd;

	nb_pkts = RTE_MIN(nb_pkts, q->alloc_thresh);
	for (count = 0; count < nb_pkts; ++count) {
		mbuf = q->sw_ring[next_dd];
		desc = q->hw_ring[next_dd];
		/* stop at the first descriptor without the DD status bit */
		if (!(desc.d.staterr & FM10K_RXD_STATUS_DD))
			break;
#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
		dump_rxd(&desc);
#endif
		rte_pktmbuf_pkt_len(mbuf) = desc.w.length;
		rte_pktmbuf_data_len(mbuf) = desc.w.length;

		mbuf->ol_flags = 0;
#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
		rx_desc_to_ol_flags(mbuf, &desc);
#endif

		mbuf->hash.rss = desc.d.rss;

		rx_pkts[count] = mbuf;
		/* wrapping around the ring also forces a replenish below */
		if (++next_dd == q->nb_desc) {
			next_dd = 0;
			alloc = 1;
		}

		/* Prefetch next mbuf while processing current one. */
		rte_prefetch0(q->sw_ring[next_dd]);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((next_dd & 0x3) == 0) {
			rte_prefetch0(&q->hw_ring[next_dd]);
			rte_prefetch0(&q->sw_ring[next_dd]);
		}
	}

	q->next_dd = next_dd;

	if ((q->next_dd > q->next_trigger) || (alloc == 1)) {
		ret = rte_mempool_get_bulk(q->mp,
					(void **)&q->sw_ring[q->next_alloc],
					q->alloc_thresh);

		if (unlikely(ret != 0)) {
			uint8_t port = q->port_id;
			PMD_RX_LOG(ERR, "Failed to alloc mbuf");
			/*
			 * Need to restore next_dd if we cannot allocate new
			 * buffers to replenish the old ones.
			 */
			q->next_dd = (q->next_dd + q->nb_desc - count) %
								q->nb_desc;
			rte_eth_devices[port].data->rx_mbuf_alloc_failed++;
			return 0;
		}

		for (; q->next_alloc <= q->next_trigger; ++q->next_alloc) {
			mbuf = q->sw_ring[q->next_alloc];

			/* setup static mbuf fields */
			fm10k_pktmbuf_reset(mbuf, q->port_id);

			/* write descriptor */
			desc.q.pkt_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			desc.q.hdr_addr = MBUF_DMA_ADDR_DEFAULT(mbuf);
			q->hw_ring[q->next_alloc] = desc;
		}
		/* hand the refilled descriptors to the hardware */
		FM10K_PCI_REG_WRITE(q->tail_ptr, q->next_trigger);
		q->next_trigger += q->alloc_thresh;
		if (q->next_trigger >= q->nb_desc) {
			q->next_trigger = q->alloc_thresh - 1;
			q->next_alloc = 0;
		}
	}

	return count;
}
fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq)
{
	/*
	 * Build the 64-bit mbuf template (rxq->mbuf_initializer) that the
	 * rearm routine stores over rearm_data of every fresh mbuf.
	 * Always returns 0.
	 */
	uintptr_t p;
	struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */

	mb_def.nb_segs = 1;
	/* data_off will be adjusted after new mbuf allocated for 512-byte
	 * alignment.
	 */
	mb_def.data_off = RTE_PKTMBUF_HEADROOM;
	mb_def.port = rxq->port_id;
	rte_mbuf_refcnt_set(&mb_def, 1);

	/* prevent compiler reordering: rearm_data covers previous fields */
	rte_compiler_barrier();
	p = (uintptr_t)&mb_def.rearm_data;
	rxq->mbuf_initializer = *(uint64_t *)p;
	return 0;
}

/*
 * Refill RTE_FM10K_RXQ_REARM_THRESH receive descriptors, starting at
 * rxq->rxrearm_start, with mbufs taken in bulk from rxq->mp, then advance
 * the rearm bookkeeping and write the new tail index to the NIC.
 *
 * If the pool cannot supply the whole batch, the affected software ring
 * entries are pointed at the queue's fake mbuf, their descriptors are
 * zeroed, and the port's rx_mbuf_alloc_failed counter is increased by the
 * batch size.
 */
static inline void
fm10k_rxq_rearm(struct fm10k_rx_queue *rxq)
{
	int i;
	uint16_t rx_id;
	volatile union fm10k_rx_desc *rxdp;
	struct rte_mbuf **mb_alloc = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;
	/* headroom plus (alignment - 1): rounds up once masked with hba_msk */
	__m128i head_off = _mm_set_epi64x(
			RTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1,
			RTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1);
	__m128i dma_addr0, dma_addr1;
	/* Rx buffer need to be aligned with 512 byte */
	const __m128i hba_msk = _mm_set_epi64x(0,
				UINT64_MAX - FM10K_RX_DATABUF_ALIGN + 1);

	rxdp = rxq->hw_ring + rxq->rxrearm_start;

	/* Pull 'n' more MBUFs into the software ring */
	if (rte_mempool_get_bulk(rxq->mp,
				 (void *)mb_alloc,
				 RTE_FM10K_RXQ_REARM_THRESH) < 0) {
		dma_addr0 = _mm_setzero_si128();
		/* Clean up all the HW/SW ring content */
		for (i = 0; i < RTE_FM10K_RXQ_REARM_THRESH; i++) {
			mb_alloc[i] = &rxq->fake_mbuf;
			_mm_store_si128((__m128i *)&rxdp[i].q,
						dma_addr0);
		}

		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_FM10K_RXQ_REARM_THRESH;
		return;
	}

	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
	for (i = 0; i < RTE_FM10K_RXQ_REARM_THRESH; i += 2, mb_alloc += 2) {
		__m128i vaddr0, vaddr1;
		uintptr_t p0, p1;

		mb0 = mb_alloc[0];
		mb1 = mb_alloc[1];

		/* Flush mbuf with pkt template.
		 * Data to be rearmed is 6 bytes long.
		 * Though, RX will overwrite ol_flags that are coming next
		 * anyway. So overwrite whole 8 bytes with one load:
		 * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
		 */
		p0 = (uintptr_t)&mb0->rearm_data;
		*(uint64_t *)p0 = rxq->mbuf_initializer;
		p1 = (uintptr_t)&mb1->rearm_data;
		*(uint64_t *)p1 = rxq->mbuf_initializer;

		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);

		/* convert pa to dma_addr hdr/data */
		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);

		/* add headroom to pa values */
		dma_addr0 = _mm_add_epi64(dma_addr0, head_off);
		dma_addr1 = _mm_add_epi64(dma_addr1, head_off);

		/* Do 512 byte alignment to satisfy HW requirement, in the
		 * meanwhile, set Header Buffer Address to zero.
		 */
		dma_addr0 = _mm_and_si128(dma_addr0, hba_msk);
		dma_addr1 = _mm_and_si128(dma_addr1, hba_msk);

		/* flush desc with pa dma_addr */
		_mm_store_si128((__m128i *)&rxdp++->q, dma_addr0);
		_mm_store_si128((__m128i *)&rxdp++->q, dma_addr1);

		/* enforce 512B alignment on default Rx virtual addresses */
		mb0->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb0->buf_addr
				+ RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN)
				- (char *)mb0->buf_addr);
		mb1->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb1->buf_addr
				+ RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN)
				- (char *)mb1->buf_addr);
	}

	/* advance the rearm window, wrapping at the end of the ring */
	rxq->rxrearm_start += RTE_FM10K_RXQ_REARM_THRESH;
	if (rxq->rxrearm_start >= rxq->nb_desc)
		rxq->rxrearm_start = 0;

	rxq->rxrearm_nb -= RTE_FM10K_RXQ_REARM_THRESH;

	/* tail is the index of the last descriptor that was just rearmed */
	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
			(rxq->nb_desc - 1) : (rxq->rxrearm_start - 1));

	/* Update the tail pointer on the NIC */
	FM10K_PCI_REG_WRITE(rxq->tail_ptr, rx_id);
}
static void handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts) { struct task_gen_server *task = (struct task_gen_server *)tbase; struct pkt_tuple pkt_tuple[MAX_PKT_BURST]; uint8_t out[MAX_PKT_BURST]; struct l4_meta l4_meta[MAX_PKT_BURST]; struct bundle_ctx *conn; int ret; for (uint16_t j = 0; j < n_pkts; ++j) { if (parse_pkt(mbufs[j], &pkt_tuple[j], &l4_meta[j])) plogdx_err(mbufs[j], "Unknown packet, parsing failed\n"); } /* Main proc loop */ for (uint16_t j = 0; j < n_pkts; ++j) { conn = NULL; ret = rte_hash_lookup(task->bundle_ctx_pool.hash, (const void *)&pkt_tuple[j]); if (ret >= 0) conn = task->bundle_ctx_pool.hash_entries[ret]; /* If not part of existing connection, try to create a connection */ if (NULL == conn) { struct new_tuple nt; nt.dst_addr = pkt_tuple[j].dst_addr; nt.proto_id = pkt_tuple[j].proto_id; nt.dst_port = pkt_tuple[j].dst_port; rte_memcpy(nt.l2_types, pkt_tuple[j].l2_types, sizeof(nt.l2_types)); const struct bundle_cfg *n; if (NULL != (n = server_accept(task, &nt))) { conn = bundle_ctx_pool_get(&task->bundle_ctx_pool); if (!conn) { out[j] = NO_PORT_AVAIL; plogx_err("No more free bundles to accept new connection\n"); continue; } ret = rte_hash_add_key(task->bundle_ctx_pool.hash, (const void *)&pkt_tuple[j]); if (ret < 0) { out[j] = NO_PORT_AVAIL; bundle_ctx_pool_put(&task->bundle_ctx_pool, conn); plog_err("Adding key failed while trying to accept connection\n"); continue; } task->bundle_ctx_pool.hash_entries[ret] = conn; bundle_init(conn, n, task->heap, PEER_SERVER, &task->seed); conn->tuple = pkt_tuple[j]; if (conn->ctx.stream_cfg->proto == IPPROTO_TCP) task->l4_stats.tcp_created++; else task->l4_stats.udp_created++; } } /* bundle contains either an active connection or a newly created connection. If it is NULL, then not listening. */ if (NULL != conn) { int ret = bundle_proc_data(conn, mbufs[j], &l4_meta[j], &task->bundle_ctx_pool, &task->seed, &task->l4_stats); out[j] = ret == 0? 
0: NO_PORT_AVAIL; } else { plog_err("Packet received for service that does not exist\n"); pkt_tuple_debug(&pkt_tuple[j]); plogd_dbg(mbufs[j], NULL); out[j] = NO_PORT_AVAIL; } } conn = NULL; task->base.tx_pkt(&task->base, mbufs, n_pkts, out); if (!(task->heap->n_elems && rte_rdtsc() > heap_peek_prio(task->heap))) return ; if (task->n_new_mbufs < MAX_PKT_BURST) { if (rte_mempool_get_bulk(task->mempool, (void **)task->new_mbufs, MAX_PKT_BURST - task->n_new_mbufs) < 0) { return ; } for (uint32_t i = 0; i < MAX_PKT_BURST - task->n_new_mbufs; ++i) { init_mbuf_seg(task->new_mbufs[i]); } task->n_new_mbufs = MAX_PKT_BURST; } if (task->heap->n_elems && rte_rdtsc() > heap_peek_prio(task->heap)) { uint16_t n_called_back = 0; while (task->heap->n_elems && rte_rdtsc() > heap_peek_prio(task->heap) && n_called_back < MAX_PKT_BURST) { conn = BUNDLE_CTX_UPCAST(heap_pop(task->heap)); /* handle packet TX (retransmit or delayed transmit) */ ret = bundle_proc_data(conn, task->new_mbufs[n_called_back], NULL, &task->bundle_ctx_pool, &task->seed, &task->l4_stats); if (ret == 0) { out[n_called_back] = 0; n_called_back++; } } task->base.tx_pkt(&task->base, task->new_mbufs, n_called_back, out); task->n_new_mbufs -= n_called_back; } }
static void handle_gen_bulk_client(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts) { struct task_gen_client *task = (struct task_gen_client *)tbase; uint8_t out[MAX_PKT_BURST] = {0}; struct bundle_ctx *conn; int ret; if (n_pkts) { for (int i = 0; i < n_pkts; ++i) { struct pkt_tuple pt; struct l4_meta l4_meta; if (parse_pkt(mbufs[i], &pt, &l4_meta)) { plogdx_err(mbufs[i], "Parsing failed\n"); out[i] = NO_PORT_AVAIL; continue; } ret = rte_hash_lookup(task->bundle_ctx_pool.hash, (const void *)&pt); if (ret < 0) { plogx_dbg("Client: packet RX that does not belong to connection:" "Client = "IPv4_BYTES_FMT":%d, Server = "IPv4_BYTES_FMT":%d\n", IPv4_BYTES(((uint8_t*)&pt.dst_addr)), rte_bswap16(pt.dst_port), IPv4_BYTES(((uint8_t*)&pt.src_addr)), rte_bswap16(pt.src_port)); plogdx_dbg(mbufs[i], NULL); // if tcp, send RST /* pkt_tuple_debug2(&pt); */ out[i] = NO_PORT_AVAIL; continue; } conn = task->bundle_ctx_pool.hash_entries[ret]; ret = bundle_proc_data(conn, mbufs[i], &l4_meta, &task->bundle_ctx_pool, &task->seed, &task->l4_stats); out[i] = ret == 0? 
0: NO_PORT_AVAIL; } task->base.tx_pkt(&task->base, mbufs, n_pkts, out); } if (task->n_new_mbufs < MAX_PKT_BURST) { if (rte_mempool_get_bulk(task->mempool, (void **)task->new_mbufs, MAX_PKT_BURST - task->n_new_mbufs) < 0) { plogx_err("4Mempool alloc failed %d\n", MAX_PKT_BURST); return ; } for (uint32_t i = 0; i < MAX_PKT_BURST - task->n_new_mbufs; ++i) { init_mbuf_seg(task->new_mbufs[i]); } task->n_new_mbufs = MAX_PKT_BURST; } /* If there is at least one callback to handle, handle at most MAX_PKT_BURST */ if (task->heap->n_elems && rte_rdtsc() > heap_peek_prio(task->heap)) { uint16_t n_called_back = 0; while (task->heap->n_elems && rte_rdtsc() > heap_peek_prio(task->heap) && n_called_back < MAX_PKT_BURST) { conn = BUNDLE_CTX_UPCAST(heap_pop(task->heap)); /* handle packet TX (retransmit or delayed transmit) */ ret = bundle_proc_data(conn, task->new_mbufs[n_called_back], NULL, &task->bundle_ctx_pool, &task->seed, &task->l4_stats); if (ret == 0) { out[n_called_back] = 0; n_called_back++; } } plogx_dbg("During callback, will send %d packets\n", n_called_back); task->base.tx_pkt(&task->base, task->new_mbufs, n_called_back, out); task->n_new_mbufs -= n_called_back; } int n_new = task->bundle_ctx_pool.n_free_bundles; n_new = n_new > MAX_PKT_BURST? 
MAX_PKT_BURST : n_new; if (n_new == 0) return ; if (task->n_new_mbufs < MAX_PKT_BURST) { if (rte_mempool_get_bulk(task->mempool, (void **)task->new_mbufs, MAX_PKT_BURST - task->n_new_mbufs) < 0) { plogx_err("4Mempool alloc failed %d\n", MAX_PKT_BURST); return ; } for (uint32_t i = 0; i < MAX_PKT_BURST - task->n_new_mbufs; ++i) { init_mbuf_seg(task->new_mbufs[i]); } task->n_new_mbufs = MAX_PKT_BURST; } for (int i = 0; i < n_new; ++i) { int32_t ret = cdf_sample(task->cdf, &task->seed); /* Select a new bundle_cfg according to imix */ struct bundle_cfg *bundle_cfg = &task->bundle_cfgs[ret]; struct bundle_ctx *bundle_ctx; bundle_ctx = bundle_ctx_pool_get(&task->bundle_ctx_pool); /* Should be an assert: */ if (!bundle_ctx) { plogx_err("No more available bundles\n"); exit(-1); } struct pkt_tuple *pt = &bundle_ctx->tuple; int n_retries = 0; do { /* Note that the actual packet sent will contain swapped addresses and ports (i.e. pkt.src <=> tuple.dst). The incoming packet will match this struct. */ bundle_init(bundle_ctx, bundle_cfg, task->heap, PEER_CLIENT, &task->seed); ret = rte_hash_lookup(task->bundle_ctx_pool.hash, (const void *)pt); if (n_retries == 1000) { plogx_err("Already tried 1K times\n"); } if (ret >= 0) { n_retries++; } } while (ret >= 0); ret = rte_hash_add_key(task->bundle_ctx_pool.hash, (const void *)pt); if (ret < 0) { plogx_err("Failed to add key ret = %d, n_free = %d\n", ret, task->bundle_ctx_pool.n_free_bundles); bundle_ctx_pool_put(&task->bundle_ctx_pool, bundle_ctx); pkt_tuple_debug2(pt); out[i] = NO_PORT_AVAIL; continue; } task->bundle_ctx_pool.hash_entries[ret] = bundle_ctx; if (bundle_ctx->ctx.stream_cfg->proto == IPPROTO_TCP) task->l4_stats.tcp_created++; else task->l4_stats.udp_created++; ret = bundle_proc_data(bundle_ctx, task->new_mbufs[i], NULL, &task->bundle_ctx_pool, &task->seed, &task->l4_stats); out[i] = ret == 0? 0: NO_PORT_AVAIL; } task->base.tx_pkt(&task->base, task->new_mbufs, n_new, out); task->n_new_mbufs -= n_new; }
/* do basic sanity testing of the distributor. This test tests the following: * - send 32 packets through distributor with the same tag and ensure they * all go to the one worker * - send 32 packets throught the distributor with two different tags and * verify that they go equally to two different workers. * - send 32 packets with different tags through the distributors and * just verify we get all packets back. * - send 1024 packets through the distributor, gathering the returned packets * as we go. Then verify that we correctly got all 1024 pointers back again, * not necessarily in the same order (as different flows). */ static int sanity_test(struct rte_distributor *d, struct rte_mempool *p) { struct rte_mbuf *bufs[BURST]; unsigned i; printf("=== Basic distributor sanity tests ===\n"); clear_packet_count(); if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) { printf("line %d: Error getting mbufs from pool\n", __LINE__); return -1; } /* now set all hash values in all buffers to zero, so all pkts go to the * one worker thread */ for (i = 0; i < BURST; i++) bufs[i]->pkt.hash.rss = 0; rte_distributor_process(d, bufs, BURST); rte_distributor_flush(d); if (total_packet_count() != BURST) { printf("Line %d: Error, not all packets flushed. " "Expected %u, got %u\n", __LINE__, BURST, total_packet_count()); return -1; } for (i = 0; i < rte_lcore_count() - 1; i++) printf("Worker %u handled %u packets\n", i, worker_stats[i].handled_packets); printf("Sanity test with all zero hashes done.\n"); if (worker_stats[0].handled_packets != BURST) return -1; /* pick two flows and check they go correctly */ if (rte_lcore_count() >= 3) { clear_packet_count(); for (i = 0; i < BURST; i++) bufs[i]->pkt.hash.rss = (i & 1) << 8; rte_distributor_process(d, bufs, BURST); rte_distributor_flush(d); if (total_packet_count() != BURST) { printf("Line %d: Error, not all packets flushed. 
" "Expected %u, got %u\n", __LINE__, BURST, total_packet_count()); return -1; } for (i = 0; i < rte_lcore_count() - 1; i++) printf("Worker %u handled %u packets\n", i, worker_stats[i].handled_packets); printf("Sanity test with two hash values done\n"); if (worker_stats[0].handled_packets != 16 || worker_stats[1].handled_packets != 16) return -1; } /* give a different hash value to each packet, * so load gets distributed */ clear_packet_count(); for (i = 0; i < BURST; i++) bufs[i]->pkt.hash.rss = i; rte_distributor_process(d, bufs, BURST); rte_distributor_flush(d); if (total_packet_count() != BURST) { printf("Line %d: Error, not all packets flushed. " "Expected %u, got %u\n", __LINE__, BURST, total_packet_count()); return -1; } for (i = 0; i < rte_lcore_count() - 1; i++) printf("Worker %u handled %u packets\n", i, worker_stats[i].handled_packets); printf("Sanity test with non-zero hashes done\n"); rte_mempool_put_bulk(p, (void *)bufs, BURST); /* sanity test with BIG_BATCH packets to ensure they all arrived back * from the returned packets function */ clear_packet_count(); struct rte_mbuf *many_bufs[BIG_BATCH], *return_bufs[BIG_BATCH]; unsigned num_returned = 0; /* flush out any remaining packets */ rte_distributor_flush(d); rte_distributor_clear_returns(d); if (rte_mempool_get_bulk(p, (void *)many_bufs, BIG_BATCH) != 0) { printf("line %d: Error getting mbufs from pool\n", __LINE__); return -1; } for (i = 0; i < BIG_BATCH; i++) many_bufs[i]->pkt.hash.rss = i << 2; for (i = 0; i < BIG_BATCH/BURST; i++) { rte_distributor_process(d, &many_bufs[i*BURST], BURST); num_returned += rte_distributor_returned_pkts(d, &return_bufs[num_returned], BIG_BATCH - num_returned); } rte_distributor_flush(d); num_returned += rte_distributor_returned_pkts(d, &return_bufs[num_returned], BIG_BATCH - num_returned); if (num_returned != BIG_BATCH) { printf("line %d: Number returned is not the same as " "number sent\n", __LINE__); return -1; } /* big check - make sure all packets made it back!! 
*/ for (i = 0; i < BIG_BATCH; i++) { unsigned j; struct rte_mbuf *src = many_bufs[i]; for (j = 0; j < BIG_BATCH; j++) if (return_bufs[j] == src) break; if (j == BIG_BATCH) { printf("Error: could not find source packet #%u\n", i); return -1; } } printf("Sanity test of returned packets done\n"); rte_mempool_put_bulk(p, (void *)many_bufs, BIG_BATCH); printf("\n"); return 0; }
int i; __m128i template; /* 256-bit write was worse... */ __m128i rxdesc_fields; struct rte_mbuf tmp; /* DPDK 2.1 specific * packet_type 0 (32 bits) * pkt_len len (32 bits) * data_len len (16 bits) * vlan_tci 0 (16 bits) * rss 0 (32 bits) */ rxdesc_fields = _mm_setr_epi32(0, len, len, 0); ret = rte_mempool_get_bulk(current_pframe_pool(), (void**)array, cnt); if (ret != 0) { return ret; } template = *((__m128i*)¤t_template()->buf_len); if (cnt & 1) { array[cnt] = &tmp; } /* 4 at a time didn't help */ for (i = 0; i < cnt; i+=2) { /* since the data is likely to be in the store buffer * as 64-bit writes, 128-bit read will cause stalls */ struct rte_mbuf *mbuf0 = array[i];
static inline void ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq) { int i; uint16_t rx_id; volatile union ixgbe_adv_rx_desc *rxdp; struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; struct rte_mbuf *mb0, *mb1; __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, RTE_PKTMBUF_HEADROOM); __m128i dma_addr0, dma_addr1; const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX); rxdp = rxq->rx_ring + rxq->rxrearm_start; /* Pull 'n' more MBUFs into the software ring */ if (rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep, RTE_IXGBE_RXQ_REARM_THRESH) < 0) { if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >= rxq->nb_rx_desc) { dma_addr0 = _mm_setzero_si128(); for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) { rxep[i].mbuf = &rxq->fake_mbuf; _mm_store_si128((__m128i *)&rxdp[i].read, dma_addr0); } } rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += RTE_IXGBE_RXQ_REARM_THRESH; return; } /* Initialize the mbufs in vector, process 2 mbufs in one loop */ for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) { __m128i vaddr0, vaddr1; uintptr_t p0, p1; mb0 = rxep[0].mbuf; mb1 = rxep[1].mbuf; /* * Flush mbuf with pkt template. * Data to be rearmed is 6 bytes long. * Though, RX will overwrite ol_flags that are coming next * anyway. So overwrite whole 8 bytes with one load: * 6 bytes of rearm_data plus first 2 bytes of ol_flags. 
*/ p0 = (uintptr_t)&mb0->rearm_data; *(uint64_t *)p0 = rxq->mbuf_initializer; p1 = (uintptr_t)&mb1->rearm_data; *(uint64_t *)p1 = rxq->mbuf_initializer; /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */ vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr)); vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr)); /* convert pa to dma_addr hdr/data */ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); /* add headroom to pa values */ dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room); dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room); /* set Header Buffer Address to zero */ dma_addr0 = _mm_and_si128(dma_addr0, hba_msk); dma_addr1 = _mm_and_si128(dma_addr1, hba_msk); /* flush desc with pa dma_addr */ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0); _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1); } rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH; if (rxq->rxrearm_start >= rxq->nb_rx_desc) rxq->rxrearm_start = 0; rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH; rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ? (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); /* Update the tail pointer on the NIC */ IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id); }
int cperf_verify_test_runner(void *test_ctx) { struct cperf_verify_ctx *ctx = test_ctx; uint64_t ops_enqd = 0, ops_enqd_total = 0, ops_enqd_failed = 0; uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0; uint64_t ops_failed = 0; static int only_once; uint64_t i; uint16_t ops_unused = 0; struct rte_crypto_op *ops[ctx->options->max_burst_size]; struct rte_crypto_op *ops_processed[ctx->options->max_burst_size]; uint32_t lcore = rte_lcore_id(); #ifdef CPERF_LINEARIZATION_ENABLE struct rte_cryptodev_info dev_info; int linearize = 0; /* Check if source mbufs require coalescing */ if (ctx->options->segment_sz < ctx->options->max_buffer_size) { rte_cryptodev_info_get(ctx->dev_id, &dev_info); if ((dev_info.feature_flags & RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0) linearize = 1; } #endif /* CPERF_LINEARIZATION_ENABLE */ ctx->lcore_id = lcore; if (!ctx->options->csv) printf("\n# Running verify test on device: %u, lcore: %u\n", ctx->dev_id, lcore); uint16_t iv_offset = sizeof(struct rte_crypto_op) + sizeof(struct rte_crypto_sym_op); while (ops_enqd_total < ctx->options->total_ops) { uint16_t burst_size = ((ops_enqd_total + ctx->options->max_burst_size) <= ctx->options->total_ops) ? 
ctx->options->max_burst_size : ctx->options->total_ops - ops_enqd_total; uint16_t ops_needed = burst_size - ops_unused; /* Allocate objects containing crypto operations and mbufs */ if (rte_mempool_get_bulk(ctx->pool, (void **)ops, ops_needed) != 0) { RTE_LOG(ERR, USER1, "Failed to allocate more crypto operations " "from the the crypto operation pool.\n" "Consider increasing the pool size " "with --pool-sz\n"); return -1; } /* Setup crypto op, attach mbuf etc */ (ctx->populate_ops)(ops, ctx->src_buf_offset, ctx->dst_buf_offset, ops_needed, ctx->sess, ctx->options, ctx->test_vector, iv_offset); /* Populate the mbuf with the test vector, for verification */ for (i = 0; i < ops_needed; i++) cperf_mbuf_set(ops[i]->sym->m_src, ctx->options, ctx->test_vector); #ifdef CPERF_LINEARIZATION_ENABLE if (linearize) { /* PMD doesn't support scatter-gather and source buffer * is segmented. * We need to linearize it before enqueuing. */ for (i = 0; i < burst_size; i++) rte_pktmbuf_linearize(ops[i]->sym->m_src); } #endif /* CPERF_LINEARIZATION_ENABLE */ /* Enqueue burst of ops on crypto device */ ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, ops, burst_size); if (ops_enqd < burst_size) ops_enqd_failed++; /** * Calculate number of ops not enqueued (mainly for hw * accelerators whose ingress queue can fill up). */ ops_unused = burst_size - ops_enqd; ops_enqd_total += ops_enqd; /* Dequeue processed burst of ops from crypto device */ ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id, ops_processed, ctx->options->max_burst_size); if (ops_deqd == 0) { /** * Count dequeue polls which didn't return any * processed operations. This statistic is mainly * relevant to hw accelerators. */ ops_deqd_failed++; continue; } for (i = 0; i < ops_deqd; i++) { if (cperf_verify_op(ops_processed[i], ctx->options, ctx->test_vector)) ops_failed++; } /* Free crypto ops so they can be reused. 
*/ rte_mempool_put_bulk(ctx->pool, (void **)ops_processed, ops_deqd); ops_deqd_total += ops_deqd; } /* Dequeue any operations still in the crypto device */ while (ops_deqd_total < ctx->options->total_ops) { /* Sending 0 length burst to flush sw crypto device */ rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0); /* dequeue burst */ ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id, ops_processed, ctx->options->max_burst_size); if (ops_deqd == 0) { ops_deqd_failed++; continue; } for (i = 0; i < ops_deqd; i++) { if (cperf_verify_op(ops_processed[i], ctx->options, ctx->test_vector)) ops_failed++; } /* Free crypto ops so they can be reused. */ rte_mempool_put_bulk(ctx->pool, (void **)ops_processed, ops_deqd); ops_deqd_total += ops_deqd; } if (!ctx->options->csv) { if (!only_once) printf("%12s%12s%12s%12s%12s%12s%12s%12s\n\n", "lcore id", "Buf Size", "Burst size", "Enqueued", "Dequeued", "Failed Enq", "Failed Deq", "Failed Ops"); only_once = 1; printf("%12u%12u%12u%12"PRIu64"%12"PRIu64"%12"PRIu64 "%12"PRIu64"%12"PRIu64"\n", ctx->lcore_id, ctx->options->max_buffer_size, ctx->options->max_burst_size, ops_enqd_total, ops_deqd_total, ops_enqd_failed, ops_deqd_failed, ops_failed); } else { if (!only_once) printf("\n# lcore id, Buffer Size(B), " "Burst Size,Enqueued,Dequeued,Failed Enq," "Failed Deq,Failed Ops\n"); only_once = 1; printf("%10u;%10u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";" "%"PRIu64"\n", ctx->lcore_id, ctx->options->max_buffer_size, ctx->options->max_burst_size, ops_enqd_total, ops_deqd_total, ops_enqd_failed, ops_deqd_failed, ops_failed); } return 0; }