int32_t
perf_test(void)
{
	struct rte_lpm *lpm = NULL;
	uint64_t begin, total_time, lpm_used_entries = 0;
	unsigned i, j;
	uint8_t next_hop_add = 0xAA, next_hop_return = 0;
	int status = 0;
	uint64_t cache_line_counter = 0;
	int64_t count = 0;

	rte_srand(rte_rdtsc());

	printf("No. routes = %u\n", (unsigned) NUM_ROUTE_ENTRIES);

	print_route_distribution(large_route_table, (uint32_t) NUM_ROUTE_ENTRIES);

	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, 1000000, 0);
	TEST_LPM_ASSERT(lpm != NULL);

	/* Measure add. */
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		if (rte_lpm_add(lpm, large_route_table[i].ip,
				large_route_table[i].depth, next_hop_add) == 0)
			status++;
	}
	/* End Timer. */
	total_time = rte_rdtsc() - begin;

	printf("Unique added entries = %d\n", status);

	/* Obtain add statistics. */
	for (i = 0; i < RTE_LPM_TBL24_NUM_ENTRIES; i++) {
		if (lpm->tbl24[i].valid)
			lpm_used_entries++;

		if (i % 32 == 0) {
			if ((uint64_t)count < lpm_used_entries) {
				cache_line_counter++;
				count = lpm_used_entries;
			}
		}
	}

	printf("Used table 24 entries = %u (%g%%)\n",
			(unsigned) lpm_used_entries,
			(lpm_used_entries * 100.0) / RTE_LPM_TBL24_NUM_ENTRIES);
	printf("64 byte Cache entries used = %u (%u bytes)\n",
			(unsigned) cache_line_counter,
			(unsigned) cache_line_counter * 64);

	printf("Average LPM Add: %g cycles\n",
			(double)total_time / NUM_ROUTE_ENTRIES);

	/* Measure single Lookup */
	total_time = 0;
	count = 0;

	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();

		for (j = 0; j < BATCH_SIZE; j++) {
			if (rte_lpm_lookup(lpm, ip_batch[j], &next_hop_return) != 0)
				count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("Average LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
			(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure bulk Lookup */
	total_time = 0;
	count = 0;
	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];
		uint16_t next_hops[BULK_SIZE];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();
		for (j = 0; j < BATCH_SIZE; j += BULK_SIZE) {
			unsigned k;

			rte_lpm_lookup_bulk(lpm, &ip_batch[j], next_hops, BULK_SIZE);
			for (k = 0; k < BULK_SIZE; k++)
				if (unlikely(!(next_hops[k] & RTE_LPM_LOOKUP_SUCCESS)))
					count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("BULK LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
			(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Measure LookupX4 */
	total_time = 0;
	count = 0;
	for (i = 0; i < ITERATIONS; i++) {
		static uint32_t ip_batch[BATCH_SIZE];
		uint16_t next_hops[4];

		/* Create array of random IP addresses */
		for (j = 0; j < BATCH_SIZE; j++)
			ip_batch[j] = rte_rand();

		/* Lookup per batch */
		begin = rte_rdtsc();
		for (j = 0; j < BATCH_SIZE; j += RTE_DIM(next_hops)) {
			unsigned k;
			__m128i ipx4;

			/* Use an unaligned load: ip_batch + j is not
			 * guaranteed to be 16-byte aligned. */
			ipx4 = _mm_loadu_si128((__m128i *)(ip_batch + j));
			rte_lpm_lookupx4(lpm, ipx4, next_hops, UINT16_MAX);
			for (k = 0; k < RTE_DIM(next_hops); k++)
				if (unlikely(next_hops[k] == UINT16_MAX))
					count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	printf("LPM LookupX4: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * BATCH_SIZE),
			(count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

	/* Delete */
	status = 0;
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		/* rte_lpm_delete(lpm, ip, depth) */
		status += rte_lpm_delete(lpm, large_route_table[i].ip,
				large_route_table[i].depth);
	}

	/* Reset rather than accumulate: only the delete loop is measured. */
	total_time = rte_rdtsc() - begin;

	printf("Average LPM Delete: %g cycles\n",
			(double)total_time / NUM_ROUTE_ENTRIES);

	rte_lpm_delete_all(lpm);
	rte_lpm_free(lpm);

	return PASS;
}
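/*
 * A minimal sketch, not part of the original test: the averages above are
 * raw TSC cycle counts from rte_rdtsc(). Assuming the EAL timer subsystem
 * is initialized, rte_get_tsc_hz() (rte_cycles.h) reports the calibrated
 * TSC frequency, so cycle counts can be converted to wall-clock time. The
 * helper name cycles_to_ns is hypothetical.
 */
static inline double
cycles_to_ns(uint64_t cycles)
{
	return (double)cycles * 1E9 / (double)rte_get_tsc_hz();
}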
static int
test_memzone_reserve_max_aligned(void)
{
	const struct rte_memzone *mz;
	const struct rte_config *config;
	const struct rte_memseg *ms;
	int memseg_idx = 0;
	int memzone_idx = 0;
	uintptr_t addr_offset;
	size_t len = 0;
	void *last_addr;
	size_t maxlen = 0;

	/* random alignment */
	rte_srand((unsigned)rte_rdtsc());
	const unsigned align = 1 << ((rte_rand() % 8) + 5); /* from 32 up to 4k alignment */

	/* get pointer to global configuration */
	config = rte_eal_get_configuration();

	ms = rte_eal_get_physmem_layout();

	addr_offset = 0;

	for (memseg_idx = 0; memseg_idx < RTE_MAX_MEMSEG; memseg_idx++) {

		/* ignore smaller memsegs as they can only get smaller */
		if (ms[memseg_idx].len < maxlen)
			continue;

		/* align everything */
		last_addr = RTE_PTR_ALIGN_CEIL(ms[memseg_idx].addr, RTE_CACHE_LINE_SIZE);
		len = ms[memseg_idx].len - RTE_PTR_DIFF(last_addr, ms[memseg_idx].addr);
		len &= ~((size_t) RTE_CACHE_LINE_MASK);

		/* cycle through all memzones */
		for (memzone_idx = 0; memzone_idx < RTE_MAX_MEMZONE; memzone_idx++) {

			/* stop when reaching last allocated memzone */
			if (config->mem_config->memzone[memzone_idx].addr == NULL)
				break;

			/* check if the memzone is in our memseg and subtract length */
			if ((config->mem_config->memzone[memzone_idx].addr >=
					ms[memseg_idx].addr) &&
					(config->mem_config->memzone[memzone_idx].addr <
					(RTE_PTR_ADD(ms[memseg_idx].addr, ms[memseg_idx].len)))) {
				/* since the zones can now be aligned and occasionally skip
				 * some space, we should calculate the length based on
				 * reported length and start addresses difference.
				 */
				len -= (uintptr_t) RTE_PTR_SUB(
						config->mem_config->memzone[memzone_idx].addr,
						(uintptr_t) last_addr);
				len -= config->mem_config->memzone[memzone_idx].len;
				last_addr = RTE_PTR_ADD(config->mem_config->memzone[memzone_idx].addr,
						(size_t) config->mem_config->memzone[memzone_idx].len);
			}
		}

		/* make sure we get the alignment offset */
		if (len > maxlen) {
			addr_offset = RTE_PTR_ALIGN_CEIL((uintptr_t) last_addr, align) -
					(uintptr_t) last_addr;
			maxlen = len;
		}
	}

	if (maxlen == 0 || maxlen == addr_offset) {
		printf("There is no space left for biggest %u-aligned memzone!\n",
				align);
		return 0;
	}

	maxlen -= addr_offset;

	mz = rte_memzone_reserve_aligned("max_zone_aligned", 0,
			SOCKET_ID_ANY, 0, align);
	if (mz == NULL) {
		printf("Failed to reserve a big chunk of memory\n");
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);
		return -1;
	}

	if (mz->len != maxlen) {
		printf("Memzone reserve with 0 size and alignment %u did not return"
				" biggest block\n", align);
		printf("Expected size = %zu, actual size = %zu\n",
				maxlen, mz->len);
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);
		return -1;
	}

	return 0;
}
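/*
 * A minimal sketch of the ceil-alignment arithmetic the scan above relies
 * on: RTE_ALIGN_CEIL/RTE_PTR_ALIGN_CEIL (rte_common.h) round an address up
 * to the next multiple of the alignment, and the distance rounded over is
 * what the test accumulates as addr_offset. The helper name and the values
 * in the comment are illustrative only.
 */
static size_t
align_offset_demo(uintptr_t addr, uintptr_t align)
{
	/* e.g. addr = 0x1005, align = 64 -> aligned = 0x1040, offset = 59 */
	uintptr_t aligned = RTE_ALIGN_CEIL(addr, align);

	return (size_t)(aligned - addr);
}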
static int
test_reciprocal(void)
{
	int result = 0;
	uint32_t divisor_u32 = 0;
	uint32_t dividend_u32;
	uint32_t nresult_u32;
	uint32_t rresult_u32;
	uint64_t i;
	uint64_t divisor_u64 = 0;
	uint64_t dividend_u64;
	uint64_t nresult_u64;
	uint64_t rresult_u64;
	struct rte_reciprocal reci_u32 = {0};
	struct rte_reciprocal_u64 reci_u64 = {0};

	rte_srand(rte_rdtsc());

	printf("Validating unsigned 32bit division.\n");
	for (i = 0; i < MAX_ITERATIONS; i++) {
		/* Change divisor every DIVIDE_ITER iterations. */
		if (i % DIVIDE_ITER == 0) {
			/* Skip zero: the native division below would
			 * otherwise be undefined behaviour. */
			do {
				divisor_u32 = rte_rand();
			} while (divisor_u32 == 0);
			reci_u32 = rte_reciprocal_value(divisor_u32);
		}

		dividend_u32 = rte_rand();
		nresult_u32 = dividend_u32 / divisor_u32;
		rresult_u32 = rte_reciprocal_divide(dividend_u32, reci_u32);
		if (nresult_u32 != rresult_u32) {
			printf("Division failed, %"PRIu32"/%"PRIu32" = "
					"expected %"PRIu32" result %"PRIu32"\n",
					dividend_u32, divisor_u32,
					nresult_u32, rresult_u32);
			result = 1;
			break;
		}
	}

	printf("Validating unsigned 64bit division.\n");
	for (i = 0; i < MAX_ITERATIONS; i++) {
		/* Change divisor every DIVIDE_ITER iterations. */
		if (i % DIVIDE_ITER == 0) {
			do {
				divisor_u64 = rte_rand();
			} while (divisor_u64 == 0);
			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
		}

		dividend_u64 = rte_rand();
		nresult_u64 = dividend_u64 / divisor_u64;
		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, &reci_u64);
		if (nresult_u64 != rresult_u64) {
			printf("Division failed, %"PRIu64"/%"PRIu64" = "
					"expected %"PRIu64" result %"PRIu64"\n",
					dividend_u64, divisor_u64,
					nresult_u64, rresult_u64);
			result = 1;
			break;
		}
	}

	printf("Validating unsigned 64bit division with 32bit divisor.\n");
	for (i = 0; i < MAX_ITERATIONS; i++) {
		/* Change divisor every DIVIDE_ITER iterations. */
		if (i % DIVIDE_ITER == 0) {
			do {
				divisor_u64 = rte_rand() >> 32;
			} while (divisor_u64 == 0);
			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
		}

		dividend_u64 = rte_rand();
		nresult_u64 = dividend_u64 / divisor_u64;
		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, &reci_u64);
		if (nresult_u64 != rresult_u64) {
			printf("Division failed, %"PRIu64"/%"PRIu64" = "
					"expected %"PRIu64" result %"PRIu64"\n",
					dividend_u64, divisor_u64,
					nresult_u64, rresult_u64);
			result = 1;
			break;
		}
	}

	return result;
}
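/*
 * A minimal usage sketch of the API validated above, assuming a nonzero
 * divisor: rte_reciprocal_value() precomputes a multiplier/shift pair once,
 * after which rte_reciprocal_divide() yields the exact truncated quotient
 * without a hardware divide. The constants and helper name are illustrative.
 */
static uint32_t
reciprocal_demo(void)
{
	struct rte_reciprocal r = rte_reciprocal_value(1000000);

	/* Same result as 123456789 / 1000000, i.e. 123. */
	return rte_reciprocal_divide(123456789, r);
}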
int32_t
perf_test(void)
{
	struct rte_lpm6 *lpm = NULL;
	struct rte_lpm6_config config;
	uint64_t begin, total_time;
	unsigned i, j;
	uint8_t next_hop_add = 0xAA, next_hop_return = 0;
	int status = 0;
	int64_t count = 0;

	config.max_rules = 1000000;
	config.number_tbl8s = NUMBER_TBL8S;
	config.flags = 0;

	rte_srand(rte_rdtsc());

	printf("No. routes = %u\n", (unsigned) NUM_ROUTE_ENTRIES);

	print_route_distribution(large_route_table, (uint32_t) NUM_ROUTE_ENTRIES);

	lpm = rte_lpm6_create(__func__, SOCKET_ID_ANY, &config);
	TEST_LPM_ASSERT(lpm != NULL);

	/* Measure add. */
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		if (rte_lpm6_add(lpm, large_route_table[i].ip,
				large_route_table[i].depth, next_hop_add) == 0)
			status++;
	}
	/* End Timer. */
	total_time = rte_rdtsc() - begin;

	printf("Unique added entries = %d\n", status);
	printf("Average LPM Add: %g cycles\n",
			(double)total_time / NUM_ROUTE_ENTRIES);

	/* Measure single Lookup */
	total_time = 0;
	count = 0;

	for (i = 0; i < ITERATIONS; i++) {
		begin = rte_rdtsc();

		for (j = 0; j < NUM_IPS_ENTRIES; j++) {
			if (rte_lpm6_lookup(lpm, large_ips_table[j].ip,
					&next_hop_return) != 0)
				count++;
		}

		total_time += rte_rdtsc() - begin;
	}
	/* Each iteration performs NUM_IPS_ENTRIES lookups, so average over
	 * that count rather than BATCH_SIZE. */
	printf("Average LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * NUM_IPS_ENTRIES),
			(count * 100.0) / (double)(ITERATIONS * NUM_IPS_ENTRIES));

	/* Measure bulk Lookup */
	total_time = 0;
	count = 0;

	uint8_t ip_batch[NUM_IPS_ENTRIES][16];
	int16_t next_hops[NUM_IPS_ENTRIES];

	for (i = 0; i < NUM_IPS_ENTRIES; i++)
		memcpy(ip_batch[i], large_ips_table[i].ip, 16);

	for (i = 0; i < ITERATIONS; i++) {

		/* Lookup per batch */
		begin = rte_rdtsc();
		rte_lpm6_lookup_bulk_func(lpm, ip_batch, next_hops,
				NUM_IPS_ENTRIES);
		total_time += rte_rdtsc() - begin;

		for (j = 0; j < NUM_IPS_ENTRIES; j++)
			if (next_hops[j] < 0)
				count++;
	}
	printf("BULK LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
			(double)total_time / ((double)ITERATIONS * NUM_IPS_ENTRIES),
			(count * 100.0) / (double)(ITERATIONS * NUM_IPS_ENTRIES));

	/* Delete */
	status = 0;
	begin = rte_rdtsc();

	for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
		/* rte_lpm6_delete(lpm, ip, depth) */
		status += rte_lpm6_delete(lpm, large_route_table[i].ip,
				large_route_table[i].depth);
	}

	/* Reset rather than accumulate: only the delete loop is measured. */
	total_time = rte_rdtsc() - begin;

	printf("Average LPM Delete: %g cycles\n",
			(double)total_time / NUM_ROUTE_ENTRIES);

	rte_lpm6_delete_all(lpm);
	rte_lpm6_free(lpm);

	return PASS;
}
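/*
 * A minimal sketch of the LPM6 calls timed above, outside any measurement
 * loop and using the same old-style uint8_t next-hop API as this test.
 * The route 2001:db8::/32, next hop 16, and helper name are illustrative.
 */
static int
lpm6_demo(struct rte_lpm6 *lpm)
{
	uint8_t ip[16] = { 0x20, 0x01, 0x0d, 0xb8 };	/* 2001:db8:: */
	uint8_t next_hop = 0;

	if (rte_lpm6_add(lpm, ip, 32, 16) < 0)
		return -1;

	/* Returns 0 and fills next_hop on a longest-prefix match. */
	return rte_lpm6_lookup(lpm, ip, &next_hop);
}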