int main(void) { static const size_t default_initial_size = 655360; struct p_bench_specification spec; char *raw_mem = NULL; spec.current_size = default_initial_size; setup_memory(&spec.mem, &raw_mem, spec.current_size); for (const struct p_bench_item *item = benchmark_items; item->name != NULL; ++item) { struct item_data data; item_preface(&data, item); item->benchmark(&spec); item_done(&data, &spec, item->name); } return EXIT_SUCCESS; }
struct item_data item_bench(const struct p_bench_item *item, struct p_bench_specification *spec) { struct item_data data, best = { .end = ~(0ULL) }; uint64_t item_start_time = platform_clock(); /* Warm up caches, branch predictors etc. */ /* Calculate inner loop. */ int inner_loop; data.start = platform_clock(); for (int i = 0; i < 50; i++) item->benchmark(spec); data.end = platform_clock(); { float tmp = data.end - data.start; tmp /= 50.0f; /* 50k us seems to work */ tmp = 50000.0f / tmp; inner_loop = ceilf(tmp); } /* Repeat tests to get more stable results between runs */ while (true) { /* Measure 10 iterations so the clocksource's resolution doesn't * play tricks on us */ data.start = platform_clock(); for (int j = 0; j < inner_loop; j++) item->benchmark(spec); data.end = platform_clock(); /* Use best measurement */ if (best.end - best.start > data.end - data.start) best = data; /* Test each function for 1/2 second */ if (data.end - item_start_time >= 500000000ULL) break; } { /* Adjust for iterations in inner loop above */ float tmp = best.end - best.start; tmp /= (float) inner_loop; best.end = best.start + tmp; } return best; } int main(void) { struct p_bench_specification spec = { 0 }; char *raw_mem = NULL; spec.current_size = MAX_ELEMS; setup_memory(&spec.mem, &raw_mem, spec.current_size); bench_printf(";name, size, duration (ns)\n"); for (const struct p_bench_item *item = benchmark_items; item->name != NULL; ++item) { struct item_data best; bool consistent = false; best = item_bench(item, &spec); for (int tries = 0; tries < 50; tries++) { struct item_data snd; float fst_time, snd_time; /* Benchmark again ... */ snd = item_bench(item, &spec); fst_time = best.end - best.start; snd_time = snd.end - snd.start; /* ... and start over if results deviate too much */ if (fst_time / snd_time < 0.995 || snd_time / fst_time < 0.995) { /* Take average so abnormally low results converge over time */ best.end += (snd_time - fst_time) / 2.0f; usleep(100000); continue; } if (fst_time > snd_time) best = snd; consistent = true; break; } if (!consistent) { fprintf(stderr, ";WARNING: %s not consistent\n", item->name); fflush(stderr); } item_done(&best, &spec, item->name); } return EXIT_SUCCESS; } #else /* __epiphany__ */ int main(void) { struct p_bench_specification spec = { 0 }; char *raw_mem = NULL; spec.current_size = MAX_ELEMS; uint32_t nbench = 0; setup_memory(&spec.mem, &raw_mem, spec.current_size); bench_printf(";name, size, duration (ns)\n"); for (const struct p_bench_item *item = benchmark_items; item->name != NULL; ++item) { struct item_data data; data.start = platform_clock(); item->benchmark(&spec); data.end = platform_clock(); strcpy(epiphany_results[nbench].name, item->name); epiphany_results[nbench].ns = data.end - data.start; epiphany_results[nbench].size = (uint64_t) spec.current_size; nbench++; epiphany_status->nbench = nbench; } epiphany_status->done = 1; return EXIT_SUCCESS; } #endif static void setup_output_pointers(struct p_bench_raw_memory *mem, void *p) { /* Assume largest type is 64 bits */ /* TODO: All pointers point to same memory region so output will be bogus */ mem->o1.p_u64 = p; mem->o2.p_u64 = p; mem->o3.p_u64 = p; mem->o4.p_u64 = p; }