int main(int argc, char **argv) { global_options(&argc, &argv); if (argc < 5) { print_help(); m4ri_die(""); } struct smallops_params params; params.k = atoi(argv[1]); params.m = atoi(argv[2]); params.n = atoi(argv[3]); srandom(17); unsigned long long data[2]; if(strcmp(argv[4],"mzed_slice") == 0) { run_bench(run_mzed_slice, (void*)¶ms, data, 2); } else if(strcmp(argv[4],"mzed_cling") == 0) { run_bench(run_mzed_cling, (void*)¶ms, data, 2); } else if(strcmp(argv[4],"mzed_add") == 0) { run_bench(run_mzed_add, (void*)¶ms, data, 2); } double cc_per_op = ((double)data[1])/ ( (double)params.m * (double)params.n ); printf("%s: m: %5d, n: %5d, cpu cycles: %10llu, cc/(mn): %.5lf, wall time: %lf\n", argv[4], params.m, params.n, data[1], cc_per_op, data[0] / 1000000.0); }
/*
 * Single-threaded benchmark policy.
 *
 * Runs `fn` with `ctx` through run_bench() on a freshly zeroed
 * struct ilka_bench and stores run_bench()'s result in `*dist`.
 * `threads` is accepted for signature compatibility and ignored.
 */
static void bench_st_policy(
        ilka_bench_fn_t fn, void *ctx, size_t n, size_t threads, double *dist)
{
    struct ilka_bench state = { 0 };

    (void) threads;

    *dist = run_bench(&state, fn, ctx, 0, n);
}
/*
 * Entry point: parse the command line with argp, initialise the
 * benchmark state, print the effective settings, then run the benchmark.
 * Always returns 0.
 */
int main(int argc, char *argv[])
{
    argp_parse(&argp, argc, argv, 0, 0, NULL);
    init();

    /* Echo the configuration the run is about to use. */
    printf("Data size: %zu\n", bench_size);
    printf("Seed: %" PRIu64 "\n", lcg_state);
    printf("Iterations: %u\n", bench_settings.iterations);

    run_bench();

    return 0;
}
int main(int argc, char **argv) { global_options(&argc, &argv); if (argc != 3) { m4ri_die("Parameters m, n expected.\n"); } struct trsm_params p; p.m = atoi(argv[1]); p.n = atoi(argv[2]); srandom(17); unsigned long long data[2]; run_bench(run, (void*)&p, data, 2); printf("m: %5d, n: %5d, cpu cycles: %llu wall time: %lf\n", p.m, p.n, data[1], data[0] / 1000000.0); }
int main(int argc, char *argv[]) { struct cmd_opt opt = {NULL, 0, 0, 0, 0, NULL}; struct bench *bench; /* parse command line options */ if (parse_option(argc, argv, &opt) < 4) { usage(stderr); exit(1); } /* create, initialize, and run a bench */ bench = alloc_bench(opt.ncore, opt.nbg); init_bench(bench, &opt); run_bench(bench); report_bench(bench, stdout); return 0; }
/*
 * Run every benchmark of a collection in order.
 *
 * Iteration stops at the first entry whose `fn` is NULL.  Each
 * benchmark is invoked through run_bench() with argc == 1.
 */
static void run_collection(struct collection *coll)
{
	struct bench *bench;
	/*
	 * Both slots start out defined.  The original left argv[0]
	 * uninitialised while passing argc == 1, so any callee looking at
	 * argv[0] would have read an indeterminate pointer.
	 */
	const char *argv[2] = { NULL, NULL };

	/*
	 * TODO:
	 *
	 * Preparing preset parameters for
	 * embedded, ordinary PC, HPC, etc...
	 * would be helpful.
	 */
	for_each_bench(coll, bench) {
		if (!bench->fn)
			break;

		printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
		fflush(stdout);

		argv[0] = bench->name;
		/* Kept from the original call; NOTE(review): confirm whether any
		 * callee depends on argv[1] holding the name when argc is 1. */
		argv[1] = bench->name;
		run_bench(coll->name, bench->name, bench->fn, 1, argv, NULL);
		printf("\n");
	}
}
/*
 * Allocate one BUFFER_SIZE region with mmap().
 *
 * address: when non-NULL, the region is mapped at exactly this address
 *          (MAP_FIXED); when NULL, the placement is chosen here.
 *
 * Returns the mapped address, or (void *)-1 when the mapping failed.
 * A successful mapping is recorded in release_info[] together with
 * alloc_mmap_free as its release function.
 *
 * With a NULL address (and, on Linux with warm-up enabled, hot_alloc != 0)
 * the function maps SCALING times the needed size, scores page-aligned
 * start positions with run_bench(), keeps the BUFFER_SIZE window whose
 * score was lowest, and unmaps the rest.
 */
static void *alloc_mmap(void *address){
  void *map_address, *best_address;
  BLASULONG best, start, current;
  BLASULONG allocsize;

  if (address){
    /* Caller dictated the address: map it directly and skip the placement search. */
    map_address = mmap(address,
                       BUFFER_SIZE,
                       MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);

#ifdef OS_LINUX
    /* NOTE(review): called even if the mmap above failed -- confirm this is intended. */
    my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
#endif

  } else {
#if defined(OS_LINUX) && !defined(NO_WARMUP)
    if (hot_alloc == 0) {
      /* hot_alloc == 0: plain mapping, no placement search. */
      map_address = mmap(NULL, BUFFER_SIZE, MMAP_ACCESS, MMAP_POLICY, -1, 0);

#ifdef OS_LINUX
      my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
#endif

    } else {
#endif

      /* Over-allocate so that there is room to choose where the buffer starts. */
      map_address = mmap(NULL, BUFFER_SIZE * SCALING,
                         MMAP_ACCESS, MMAP_POLICY, -1, 0);

      if (map_address != (void *)-1) {

#ifdef OS_LINUX
#ifdef DEBUG
        /* Debug builds report a failing my_mbind() together with the saved errno. */
        /* NOTE(review): the printf below passes a pointer for a %lx conversion. */
        int ret=0;
        ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
        if(ret==-1){
          int errsv=errno;
          perror("OpenBLAS alloc_mmap:");
          printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
        }

#else
        my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
#endif
#endif

        /* Size handed to run_bench() for each candidate start. */
        allocsize = DGEMM_P * DGEMM_Q * sizeof(double);

        start   = (BLASULONG)map_address;
        current = (SCALING - 1) * BUFFER_SIZE;

        /*
         * Write into the first word of every page the address of the next
         * page, covering the first (SCALING - 1) * BUFFER_SIZE bytes.
         * `current` is unsigned, so this terminates only if PAGESIZE
         * divides that length -- presumably guaranteed by the constants.
         */
        while(current > 0) {
          *(long *)start = (long)start + PAGESIZE;
          start += PAGESIZE;
          current -= PAGESIZE;
        }

        /* Make the last linked page point back to the first one. */
        *(long *)(start - PAGESIZE) = (BLASULONG)map_address;

        start = (BLASULONG)map_address;

        /* (BLASULONG)-1 is the largest value, so the first candidate always wins. */
        best = (BLASULONG)-1;
        best_address = map_address;

        /* Score every page-aligned start; the smallest run_bench() result is kept. */
        while ((start + allocsize  < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) {

          current = run_bench(start, allocsize);

          if (best > current) {
            best = current;
            best_address = (void *)start;
          }

          start += PAGESIZE;

        }

        /* Release the part in front of the chosen window, if there is any ... */
        if ((BLASULONG)best_address > (BLASULONG)map_address)
          munmap(map_address,  (BLASULONG)best_address - (BLASULONG)map_address);

        /* ... and everything behind the BUFFER_SIZE bytes that are kept. */
        munmap((void *)((BLASULONG)best_address + BUFFER_SIZE), (SCALING - 1) * BUFFER_SIZE + (BLASULONG)map_address - (BLASULONG)best_address);

        map_address = best_address;

#if defined(OS_LINUX) && !defined(NO_WARMUP)
        /* Recorded after a searched allocation; meaning of the value 2 is defined elsewhere. */
        hot_alloc = 2;
#endif
      }
    }
#if defined(OS_LINUX) && !defined(NO_WARMUP)
  }
#endif

  /* Remember successful mappings so they can be released later. */
  /* NOTE(review): release_pos is not bounds-checked here -- confirm the table size. */
  if (map_address != (void *)-1) {
    release_info[release_pos].address = map_address;
    release_info[release_pos].func    = alloc_mmap_free;
    release_pos ++;
  }

  return map_address;
}
int main() { //dumb(); //sanity_check(); int err = 0; err += check_sincos_precision(0., 1.0); err += check_sincos_precision(-1000, 1000); err += check_explog_precision(-60, 60); if (err) { printf("some precision tests have failed\n"); } check_special_values(); run_bench("sinf", bench_sinf); run_bench("cosf", bench_cosf); #ifdef HAVE_SINCOS_X86_FPU run_bench("sincos (x87)", bench_stupid_sincos_x86_fpu); #endif run_bench("expf", bench_expf); run_bench("logf", bench_logf); run_bench("cephes_sinf", bench_cephes_sinf); run_bench("cephes_cosf", bench_cephes_cosf); run_bench("cephes_expf", bench_cephes_expf); run_bench("cephes_logf", bench_cephes_logf); run_bench("sin_ps", bench_sin_ps); run_bench("cos_ps", bench_cos_ps); run_bench("sincos_ps", bench_stupid_sincos_ps); run_bench("exp_ps", bench_exp_ps); run_bench("log_ps", bench_log_ps); #ifdef HAVE_VECLIB run_bench("vsinf", bench_vsinf); run_bench("vcosf", bench_vcosf); run_bench("vexpf", bench_vexpf); run_bench("vlogf", bench_vlogf); #endif #ifdef HAVE_ACML run_bench("acml vrs4_sinf", bench___vrs4_sinf); run_bench("acml vrs4_cosf", bench___vrs4_cosf); run_bench("acml vrs4_expf", bench___vrs4_expf); run_bench("acml vrs4_logf", bench___vrs4_logf); #endif return err; }
int main(int argc, char **argv) { int opts = global_options(&argc, &argv); int data_len; #ifdef HAVE_LIBPAPI int papi_counters = PAPI_num_counters(); if (papi_counters < papi_array_len) { fprintf(stderr, "%s: Warning: there are only %d hardware counters available!\n", progname, papi_counters); papi_array_len = papi_counters; } if (papi_test(papi_events, papi_array_len)) exit(1); for (int nv = 0; nv <= papi_array_len; ++nv) loop_calibration[nv] = 100000000; data_len = papi_array_len + 1; #else data_len = 2; #endif if (opts < 0 || argc < 2 || argc > 5) { print_help_and_exit(); } struct elim_params params; params.m = atoi(argv[1]); if (argc >= 3) params.n = atoi(argv[2]); else params.n = params.m; if (argc >= 4) params.algorithm = argv[3]; else params.algorithm = "ple"; if (argc >= 5) params.r = atoi(argv[4]); else params.r = MIN(params.m, params.n); srandom(17); unsigned long long data[16]; for (int i = 0; i < 4; ++i) run_nothing((void*)¶ms, data, &data_len); run_bench(run, (void*)¶ms, data, data_len); double cc_per_op = ((double)data[1])/ ( (double)params.m * (double)params.n * powl((double)params.r,0.807) ); printf("m: %5d, n: %5d, last r: %5d, cpu cycles: %12llu, cc/(mnr^0.807): %.5lf, ", params.m, params.n, params.r, data[1], cc_per_op); print_wall_time(data[0] / 1000000.0); printf(", "); print_cpu_time(data[1] / (double)cpucycles_persecond()); printf("\n"); #ifdef HAVE_LIBPAPI for (int n = 1; n < data_len; ++n) { double tmp = ((double)data[n]) / powl((double)params.n,2.807); printf("%20s (%20llu) per bit (divided by n^2.807): %15.5f\n", papi_event_name(papi_events[n - 1]), data[n], tmp); } #endif }
int main(int argc, char **argv) { int block_size = 1024; int nloop = 1024; int num_thread = 2; int opt, ti; int test_id = 0; struct prog_arg prog_arg; struct thread_data *thread_data; size_t mem_size, a_block_size; int verbose = 0; double tb, te; long long cb, ce; while ((opt = getopt(argc, argv, "n:b:t:o:v")) != -1) { switch (opt) { case 'n': nloop = atoi(optarg); break; case 'b': block_size = atoi(optarg); break; case 't': num_thread = atoi(optarg); break; case 'o': { int i; for (i=0; i<NUM_BENCH; i++) { if (strcmp(bench_list[i].name, optarg) == 0) { test_id = i; break; } } if (i == NUM_BENCH) { printf("invalid test name : %s\n", optarg); usage(); } } break; case 'v': verbose = 1; break; default: usage(); break; } } thread_data = malloc(sizeof(*thread_data) * num_thread); a_block_size = ALIGN_UP(block_size, 64); prog_arg.block_size_op = block_size; prog_arg.block_size = a_block_size; prog_arg.nloop = nloop; prog_arg.nthread = num_thread; if (verbose) { printf("op=%s, nloop=%d, block_size=%d, num_thread=%d\n", bench_list[test_id].name, nloop, block_size, num_thread); } mem_size = a_block_size * num_thread; prog_arg.mem1 = memalign(64, mem_size); prog_arg.mem2 = memalign(64, mem_size); prog_arg.mem3 = memalign(64, mem_size); memset(prog_arg.mem1, 0xff, mem_size); memset(prog_arg.mem2, 0xff, mem_size); memset(prog_arg.mem3, 0xff, mem_size); if (bench_list[test_id].flags & SINGLE_THREAD) { bench_list[test_id].func(&prog_arg, NULL); } else { for (ti=0; ti<num_thread; ti++) { thread_data[ti].tid = ti; } tb = get_sec(); for (ti=0; ti<num_thread; ti++) { run_bench(&thread_data[ti], &bench_list[test_id], &prog_arg); } for (ti=0; ti<num_thread; ti++) { pthread_join(thread_data[ti].t, NULL); } te = get_sec(); bench_list[test_id].dump(&prog_arg, te-tb, 0); } }
int cmd_bench(int argc, const char **argv) { struct collection *coll; int ret = 0; if (argc < 2) { /* No collection specified. */ print_usage(); goto end; } argc = parse_options(argc, argv, bench_options, bench_usage, PARSE_OPT_STOP_AT_NON_OPTION); bench_format = bench_str2int(bench_format_str); if (bench_format == BENCH_FORMAT_UNKNOWN) { printf("Unknown format descriptor: '%s'\n", bench_format_str); goto end; } if (bench_repeat == 0) { printf("Invalid repeat option: Must specify a positive value\n"); goto end; } if (argc < 1) { print_usage(); goto end; } if (!strcmp(argv[0], "all")) { run_all_collections(); goto end; } for_each_collection(coll) { struct bench *bench; if (strcmp(coll->name, argv[0])) continue; if (argc < 2) { /* No bench specified. */ dump_benchmarks(coll); goto end; } if (!strcmp(argv[1], "all")) { run_collection(coll); goto end; } for_each_bench(coll, bench) { if (strcmp(bench->name, argv[1])) continue; if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name); fflush(stdout); ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1); goto end; } if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { dump_benchmarks(coll); goto end; } printf("Unknown benchmark: '%s' for collection '%s'\n", argv[1], argv[0]); ret = 1; goto end; } printf("Unknown collection: '%s'\n", argv[0]); ret = 1; end: return ret; }