/*
 * Print this machine's calibration values on stdout, one NAME=value
 * pair per line, followed by a version comment line:
 *
 *	ENOUGH=<get_enough(0)>
 *	TIMING_OVERHEAD=<t_overhead()>
 *	LOOP_OVERHEAD=<l_overhead()>
 *	# version MAJOR.MINOR
 *
 * stdout is flushed after each of the first two values, i.e. before
 * the next measurement is started.  Always exits with status 0.
 */
int
main(void)
{
	uint64	enough;
	double	timing, loop;

	enough = get_enough(0);
	printf("ENOUGH=%lu\n", (unsigned long)enough);
	fflush(stdout);

	timing = t_overhead();
	printf("TIMING_OVERHEAD=%f\n", timing);
	fflush(stdout);

	loop = l_overhead();
	printf("LOOP_OVERHEAD=%f\n", loop);

	printf("# version %d.%d\n", MAJOR, MINOR);
	exit(0);
}
/* * Assumptions: * * 1) Cache lines are a multiple of pointer-size words * 2) Cache lines are no larger than 1/8 of a page (typically 512 bytes) * 3) Pages are an even multiple of cache lines */ int main(int ac, char **av) { int i; int c; int warmup = 0; int repetitions = (1000000 <= get_enough(0) ? 1 : TRIES); size_t maxlen = 64 * 1024 * 1024; double par; struct mem_state state; char *usage = "[-L <line size>] [-M len[K|M]] [-W <warmup>] [-N <repetitions>]\n"; state.line = getpagesize() / 16; state.pagesize = getpagesize(); while (( c = getopt(ac, av, "L:M:W:N:")) != EOF) { switch(c) { case 'L': state.line = atoi(optarg); if (state.line < sizeof(char*)) state.line = sizeof(char*); break; case 'M': maxlen = bytes(optarg); break; case 'W': warmup = atoi(optarg); break; case 'N': repetitions = atoi(optarg); break; default: lmbench_usage(ac, av, usage); break; } } for (i = MAX_MEM_PARALLELISM * state.line; i <= maxlen; i<<=1) { par = par_mem(i, warmup, repetitions, &state); if (par > 0.) { fprintf(stderr, "%.6f %.2f\n", i / (1000. * 1000.), par); } } exit(0); }
/* * Assumptions: * * 1) Cache lines are a multiple of pointer-size words * 2) Cache lines are no larger than 1/4 a page size * 3) Pages are an even multiple of cache lines */ int main(int ac, char **av) { int l; int verbose = 0; int warmup = 0; int repetitions = (1000000 <= get_enough(0) ? 1 : TRIES); int c; size_t maxlen = 64 * 1024 * 1024; struct mem_state state; char *usage = "[-v] [-W <warmup>] [-N <repetitions>][-M len[K|M]]\n"; state.line = sizeof(char*); state.pagesize = getpagesize(); while (( c = getopt(ac, av, "avM:W:N:")) != EOF) { switch(c) { case 'v': verbose = 1; break; case 'M': maxlen = bytes(optarg); break; case 'W': warmup = atoi(optarg); break; case 'N': repetitions = atoi(optarg); break; default: lmbench_usage(ac, av, usage); break; } } if ((l = line_find(maxlen, warmup, repetitions, &state)) > 0) { if (verbose) { printf("cache line size: %d bytes\n", l); } else { printf("%d\n", l); } } return (0); }
int main(int ac, char **av) { int c; int warmup = 0; int repetitions = (1000000 <= get_enough(0) ? 1 : TRIES); double par; struct _state state; char *usage = "[-W <warmup>] [-N <repetitions>]\n"; state.N = 1; state.M = 1000; state.K = -1023; while (( c = getopt(ac, av, "W:N:")) != EOF) { switch(c) { case 'W': warmup = atoi(optarg); break; case 'N': repetitions = atoi(optarg); break; default: lmbench_usage(ac, av, usage); break; } } handle_scheduler(benchmp_childid(), 0, 0); par = max_parallelism(integer_bit_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "integer bit parallelism: %.2f\n", par); par = max_parallelism(integer_add_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "integer add parallelism: %.2f\n", par); par = max_parallelism(integer_mul_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "integer mul parallelism: %.2f\n", par); par = max_parallelism(integer_div_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "integer div parallelism: %.2f\n", par); par = max_parallelism(integer_mod_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "integer mod parallelism: %.2f\n", par); par = max_parallelism(int64_bit_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "int64 bit parallelism: %.2f\n", par); par = max_parallelism(int64_add_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "int64 add parallelism: %.2f\n", par); par = max_parallelism(int64_mul_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "int64 mul parallelism: %.2f\n", par); par = max_parallelism(int64_div_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "int64 div parallelism: %.2f\n", par); par = max_parallelism(int64_mod_benchmarks, warmup, repetitions, &state); if (par > 0.) 
fprintf(stderr, "int64 mod parallelism: %.2f\n", par); par = max_parallelism(float_add_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "float add parallelism: %.2f\n", par); par = max_parallelism(float_mul_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "float mul parallelism: %.2f\n", par); par = max_parallelism(float_div_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "float div parallelism: %.2f\n", par); par = max_parallelism(double_add_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "double add parallelism: %.2f\n", par); par = max_parallelism(double_mul_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "double mul parallelism: %.2f\n", par); par = max_parallelism(double_div_benchmarks, warmup, repetitions, &state); if (par > 0.) fprintf(stderr, "double div parallelism: %.2f\n", par); return(0); }
/*
 * Close whichever ends of a pipe pair are still open (>= 0) and mark
 * them as closed so a second call is harmless.
 */
static void
benchmp_close_pipe(int fds[2])
{
	int	end;

	for (end = 0; end < 2; ++end) {
		if (fds[end] >= 0) {
			close(fds[end]);
			fds[end] = -1;
		}
	}
}

/*
 * Run a benchmark in `parallel` forked child processes.
 *
 * initialize, benchmark, cleanup
 *		callbacks handed to benchmp_child() in every child.
 * enough	timing interval; replaced by get_enough(enough) first.
 * parallel	number of children to fork.  When it is greater than
 *		one, benchmp() first calls itself with parallel == 1
 *		and derives the iteration count from that baseline.
 * warmup	passed through to benchmp_parent().
 * repetitions	negative means "pick a default": 1 when parallel > 1
 *		or enough >= 1000000, TRIES otherwise.
 * cookie	opaque pointer handed to benchmp_child().
 *
 * The parent and its children are connected by four pipes (response,
 * start_signal, result_signal, exit_signal).  Each child closes the
 * ends it does not use and runs benchmp_child(); the parent closes the
 * opposite ends and runs benchmp_parent().
 *
 * The function returns nothing; results are stored through
 * settime()/save_n().  On the failure paths (pipe creation, pid array
 * allocation, fork failure, SIGTERM received while forking) it returns
 * after killing/reaping any children already created and closing every
 * control descriptor it still owns.
 *
 * NOTE(review): on the normal path the parent-side descriptors are
 * handed to benchmp_parent() and are not closed here; presumably
 * benchmp_parent() closes them -- confirm.
 */
void
benchmp(benchmp_f initialize,
	benchmp_f benchmark,
	benchmp_f cleanup,
	int enough,
	int parallel,
	int warmup,
	int repetitions,
	void* cookie)
{
	iter_t		iterations = 1;
	long		i = 0;	/* number of children forked so far */
	pid_t		*pids = NULL;
	/* -1 marks "not open" so failure paths can close selectively */
	int		response[2] = { -1, -1 };
	int		start_signal[2] = { -1, -1 };
	int		result_signal[2] = { -1, -1 };
	int		exit_signal[2] = { -1, -1 };

#ifdef _DEBUG
	fprintf(stderr, "benchmp(%p, %p, %p, %d, %d, %d, %d, %p): entering\n", initialize, benchmark, cleanup, enough, parallel, warmup, repetitions, cookie);
#endif
	enough = get_enough(enough);
#ifdef _DEBUG
	fprintf(stderr, "\tenough=%d\n", enough);
#endif
	if (repetitions < 0)
		repetitions = (1 < parallel || 1000000 <= enough ? 1 : TRIES);

	/* initialize results */
	settime(0);
	save_n(1);

	if (parallel > 1) {
		/* Compute the baseline performance */
		benchmp(initialize, benchmark, cleanup,
			enough, 1, warmup, repetitions, cookie);

		/* if we can't even do a single job, then give up */
		if (gettime() == 0)
			return;

		/* calculate iterations for 1sec runtime */
		iterations = get_n();
		if (enough < SHORT) {
			double tmp = (double)SHORT * (double)get_n();
			tmp /= (double)gettime();
			iterations = (iter_t)tmp + 1;
		}
		settime(0);
		save_n(1);
	}

	/* Create the necessary pipes for control */
	if (pipe(response) < 0
	    || pipe(start_signal) < 0
	    || pipe(result_signal) < 0
	    || pipe(exit_signal) < 0) {
#ifdef _DEBUG
		fprintf(stderr, "BENCHMP: Could not create control pipes\n");
#endif /* _DEBUG */
		/* release the pipes that were created before the failure */
		benchmp_close_pipe(response);
		benchmp_close_pipe(start_signal);
		benchmp_close_pipe(result_signal);
		benchmp_close_pipe(exit_signal);
		return;
	}

	/* fork the necessary children */
	benchmp_sigchld_received = 0;
	benchmp_sigterm_received = 0;
	benchmp_sigterm_handler = signal(SIGTERM, benchmp_sigterm);
	benchmp_sigchld_handler = signal(SIGCHLD, benchmp_sigchld);
	pids = (pid_t*)malloc(parallel * sizeof(pid_t));
	if (!pids) {
		/* no children exist yet (i == 0); just release the pipes */
		goto error_exit;
	}
	bzero((void*)pids, parallel * sizeof(pid_t));

	for (i = 0; i < parallel; ++i) {
		if (benchmp_sigterm_received)
			goto error_exit;
#ifdef _DEBUG
		fprintf(stderr, "benchmp(%p, %p, %p, %d, %d, %d, %d, %p): creating child %ld\n", initialize, benchmark, cleanup, enough, parallel, warmup, repetitions, cookie, i);
#endif
		switch(pids[i] = fork()) {
		case -1:
			/* could not open enough children! */
#ifdef _DEBUG
			fprintf(stderr, "BENCHMP: fork() failed!\n");
#endif /* _DEBUG */
			goto error_exit;
		case 0:
			/* If child */
			close(response[0]);
			close(start_signal[1]);
			close(result_signal[1]);
			close(exit_signal[1]);
			handle_scheduler(i, 0, 0);
			benchmp_child(initialize,
				      benchmark,
				      cleanup,
				      i,
				      response[1],
				      start_signal[0],
				      result_signal[0],
				      exit_signal[0],
				      enough,
				      iterations,
				      parallel,
				      repetitions,
				      cookie
				);
			exit(0);
		default:
			break;
		}
	}

	/* parent: drop the child-side ends of every pipe */
	close(response[1]);
	close(start_signal[0]);
	close(result_signal[0]);
	close(exit_signal[0]);
	benchmp_parent(response[0],
		       start_signal[1],
		       result_signal[1],
		       exit_signal[1],
		       pids,
		       parallel,
		       iterations,
		       warmup,
		       repetitions,
		       enough
		);
	goto cleanup_exit;

error_exit:
	/* give the children a chance to clean up gracefully */
	signal(SIGCHLD, SIG_DFL);
	while (--i >= 0) {
		kill(pids[i], SIGTERM);
		waitpid(pids[i], NULL, 0);
	}
	/*
	 * benchmp_parent() never ran on this path, so all eight control
	 * descriptors are still open in this process; release them.
	 * i is now -1, so the wait loop below is skipped.
	 */
	benchmp_close_pipe(response);
	benchmp_close_pipe(start_signal);
	benchmp_close_pipe(result_signal);
	benchmp_close_pipe(exit_signal);

cleanup_exit:
	/*
	 * Clean up and kill all children
	 *
	 * NOTE: the children themselves SHOULD exit, and
	 *   killing them could prevent them from
	 *   cleaning up subprocesses, etc...  So, we only
	 *   want to kill child processes when it appears
	 *   that they will not die of their own accord.
	 *   We wait twice the timing interval plus two seconds
	 *   (but at least five seconds) for children to die.
	 *   If they haven't died by that time, then we start
	 *   killing them.
	 */
	benchmp_sigalrm_timeout = (int)((2 * enough)/1000000) + 2;
	if (benchmp_sigalrm_timeout < 5)
		benchmp_sigalrm_timeout = 5;
	signal(SIGCHLD, SIG_DFL);
	while (i-- > 0) {
		/* wait timeout seconds for child to die, then kill it */
		benchmp_sigalrm_pid = pids[i];
		benchmp_sigalrm_handler = signal(SIGALRM, benchmp_sigalrm);
		alarm(benchmp_sigalrm_timeout);

		waitpid(pids[i], NULL, 0);

		alarm(0);
		signal(SIGALRM, benchmp_sigalrm_handler);
	}

	if (pids)
		free(pids);
#ifdef _DEBUG
	fprintf(stderr, "benchmp(0x%x, 0x%x, 0x%x, %d, %d, 0x%x): exiting\n", (unsigned int)initialize, (unsigned int)benchmark, (unsigned int)cleanup, enough, parallel, (unsigned int)cookie);
#endif
}