int job(void) { int i = 0; long j = 0; #ifdef DEBUG double start, mytime; #endif /* Do real-time calculation. */ for (i = 0; i < wcet; ++i) { #ifdef DEBUG start = wctime(); #endif for ( j = 0; j < 490000l; ++j ) sqrt((double)j*(j+1)); #ifdef DEBUG mytime = wctime()-start; printf("Duration:\t%f\n",mytime); #endif } --count; if (count > 0) return 0; // don't exit else return 1; // exit /* Don't exit. */ //return 0; }
int ss_start(int work_size, int chunk_size) { StealStack* s = &stealStack; double t1, t2; s->work_size = work_size; s->chunk_size = chunk_size; if (polling_interval == 0) { // Set a default polling interval polling_interval = default_polling_interval; } if (comm_rank == 0) printf("Work-Sharing release interval = %d\n", polling_interval); if(comm_rank == getWorkQueueId()){ //this thread does not do real work t1 = wctime(); doWorkQueueManager(comm_size - 1, s); t2 = wctime(); s->walltime = t2 - t1; #ifdef TRACE s->startTime = t1; s->sessionRecords[SS_IDLE][s->entries[SS_IDLE] - 1].endTime = t2; #endif return 0; } return 1; }
/*
 * Refine the global per-iteration cost estimate `loop_length`.
 *
 * Calls loop_for(interval), measures the wall-clock time it actually took,
 * and rescales loop_length by the relative error of that measurement.
 *
 * interval: requested spin time in seconds; must be positive, otherwise the
 *           call is a no-op.
 */
static void fine_tune(double interval)
{
    double start, end, delta;

    /* A non-positive interval would make the division below meaningless. */
    if (interval <= 0)
        return;

    start = wctime();
    loop_for(interval);
    end = wctime();

    /* Relative error: > 0 if the loop ran too long, < 0 if too short. */
    delta = (end - start - interval) / interval;

    /*
     * delta >= 1 means the loop took at least twice the requested time.
     * (1 - delta) would then be zero or negative and turn loop_length into
     * infinity or a negative value, so such a sample is discarded.
     */
    if (delta != 0 && delta < 1)
        loop_length = loop_length / (1 - delta);
}
int main (int argc, char **argv) { srand(time(NULL)); MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_my_rank); Formura_Init(&navi, MPI_COMM_WORLD); if (argc <= 1) { T_MAX=100; }else{ sscanf(argv[1], "%d", &T_MAX); } if (argc <= 2) { T_MONITOR=100; }else{ sscanf(argv[2], "%d", &T_MONITOR); } init(); double t_begin, t_end; for(;;){ double t = wctime(); if(navi.time_step % T_MONITOR == 0 || navi.time_step <= 3 * T_MONITOR ) { printf("%d step @ %lf sec\n", navi.time_step, t-t_begin); } if(navi.time_step % T_MONITOR == 0) { write_monitor(); } if (navi.time_step >= T_MAX) break; if (navi.time_step == 0) { t_begin = wctime(); start_collection("main"); } Formura_Forward(&navi); // navi.time_step increases if (navi.time_step >= T_MAX) { t_end = wctime(); stop_collection("main"); } } printf("wct = %lf sec\n",t_end - t_begin); MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); }
int main(int argc, char *argv[]) { int workers = 1; int dqsize = 100000; char c; while ((c=getopt(argc, argv, "w:q:h")) != -1) { switch (c) { case 'w': workers = atoi(optarg); break; case 'q': dqsize = atoi(optarg); break; case 'h': usage(argv[0]); break; default: abort(); } } if (optind == argc) { usage(argv[0]); exit(1); } lace_init(workers, dqsize); lace_startup(0, 0, 0); LACE_ME; struct item items[MAX_ITEMS]; /* array of items */ int n, capacity, sol; if (read_input(argv[optind], items, &capacity, &n)) return 1; double t1 = wctime(); sol = CALL(knapsack, items, capacity, n, 0); double t2 = wctime(); printf("Best value is %d\n", sol); printf("Time: %f\n", t2-t1); return 0; }
/*
 * Diagnostic: print the current loop length, then endlessly sweep the
 * requested delay from 0.5 s downwards in 0.01 s steps, reporting for each
 * request how long loop_for() really took and the absolute and relative
 * error. Never returns.
 */
static void debug_delay_loop(void)
{
    double requested, before, after;

    show_loop_length();

    for (;;) {
        requested = 0.5;
        while (requested > 0.01) {
            before = wctime();
            loop_for(requested);
            after = wctime();

            printf("%6.4fs: looped for %10.8fs, delta=%11.8fs, error=%7.4f%%\n",
                   requested,
                   after - before,
                   after - before - requested,
                   100 * (after - before - requested) / requested);

            requested -= 0.01;
        }
    }
}
int main(int argc, char **argv) { int workers = 1; int dqsize = 100000; char c; while ((c=getopt(argc, argv, "w:q:h")) != -1) { switch (c) { case 'w': workers = atoi(optarg); break; case 'q': dqsize = atoi(optarg); break; case 'h': usage(argv[0]); break; default: abort(); } } if (optind == argc) { usage(argv[0]); exit(1); } lace_init(workers, dqsize); lace_startup(0, 0, 0); LACE_ME; int n = atoi(argv[optind]); double t1 = wctime(); int m = CALL(pfib, n); double t2 = wctime(); printf("fib(%d) = %d\n", n, m); printf("Time: %f\n", t2-t1); lace_exit(); return 0; }
/*
 * Sequential Fibonacci driver: expects n as the first argument, prints the
 * value returned by pfib(n) followed by the wall-clock time of the call.
 * Exits with status 2 when the argument is missing.
 */
int main( int argc, char **argv )
{
    if( argc < 2 ) {
        fprintf( stderr, "Usage: fib-seq <arg>\n" );
        exit( 2 );
    }

    int input = atoi( argv[ 1 ] );

    double began = wctime();
    int value = pfib( input );
    double ended = wctime();

    printf( "%d\n", value );
    printf("Time: %f\n", ended - began);
    return 0;
}
/*
 * Calibrate the global `loop_length`: the wall-clock duration of a single
 * loop_once() call, measured after a few warm-up calls and then refined by
 * three fine_tune() passes of 0.1 s each.
 */
static void configure_loop(void)
{
    double start;
    int pass;

    /* prime cache */
    for (pass = 0; pass < 3; ++pass)
        loop_once();

    /* measure one iteration; hope we didn't get preempted */
    start = wctime();
    loop_once();
    loop_length = wctime();
    loop_length -= start;

    /* fine tune */
    for (pass = 0; pass < 3; ++pass)
        fine_tune(0.1);
}
/*
 * Sequential N-queens driver: takes the board size n as the first argument,
 * prints the value returned by nqueens(n, 0, a) and the wall-clock time of
 * the call.
 */
int main(int argc, char *argv[])
{
    if (1 == argc) {
        usage(argv[0]);
        exit(1);
    }

    int n = atoi(argv[1]);

    /* A negative n would be converted to an enormous alloca() size. */
    if (n < 0) {
        usage(argv[0]);
        exit(1);
    }

    /* Scratch array of n chars handed to nqueens(). */
    char *a = (char*)alloca(n*sizeof(char));

    printf("running queens %d sequentially...\n", n);

    double t1 = wctime();
    uint64_t res = nqueens(n, 0, a);
    double t2 = wctime();

    /*
     * uint64_t is not unsigned long on every platform; convert explicitly
     * so the conversion specifier always matches the argument.
     */
    printf("Result: Q(%d) = %llu\n", n, (unsigned long long)res);
    printf("Time: %f\n", t2-t1);
    return 0;
}
int main () { for (int k=0;k<NZ;++k) { for (int j=0;j<NY;++j) { for (int i=0;i<NX;++i) { aa[k][j][i] = rand()/(double)RAND_MAX; ab[k][j][i] = rand()/(double)RAND_MAX; ac[k][j][i] = rand()/(double)RAND_MAX; ad[k][j][i] = rand()/(double)RAND_MAX; } } } double t_begin = wctime(); start_collection("region"); #define LOOPBODY(PRE_HOOK,POST_HOOK) \ for (int j=0;j<NY;++j) { \ for (int i=0;i<NX;++i) { \ double x = aa[k][j][i]; \ \ PRE_HOOK; \ \ double x0 = 0.890*x - 0.880*x*x; \ double x1 = 0.889*x - 0.881*x*x; \ double x2 = 0.888*x - 0.882*x*x; \ \ x0 = 3.89*x0 - 3.88*x0*x0; \ x1 = 3.89*x1 - 3.88*x1*x1; \ x2 = 3.89*x2 - 3.88*x2*x2; \ \ x0 = 3.89*x0 - 3.88*x0*x0; \ x1 = 3.89*x1 - 3.88*x1*x1; \ x2 = 3.89*x2 - 3.88*x2*x2; \ \ x0 = 3.89*x0 - 3.88*x0*x0; \ x1 = 3.89*x1 - 3.88*x1*x1; \ x2 = 3.89*x2 - 3.88*x2*x2; \ \ x0 = 3.89*x0 - 3.88*x0*x0; \ x1 = 3.89*x1 - 3.88*x1*x1; \ x2 = 3.89*x2 - 3.88*x2*x2; \ \ x0 = 3.89*x0 - 3.88*x0*x0; \ x1 = 3.89*x1 - 3.88*x1*x1; \ x2 = 3.89*x2 - 3.88*x2*x2; \ \ \ aa[k][j][i] = (x0*x1+x2)/2; \ \ POST_HOOK; \ } \ } for (int t=0;t<T_MAX; ++t){ { const int k = 0; LOOPBODY(x=ab[k][j][i], {}); } for (int k=1;k<NZ-1;++k) { LOOPBODY({},{}); } { const int k = NZ-1; LOOPBODY({}, ab[k][j][i] *= x1); } } stop_collection("region"); double t_end = wctime(); double flops = (6 * 12 + 3.0)*(double)NX*NY*NZ*T_MAX / (t_end - t_begin); double sum = 0; for (int k=0;k<NZ;++k) { for (int j=0;j<NY;++j) { for (int i=0;i<NX;++i) { sum += aa[k][j][i] + ab[k][j][i] + ac[k][j][i] + ad[k][j][i]; } } } printf("sum = %lf, wctime = %lf sec\n", sum, t_end - t_begin); printf("%lf GFlop/s\n", flops/1e9); }
int main(int argc, char** argv) { int ret; int opt; int wait = 0; int test_loop = 0; int skip_config = 0; int verbose = 0; double wcet_ms; double duration, start; struct rt_task rt; FILE *file; progname = argv[0]; while ((opt = getopt(argc, argv, OPTSTR)) != -1) { switch (opt) { case 'w': wait = 1; break; case 'l': test_loop = 1; break; case 'd': /* manually configure delay per loop iteration * unit: microseconds */ loop_length = atof(optarg) / 1000000; skip_config = 1; break; case 'v': verbose = 1; break; case ':': usage("Argument missing."); break; case '?': default: usage("Bad argument."); break; } } if (!skip_config) configure_loop(); if (test_loop) { debug_delay_loop(); return 0; } if (argc - optind < 2) usage("Arguments missing."); if ((file = fopen(argv[optind + 0], "r")) == NULL) { fprintf(stderr, "Cannot open %s\n", argv[1]); return -1; } duration = atof(argv[optind + 1]); memset(&rt, 0, sizeof(struct rt_task)); if (parse_hime_ts_file(file, &rt) < 0) bail_out("Could not parse file\n"); if (sporadic_task_ns_semi(&rt) < 0) bail_out("could not setup rt task params"); fclose(file); if (verbose) show_loop_length(); init_litmus(); ret = task_mode(LITMUS_RT_TASK); if (ret != 0) bail_out("could not become RT task"); if (wait) { ret = wait_for_ts_release(); if (ret != 0) bail_out("wait_for_ts_release()"); } wcet_ms = ((double) rt.exec_cost ) / __NS_PER_MS; start = wctime(); while (start + duration > wctime()) { job(wcet_ms * 0.0009); /* 90% wcet, in seconds */ } return 0; }
int main () { const int n_thre = omp_get_max_threads(); fill_initial_condition(); #pragma omp parallel { int tid=2*omp_get_thread_num(); for(int x=0;x<SX;++x) { for(int y=0;y<SY;++y) { for(int z=0;z<SZ;++z) { double u,v; get_solution_at(0,x,y,z, u,v); ats(sU0,tid,x,y,z)=u; ats(sV0,tid,x,y,z)=v; } } } } std::cerr << "Setting up wall values..." << std::endl; for(int t = 0;t<T_MAX;++t){ /* #pragma omp parallel { int tid=2*omp_get_thread_num(); for(int x=SX-2;x<SX;++x) { for(int y=0;y<SY;++y) { for(int z=0;z<SZ;++z) { double u,v; get_solution_at(t,x+t,y+t,z+t, u,v); Uwx[tid][t][x-(SX-2)][y][z] = u; Vwx[tid][t][x-(SX-2)][y][z] = v; } } } for(int x=0;x<SX;++x) { for(int y=SY-2;y<SY;++y) { for(int z=0;z<SZ;++z) { double u,v; get_solution_at(t,x+t,y+t,z+t, u,v); Uwy[tid][t][x][y-(SY-2)][z] = u; Vwy[tid][t][x][y-(SY-2)][z] = v; } } } for(int x=0;x<SX;++x) { for(int y=0;y<SY;++y) { for(int z=SZ-2;z<SZ;++z) { double u,v; get_solution_at(t,x+t,y+t,z+t, u,v); Uwz[tid][t][x][y][z-(SZ-2)] = u; Vwz[tid][t][x][y][z-(SZ-2)] = v; } } } */ } for(int trial=0;trial<10;++trial) { std::cerr << "Carrying out simulation..." 
<< std::endl; // set initial condition #pragma omp parallel { const int tid=2*omp_get_thread_num(); for(int x=0;x<SX;++x) { for(int y=0;y<SY;++y) { for(int z=0;z<SZ;++z) { ats(sU,tid,x,y,z)=ats(sU0,tid,x,y,z); ats(sV,tid,x,y,z)=ats(sV0,tid,x,y,z); } } } } double time_comp=0, time_comm=0, time_begin, time_end; //for(int heating=0;heating<10;++heating) { time_begin = wctime(); #pragma omp parallel for (int tid=0;tid<n_thre;++tid) { //const int tid=2*omp_get_thread_num(); for(int t = 0; t < T_MAX; ++t){ // destructively update the state /* const auto lap = [](Real ar[SX][SY][SZ],int x, int y, int z) { auto ret = ar[x][y+1][z+1] + ar[x+2][y+1][z+1] + ar[x+1][y][z+1] + ar[x+1][y+2][z+1] + ar[x+1][y+1][z] + ar[x+1][y+1][z+2] - 6*ar[x+1][y+1][z+1]; return ret / dx / dx; };*/ #define lap(ar,b,x,y,z) \ (ats(ar,b,x,y+1,z+1) + ats(ar,b,x+2,y+1,z+1) \ + ats(ar,b,x+1,y,z+1) + ats(ar,b,x+1,y+2,z+1) \ + ats(ar,b,x+1,y+1,z) + ats(ar,b,x+1,y+1,z+2) \ - 6*ats(ar,b,x+1,y+1,z+1)) / dx / dx #pragma omp for collapse(2) for(int x=0;x<SX-2;++x) { for(int y=0;y<SY-2;++y) { #pragma omp simd for(int z=0;z<SZ-2;++z) { Real u=ats(sU,tid,x+1,y+1,z+1) ; Real v=ats(sV,tid,x+1,y+1,z+1) ; auto du_dt = -Fe * u*v*v + Fu*(1-u) + Du * lap(sU,tid,x,y,z); auto dv_dt = Fe * u*v*v - Fv*v + Dv * lap(sV,tid,x,y,z); ats(sU,tid,x,y,z) = u+dt*du_dt; ats(sV,tid,x,y,z) = v+dt*dv_dt; } } } } } time_end = wctime(); //} double flop = 29.0 * (SX-2)*(SY-2)*(SZ-2) *T_MAX * n_thre; double bw_gb= 8.0 * 2 * 7 * (SX-2)*(SY-2)*(SZ-2) *T_MAX * n_thre; double time_elapse = time_end-time_begin; { const int t = T_MAX; double num[BANK]={0},den[BANK]={0}; #pragma omp parallel { int tid=2*omp_get_thread_num(); for(int x=0;x<SX-2;++x) { for(int y=0;y<SY-2;++y) { for(int z=0;z<SZ-2;++z) { double u,v; get_solution_at(t,x+t,y+t,z+t, u,v); num[tid] += std::abs(u-ats(sU,tid,x,y,z)); den[tid] += 1; } } } } double sum_num = 0, sum_den = 0; for(int i=0;i<BANK;++i){ sum_num += num[i]; sum_den += den[i]; } std::ostringstream msg; msg << 
n_thre << " " << SX << " " << SY << " " << SZ << " " << T_MAX << " " << " t: " << time_elapse << " " << time_comm << " " << time_comp << " GFlops: " << flop/time_elapse/1e9<< " GBps: " << bw_gb/time_elapse/1e9<< " error: " << (sum_num/sum_den); std::ofstream log_file("benchmark-standalone.txt", std::ios::app); std::cout << msg.str() << std::endl; log_file << msg.str() << std::endl; } } }
int main(int argc, char *argv[]) { int workers = 1; int dqsize = 100000; int verify = 0; char c; while ((c=getopt(argc, argv, "w:q:h:c")) != -1) { switch (c) { case 'w': workers = atoi(optarg); break; case 'q': dqsize = atoi(optarg); break; case 'c': verify = 1; break; case 'h': usage(argv[0]); break; default: abort(); } } if (optind == argc) { usage(argv[0]); exit(1); } int n = atoi(argv[optind]); lace_init(workers, dqsize); lace_startup(0, 0, 0); REAL *A, *B, *C1, *C2; if ((n & (n - 1)) != 0 || (n % 16) != 0) { printf("%d: matrix size must be a power of 2" " and a multiple of %d\n", n, 16); return 1; } A = alloc_matrix(n); B = alloc_matrix(n); C1 = alloc_matrix(n); C2 = alloc_matrix(n); init_matrix(n, A, n); init_matrix(n, B, n); LACE_ME; double t1=wctime(); CALL(OptimizedStrassenMultiply, C2, A, B, n, n, n, n); double t2=wctime(); if (verify) { matrixmul(n, A, n, B, n, C1, n); verify = compare_matrix(n, C1, n, C2, n); } if (verify) printf("WRONG RESULT!\n"); else { printf("Time: %f\n", t2-t1); } lace_exit(); return 0; }