int main(int argc, char *argv[]) { int i, niter, step, n3; double mflops, t, tmax, trecs[t_last+1]; logical verified; char Class; char *t_names[t_last+1]; //--------------------------------------------------------------------- // Read input file (if it exists), else take // defaults from parameters //--------------------------------------------------------------------- FILE *fp; if ((fp = fopen("timer.flag", "r")) != NULL) { timeron = true; t_names[t_total] = "total"; t_names[t_rhsx] = "rhsx"; t_names[t_rhsy] = "rhsy"; t_names[t_rhsz] = "rhsz"; t_names[t_rhs] = "rhs"; t_names[t_xsolve] = "xsolve"; t_names[t_ysolve] = "ysolve"; t_names[t_zsolve] = "zsolve"; t_names[t_rdis1] = "redist1"; t_names[t_rdis2] = "redist2"; t_names[t_tzetar] = "tzetar"; t_names[t_ninvr] = "ninvr"; t_names[t_pinvr] = "pinvr"; t_names[t_txinvr] = "txinvr"; t_names[t_add] = "add"; fclose(fp); } else { timeron = false; } printf("\n\n NAS Parallel Benchmarks (NPB3.3-SER-C) - SP Benchmark\n\n"); if ((fp = fopen("inputsp.data", "r")) != NULL) { int result; printf(" Reading from input file inputsp.data\n"); result = fscanf(fp, "%d", &niter); while (fgetc(fp) != '\n'); result = fscanf(fp, "%lf", &dt); while (fgetc(fp) != '\n'); result = fscanf(fp, "%d%d%d", &grid_points[0], &grid_points[1], &grid_points[2]); fclose(fp); } else { printf(" No input file inputsp.data. 
Using compiled defaults\n"); niter = NITER_DEFAULT; dt = DT_DEFAULT; grid_points[0] = PROBLEM_SIZE; grid_points[1] = PROBLEM_SIZE; grid_points[2] = PROBLEM_SIZE; } printf(" Size: %4dx%4dx%4d\n", grid_points[0], grid_points[1], grid_points[2]); printf(" Iterations: %4d dt: %10.6f\n", niter, dt); printf("\n"); if ((grid_points[0] > IMAX) || (grid_points[1] > JMAX) || (grid_points[2] > KMAX) ) { printf(" %d, %d, %d\n", grid_points[0], grid_points[1], grid_points[2]); printf(" Problem size too big for compiled array sizes\n"); return 0; } nx2 = grid_points[0] - 2; ny2 = grid_points[1] - 2; nz2 = grid_points[2] - 2; set_constants(); for (i = 1; i <= t_last; i++) { timer_clear(i); } exact_rhs(); initialize(); //--------------------------------------------------------------------- // do one time step to touch all code, and reinitialize //--------------------------------------------------------------------- adi(); initialize(); for (i = 1; i <= t_last; i++) { timer_clear(i); } timer_start(1); for (step = 1; step <= niter; step++) { if ((step % 20) == 0 || step == 1) { printf(" Time step %4d\n", step); } adi(); } timer_stop(1); tmax = timer_read(1); verify(niter, &Class, &verified); if (tmax != 0.0) { n3 = grid_points[0]*grid_points[1]*grid_points[2]; t = (grid_points[0]+grid_points[1]+grid_points[2])/3.0; mflops = (881.174 * (double)n3 - 4683.91 * (t * t) + 11484.5 * t - 19272.4) * (double)niter / (tmax*1000000.0); } else { mflops = 0.0; } print_results("SP", Class, grid_points[0], grid_points[1], grid_points[2], niter, tmax, mflops, " floating point", verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, "(none)"); //--------------------------------------------------------------------- // More timers //--------------------------------------------------------------------- if (timeron) { for (i = 1; i <= t_last; i++) { trecs[i] = timer_read(i); } if (tmax == 0.0) tmax = 1.0; printf(" SECTION Time (secs)\n"); for (i = 1; i <= t_last; i++) { printf(" %-8s:%9.3f 
(%6.2f%%)\n", t_names[i], trecs[i], trecs[i]*100./tmax); if (i == t_rhs) { t = trecs[t_rhsx] + trecs[t_rhsy] + trecs[t_rhsz]; printf(" --> %8s:%9.3f (%6.2f%%)\n", "sub-rhs", t, t*100./tmax); t = trecs[t_rhs] - t; printf(" --> %8s:%9.3f (%6.2f%%)\n", "rest-rhs", t, t*100./tmax); } else if (i == t_zsolve) { t = trecs[t_zsolve] - trecs[t_rdis1] - trecs[t_rdis2]; printf(" --> %8s:%9.3f (%6.2f%%)\n", "sub-zsol", t, t*100./tmax); } else if (i == t_rdis2) { t = trecs[t_rdis1] + trecs[t_rdis2]; printf(" --> %8s:%9.3f (%6.2f%%)\n", "redist", t, t*100./tmax); } } } return 0; }
int main(int argc, char **argv) #endif { int i, niter, step; double mflops, t, tmax; logical verified; char class; double tsum[t_last+2], t1[t_last+2], tming[t_last+2], tmaxg[t_last+2]; char *t_recs[t_last+2] = { "total", "rhs", "xsolve", "ysolve", "zsolve", "bpack", "exch", "xcomm", "ycomm", "zcomm", " totcomp", " totcomm" }; //--------------------------------------------------------------------- // Root node reads input file (if it exists) else takes // defaults from parameters //--------------------------------------------------------------------- printf("\n\n NAS Parallel Benchmarks (NPB3.3-OCL-MD) - SP Benchmark\n\n"); FILE *fp; fp = fopen("timer.flag", "r"); timeron = false; if (fp != NULL) { timeron = true; fclose(fp); } if ((fp = fopen("inputsp.data", "r")) != NULL) { int result; printf(" Reading from input file inputsp.data\n"); result = fscanf(fp, "%d", &niter); while (fgetc(fp) != '\n'); result = fscanf(fp, "%*f"); while (fgetc(fp) != '\n'); result = fscanf(fp, "%d%d%d", &grid_points[0], &grid_points[1], &grid_points[2]); fclose(fp); } else { printf(" No input file inputsp.data. 
Using compiled defaults\n"); niter = NITER_DEFAULT; grid_points[0] = PROBLEM_SIZE; grid_points[1] = PROBLEM_SIZE; grid_points[2] = PROBLEM_SIZE; } setup_opencl(argc, argv); printf(" Size: %4dx%4dx%4d\n", grid_points[0], grid_points[1], grid_points[2]); printf(" Iterations: %4d", niter); if (num_devices != MAXCELLS*MAXCELLS) printf(" WARNING: compiled for %5d devices \n", MAXCELLS*MAXCELLS); printf(" Number of active devices: %5d\n\n", num_devices); make_set(); for (i = 0; i < t_last; i++) { timer_clear(i); } set_constants(); initialize(); lhsinit(); exact_rhs(); compute_buffer_size(5); set_kernel_args(); //--------------------------------------------------------------------- // do one time step to touch all code, and reinitialize //--------------------------------------------------------------------- #ifdef MINIMD_SNUCL_OPTIMIZATIONS // set cmd queue property for(i = 0; i < num_devices; i++) { clSetCommandQueueProperty(cmd_queue[i], CL_QUEUE_AUTO_DEVICE_SELECTION | //CL_QUEUE_ITERATIVE | CL_QUEUE_COMPUTE_INTENSIVE, true, NULL); } #endif adi(); #ifdef MINIMD_SNUCL_OPTIMIZATIONS for(i = 0; i < num_devices; i++) { clSetCommandQueueProperty(cmd_queue[i], 0, true, NULL); } #endif initialize(); //--------------------------------------------------------------------- // Synchronize before placing time stamp //--------------------------------------------------------------------- for (i = 0; i < t_last; i++) { timer_clear(i); } timer_clear(0); timer_start(0); for (step = 1; step <= niter; step++) { if ((step % 20) == 0 || step == 1) { printf(" Time step %4d\n", step); } adi(); } timer_stop(0); t = timer_read(0); verify(niter, &class, &verified); tmax = t; if( tmax != 0.0 ) { mflops = (881.174*(double)( PROBLEM_SIZE*PROBLEM_SIZE*PROBLEM_SIZE ) -4683.91*(double)( PROBLEM_SIZE*PROBLEM_SIZE ) +11484.5*(double)( PROBLEM_SIZE ) -19272.4) * (double)( niter ) / (tmax*1000000.0); } else { mflops = 0.0; } c_print_results("SP", class, grid_points[0], grid_points[1], grid_points[2], 
niter, tmax, mflops, " floating point", verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, CS7, clu_GetDeviceTypeName(device_type), device_name, num_devices); if (timeron) { /* for (i = 0; i < t_last; i++) { t1[i] = timer_read(i); } t1[t_xsolve] = t1[t_xsolve] - t1[t_xcomm]; t1[t_ysolve] = t1[t_ysolve] - t1[t_ycomm]; t1[t_zsolve] = t1[t_zsolve] - t1[t_zcomm]; t1[t_last+2] = t1[t_xcomm]+t1[t_ycomm]+t1[t_zcomm]+t1[t_exch]; t1[t_last+1] = t1[t_total] - t1[t_last+2]; MPI_Reduce(&t1, tsum, t_last+2, dp_type, MPI_SUM, 0, comm_setup); MPI_Reduce(&t1, tming, t_last+2, dp_type, MPI_MIN, 0, comm_setup); MPI_Reduce(&t1, tmaxg, t_last+2, dp_type, MPI_MAX, 0, comm_setup); if (node == 0) { printf(" nprocs =%6d minimum maximum average\n", total_nodes); for (i = 0; i < t_last+2; i++) { tsum[i] = tsum[i] / total_nodes; printf(" timer %2d(%8s) : %10.4f %10.4f %10.4f\n", i+1, t_recs[i], tming[i], tmaxg[i], tsum[i]); } } */ } release_opencl(); return 0; }
/*
 * Driver for the 3D solver: collects the run parameters through
 * welcome(), builds two matrices with initGaussians(), prints the
 * initial state, runs the scheme selected by `algo` (0 = forwardTime,
 * 1 = crank, 2 = adi) and prints the elapsed processor time in seconds.
 *
 * Returns 0 in all cases, including an unrecognised `algo` value.
 */
int main(int argc, char *argv[])
{
    /* Matrix dimensions */
    int nx = 4, ny = 4, nz = 4;

    /* Parameters to be changed by user */
    int NT; /* Max number of timesteps */
    double alpha = .01;
    double dt = .001;
    double q = .005;
    double dx = .01;
    double dz = .01;
    double dy = .01;
    int algo = 0;
    int border = 0;

    srand((unsigned)time(NULL)); /* make some noise */
    double noise = .09 * (double)(rand() / (double)RAND_MAX);

    welcome(&nx, &ny, &nz, &alpha, &q, &dt, &dx, &algo, &border);
    /*
    if(argc >1){
        FILE *fp;
        fp = fopen(argv[1], "r");
        fscanf(fp, "%d %d %d %d %lf %lf %lf %d %d", &nx, &ny, &nz, &NT, &dt, &alpha, &q, &border, &algo);
        fclose(fp);
    }
    */

    assert(alpha * dt <= (0.5 * dx * dx));

    /*
     * Derived coefficient. It is computed only after welcome(), which
     * receives pointers to alpha, dt and dx and so may have changed them.
     */
    double C = alpha * dt / pow(dx, 2);

    /* 2 3D matrices init under gaussian conditions + boundaries */
    double ***matrix = initGaussians(nx, ny, nz, dx, dy, dz, noise);
    double ***matrix_np1 = initGaussians(nx, ny, nz, dx, dy, dz, noise);

    int when = 0;
    print3DMatrix(matrix, nx, ny, nz, dx, dx, dx, when);

    double t_start, t_end;
    switch (algo) {
    case 0:
        NT = 10000;
        t_start = clock();
        forwardTime(matrix, matrix_np1, nx, ny, nz, dx, dy, dz, C, q, border, NT);
        t_end = clock();
        break;
    case 1:
        NT = 5;
        t_start = clock();
        crank(matrix, matrix_np1, nx, ny, nz, alpha, dx, C, border, NT);
        t_end = clock();
        break;
    case 2:
        NT = 5;
        t_start = clock();
        adi(matrix, matrix_np1, nx, ny, nz, alpha, dx, C, border, NT);
        t_end = clock();
        break;
    default:
        printf("error in parameters");
        /* Release the matrices on this exit path as well. */
        freeMats(matrix, matrix_np1, nx, ny);
        return 0;
    }

    freeMats(matrix, matrix_np1, nx, ny);

    printf("Total Time Elapsed.....\n");
    printf("%f\n", (t_end - t_start) / CLOCKS_PER_SEC);

    return 0;
}
int main(int argc, char *argv[]) { int i, niter, step; double navg, mflops, n3; double tmax, t, trecs[t_last+1]; logical verified; char Class; char *t_names[t_last+1]; //--------------------------------------------------------------------- // Root node reads input file (if it exists) else takes // defaults from parameters //--------------------------------------------------------------------- FILE *fp; if ((fp = fopen("timer.flag", "r")) != NULL) { timeron = true; t_names[t_total] = "total"; t_names[t_rhsx] = "rhsx"; t_names[t_rhsy] = "rhsy"; t_names[t_rhsz] = "rhsz"; t_names[t_rhs] = "rhs"; t_names[t_xsolve] = "xsolve"; t_names[t_ysolve] = "ysolve"; t_names[t_zsolve] = "zsolve"; t_names[t_rdis1] = "redist1"; t_names[t_rdis2] = "redist2"; t_names[t_add] = "add"; fclose(fp); } else { timeron = false; } printf("\n\n NAS Parallel Benchmarks (NPB3.3-OMP-C) - BT Benchmark\n\n"); if ((fp = fopen("inputbt.data", "r")) != NULL) { int result; printf(" Reading from input file inputbt.data\n"); result = fscanf(fp, "%d", &niter); while (fgetc(fp) != '\n'); result = fscanf(fp, "%lf", &dt); while (fgetc(fp) != '\n'); result = fscanf(fp, "%d%d%d\n", &grid_points[0], &grid_points[1], &grid_points[2]); fclose(fp); } else { printf(" No input file inputbt.data. 
Using compiled defaults\n"); niter = NITER_DEFAULT; dt = DT_DEFAULT; grid_points[0] = PROBLEM_SIZE; grid_points[1] = PROBLEM_SIZE; grid_points[2] = PROBLEM_SIZE; } printf(" Size: %4dx%4dx%4d\n", grid_points[0], grid_points[1], grid_points[2]); printf(" Iterations: %4d dt: %11.7f\n", niter, dt); printf(" Number of available threads: %5d\n", omp_get_max_threads()); printf("\n"); if ( (grid_points[0] > IMAX) || (grid_points[1] > JMAX) || (grid_points[2] > KMAX) ) { printf(" %d, %d, %d\n", grid_points[0], grid_points[1], grid_points[2]); printf(" Problem size too big for compiled array sizes\n"); return 0; } set_constants(); for (i = 1; i <= t_last; i++) { timer_clear(i); } initialize(); exact_rhs(); //--------------------------------------------------------------------- // do one time step to touch all code, and reinitialize //--------------------------------------------------------------------- adi(); initialize(); for (i = 1; i <= t_last; i++) { timer_clear(i); } timer_start(1); // Do not do inlining, to avoid huge loops, scops are kept separate and are // distributed among files. 
//#pragma scop for (step = 1; step <= niter; step++) { adi(); } //#pragma endscop timer_stop(1); tmax = timer_read(1); verify(niter, &Class, &verified); n3 = 1.0*grid_points[0]*grid_points[1]*grid_points[2]; navg = (grid_points[0]+grid_points[1]+grid_points[2])/3.0; if(tmax != 0.0) { mflops = 1.0e-6 * (double)niter * (3478.8 * n3 - 17655.7 * (navg*navg) + 28023.7 * navg) / tmax; } else { mflops = 0.0; } print_results("BT", Class, grid_points[0], grid_points[1], grid_points[2], niter, tmax, mflops, " floating point", verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, "(none)"); //--------------------------------------------------------------------- // More timers //--------------------------------------------------------------------- if (timeron) { for (i = 1; i <= t_last; i++) { trecs[i] = timer_read(i); } if (tmax == 0.0) tmax = 1.0; printf(" SECTION Time (secs)\n"); for (i = 1; i <= t_last; i++) { printf(" %-8s:%9.3f (%6.2f%%)\n", t_names[i], trecs[i], trecs[i]*100./tmax); if (i == t_rhs) { t = trecs[t_rhsx] + trecs[t_rhsy] + trecs[t_rhsz]; printf(" --> %8s:%9.3f (%6.2f%%)\n", "sub-rhs", t, t*100./tmax); t = trecs[t_rhs] - t; printf(" --> %8s:%9.3f (%6.2f%%)\n", "rest-rhs", t, t*100./tmax); } else if (i==t_zsolve) { t = trecs[t_zsolve] - trecs[t_rdis1] - trecs[t_rdis2]; printf(" --> %8s:%9.3f (%6.2f%%)\n", "sub-zsol", t, t*100./tmax); } else if (i==t_rdis2) { t = trecs[t_rdis1] + trecs[t_rdis2]; printf(" --> %8s:%9.3f (%6.2f%%)\n", "redist", t, t*100./tmax); } } } return 0; }