/*
 * Logging wrapper around clSetCommandQueueProperty().
 *
 * Prints the requested property bits and the enable flag to stdout, then
 * forwards all four arguments unchanged to clSetCommandQueueProperty() and
 * returns that call's status code.
 *
 *   command_queue   queue whose properties are being changed
 *   properties      property bits to enable or disable
 *   enable          whether the given bits are switched on or off
 *   old_properties  passed straight through to the wrapped call (may be NULL
 *                   as far as this wrapper is concerned; it never dereferences it)
 */
cl_int clSetCommandQueueProperty_test(cl_command_queue command_queue,
                                      cl_command_queue_properties properties,
                                      cl_bool enable,
                                      cl_command_queue_properties *old_properties)
{
  /*
   * The Khronos headers define cl_command_queue_properties as a 64-bit
   * unsigned bitfield and cl_bool as an unsigned int.  Widen explicitly so
   * the conversion specifiers match the arguments on every ABI (a plain
   * `long` is only 32 bits wide on LLP64 targets).
   */
  printf("clSetCommandQueueProperty_test: properties==%llu enable==%u\n",
         (unsigned long long) properties, (unsigned int) enable);

  return clSetCommandQueueProperty(command_queue, properties, enable,
                                   old_properties);
}
int main(int argc, char **argv) #endif { int i, niter, step; double mflops, t, tmax; logical verified; char class; double tsum[t_last+2], t1[t_last+2], tming[t_last+2], tmaxg[t_last+2]; char *t_recs[t_last+2] = { "total", "rhs", "xsolve", "ysolve", "zsolve", "bpack", "exch", "xcomm", "ycomm", "zcomm", " totcomp", " totcomm" }; //--------------------------------------------------------------------- // Root node reads input file (if it exists) else takes // defaults from parameters //--------------------------------------------------------------------- printf("\n\n NAS Parallel Benchmarks (NPB3.3-OCL-MD) - SP Benchmark\n\n"); FILE *fp; fp = fopen("timer.flag", "r"); timeron = false; if (fp != NULL) { timeron = true; fclose(fp); } if ((fp = fopen("inputsp.data", "r")) != NULL) { int result; printf(" Reading from input file inputsp.data\n"); result = fscanf(fp, "%d", &niter); while (fgetc(fp) != '\n'); result = fscanf(fp, "%*f"); while (fgetc(fp) != '\n'); result = fscanf(fp, "%d%d%d", &grid_points[0], &grid_points[1], &grid_points[2]); fclose(fp); } else { printf(" No input file inputsp.data. 
Using compiled defaults\n"); niter = NITER_DEFAULT; grid_points[0] = PROBLEM_SIZE; grid_points[1] = PROBLEM_SIZE; grid_points[2] = PROBLEM_SIZE; } setup_opencl(argc, argv); printf(" Size: %4dx%4dx%4d\n", grid_points[0], grid_points[1], grid_points[2]); printf(" Iterations: %4d", niter); if (num_devices != MAXCELLS*MAXCELLS) printf(" WARNING: compiled for %5d devices \n", MAXCELLS*MAXCELLS); printf(" Number of active devices: %5d\n\n", num_devices); make_set(); for (i = 0; i < t_last; i++) { timer_clear(i); } set_constants(); initialize(); lhsinit(); exact_rhs(); compute_buffer_size(5); set_kernel_args(); //--------------------------------------------------------------------- // do one time step to touch all code, and reinitialize //--------------------------------------------------------------------- #ifdef MINIMD_SNUCL_OPTIMIZATIONS // set cmd queue property for(i = 0; i < num_devices; i++) { clSetCommandQueueProperty(cmd_queue[i], CL_QUEUE_AUTO_DEVICE_SELECTION | //CL_QUEUE_ITERATIVE | CL_QUEUE_COMPUTE_INTENSIVE, true, NULL); } #endif adi(); #ifdef MINIMD_SNUCL_OPTIMIZATIONS for(i = 0; i < num_devices; i++) { clSetCommandQueueProperty(cmd_queue[i], 0, true, NULL); } #endif initialize(); //--------------------------------------------------------------------- // Synchronize before placing time stamp //--------------------------------------------------------------------- for (i = 0; i < t_last; i++) { timer_clear(i); } timer_clear(0); timer_start(0); for (step = 1; step <= niter; step++) { if ((step % 20) == 0 || step == 1) { printf(" Time step %4d\n", step); } adi(); } timer_stop(0); t = timer_read(0); verify(niter, &class, &verified); tmax = t; if( tmax != 0.0 ) { mflops = (881.174*(double)( PROBLEM_SIZE*PROBLEM_SIZE*PROBLEM_SIZE ) -4683.91*(double)( PROBLEM_SIZE*PROBLEM_SIZE ) +11484.5*(double)( PROBLEM_SIZE ) -19272.4) * (double)( niter ) / (tmax*1000000.0); } else { mflops = 0.0; } c_print_results("SP", class, grid_points[0], grid_points[1], grid_points[2], 
niter, tmax, mflops, " floating point", verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, CS7, clu_GetDeviceTypeName(device_type), device_name, num_devices); if (timeron) { /* for (i = 0; i < t_last; i++) { t1[i] = timer_read(i); } t1[t_xsolve] = t1[t_xsolve] - t1[t_xcomm]; t1[t_ysolve] = t1[t_ysolve] - t1[t_ycomm]; t1[t_zsolve] = t1[t_zsolve] - t1[t_zcomm]; t1[t_last+2] = t1[t_xcomm]+t1[t_ycomm]+t1[t_zcomm]+t1[t_exch]; t1[t_last+1] = t1[t_total] - t1[t_last+2]; MPI_Reduce(&t1, tsum, t_last+2, dp_type, MPI_SUM, 0, comm_setup); MPI_Reduce(&t1, tming, t_last+2, dp_type, MPI_MIN, 0, comm_setup); MPI_Reduce(&t1, tmaxg, t_last+2, dp_type, MPI_MAX, 0, comm_setup); if (node == 0) { printf(" nprocs =%6d minimum maximum average\n", total_nodes); for (i = 0; i < t_last+2; i++) { tsum[i] = tsum[i] / total_nodes; printf(" timer %2d(%8s) : %10.4f %10.4f %10.4f\n", i+1, t_recs[i], tming[i], tmaxg[i], tsum[i]); } } */ } release_opencl(); return 0; }