Ejemplo n.º 1
0
cl_int
clSetCommandQueueProperty_test(cl_command_queue               command_queue,
                               cl_command_queue_properties    properties, 
                               cl_bool                        enable,
                               cl_command_queue_properties *  old_properties)
{
   cl_int status;
   printf("clSetCommandQueueProperty_test: properties==%ld enable==%d\n", (long) properties, enable);
   status = clSetCommandQueueProperty(command_queue, properties, enable, old_properties);
   return status;
}
Ejemplo n.º 2
0
int main(int argc, char **argv)
#endif
{
  int i, niter, step;
  double mflops, t, tmax;
  logical verified;
  char class;
  double tsum[t_last+2], t1[t_last+2],
         tming[t_last+2], tmaxg[t_last+2];
  char *t_recs[t_last+2] = {
       "total", "rhs", "xsolve", "ysolve", "zsolve", 
       "bpack", "exch", "xcomm", "ycomm", "zcomm",
       " totcomp", " totcomm" };

  //---------------------------------------------------------------------
  // Root node reads input file (if it exists) else takes
  // defaults from parameters
  //---------------------------------------------------------------------
  printf("\n\n NAS Parallel Benchmarks (NPB3.3-OCL-MD) - SP Benchmark\n\n");

  FILE *fp;
  fp = fopen("timer.flag", "r");
  timeron = false;
  if (fp != NULL) {
    timeron = true;
    fclose(fp);
  }

  if ((fp = fopen("inputsp.data", "r")) != NULL) {
    int result;
    printf(" Reading from input file inputsp.data\n");
    result = fscanf(fp, "%d", &niter);
    while (fgetc(fp) != '\n');
    result = fscanf(fp, "%*f");
    while (fgetc(fp) != '\n');
    result = fscanf(fp, "%d%d%d", &grid_points[0], &grid_points[1], 
                                  &grid_points[2]);
    fclose(fp);
  } else {
    printf(" No input file inputsp.data. Using compiled defaults\n");
    niter = NITER_DEFAULT;
    grid_points[0] = PROBLEM_SIZE;
    grid_points[1] = PROBLEM_SIZE;
    grid_points[2] = PROBLEM_SIZE;
  }

  setup_opencl(argc, argv);

  printf(" Size: %4dx%4dx%4d\n", 
      grid_points[0], grid_points[1], grid_points[2]);
  printf(" Iterations: %4d", niter);
  if (num_devices != MAXCELLS*MAXCELLS) 
    printf(" WARNING: compiled for %5d devices \n", MAXCELLS*MAXCELLS);
  printf(" Number of active devices: %5d\n\n", num_devices);

  make_set();

  for (i = 0; i < t_last; i++) {
    timer_clear(i);
  }

  set_constants();

  initialize();

  lhsinit();

  exact_rhs();

  compute_buffer_size(5);

  set_kernel_args();

  //---------------------------------------------------------------------
  // do one time step to touch all code, and reinitialize
  //---------------------------------------------------------------------
#ifdef MINIMD_SNUCL_OPTIMIZATIONS
  // set cmd queue property
  for(i = 0; i < num_devices; i++) {
  	clSetCommandQueueProperty(cmd_queue[i], 
			CL_QUEUE_AUTO_DEVICE_SELECTION | 
			//CL_QUEUE_ITERATIVE | 
			CL_QUEUE_COMPUTE_INTENSIVE,
			true,
			NULL);
  }
#endif
  adi();
#ifdef MINIMD_SNUCL_OPTIMIZATIONS
  for(i = 0; i < num_devices; i++) {
  	clSetCommandQueueProperty(cmd_queue[i], 
			0,
			true,
			NULL);
  }
#endif

  initialize();

  //---------------------------------------------------------------------
  // Synchronize before placing time stamp
  //---------------------------------------------------------------------
  for (i = 0; i < t_last; i++) {
    timer_clear(i);
  }

  timer_clear(0);
  timer_start(0);

  for (step = 1; step <= niter; step++) {

    if ((step % 20) == 0 || step == 1) {
      printf(" Time step %4d\n", step);
    }

    adi();

  }

  timer_stop(0);
  t = timer_read(0);

  verify(niter, &class, &verified);

  tmax = t;

  if( tmax != 0.0 ) {
    mflops = (881.174*(double)( PROBLEM_SIZE*PROBLEM_SIZE*PROBLEM_SIZE )
             -4683.91*(double)( PROBLEM_SIZE*PROBLEM_SIZE )
             +11484.5*(double)( PROBLEM_SIZE )
             -19272.4) * (double)( niter ) / (tmax*1000000.0);
  } else {
    mflops = 0.0;
  }

  c_print_results("SP", class, grid_points[0], 
      grid_points[1], grid_points[2], niter,
      tmax, mflops, "          floating point", 
      verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5, 
      CS6, CS7, clu_GetDeviceTypeName(device_type), device_name, num_devices);

  if (timeron) {
/*
    for (i = 0; i < t_last; i++) {
      t1[i] = timer_read(i);
    }
    t1[t_xsolve] = t1[t_xsolve] - t1[t_xcomm];
    t1[t_ysolve] = t1[t_ysolve] - t1[t_ycomm];
    t1[t_zsolve] = t1[t_zsolve] - t1[t_zcomm];
    t1[t_last+2] = t1[t_xcomm]+t1[t_ycomm]+t1[t_zcomm]+t1[t_exch];
    t1[t_last+1] = t1[t_total]  - t1[t_last+2];

    MPI_Reduce(&t1, tsum,  t_last+2, dp_type, MPI_SUM, 0, comm_setup);
    MPI_Reduce(&t1, tming, t_last+2, dp_type, MPI_MIN, 0, comm_setup);
    MPI_Reduce(&t1, tmaxg, t_last+2, dp_type, MPI_MAX, 0, comm_setup);

    if (node == 0) {
      printf(" nprocs =%6d           minimum     maximum     average\n",
          total_nodes);
      for (i = 0; i < t_last+2; i++) {
        tsum[i] = tsum[i] / total_nodes;
          printf(" timer %2d(%8s) :  %10.4f  %10.4f  %10.4f\n",
              i+1, t_recs[i], tming[i], tmaxg[i], tsum[i]);
      }
    }
*/
  }

  release_opencl();

  return 0;
}