Esempio n. 1
0
int main(int argc, char **argv)
#endif
{
  int i, niter, step;
  double mflops, t, tmax;
  logical verified;
  char class;
  double tsum[t_last+2], t1[t_last+2],
         tming[t_last+2], tmaxg[t_last+2];
  char *t_recs[t_last+2] = {
       "total", "rhs", "xsolve", "ysolve", "zsolve", 
       "bpack", "exch", "xcomm", "ycomm", "zcomm",
       " totcomp", " totcomm" };

  //---------------------------------------------------------------------
  // Root node reads input file (if it exists) else takes
  // defaults from parameters
  //---------------------------------------------------------------------
  printf("\n\n NAS Parallel Benchmarks (NPB3.3-OCL-MD) - SP Benchmark\n\n");

  FILE *fp;
  fp = fopen("timer.flag", "r");
  timeron = false;
  if (fp != NULL) {
    timeron = true;
    fclose(fp);
  }

  if ((fp = fopen("inputsp.data", "r")) != NULL) {
    int result;
    printf(" Reading from input file inputsp.data\n");
    result = fscanf(fp, "%d", &niter);
    while (fgetc(fp) != '\n');
    result = fscanf(fp, "%*f");
    while (fgetc(fp) != '\n');
    result = fscanf(fp, "%d%d%d", &grid_points[0], &grid_points[1], 
                                  &grid_points[2]);
    fclose(fp);
  } else {
    printf(" No input file inputsp.data. Using compiled defaults\n");
    niter = NITER_DEFAULT;
    grid_points[0] = PROBLEM_SIZE;
    grid_points[1] = PROBLEM_SIZE;
    grid_points[2] = PROBLEM_SIZE;
  }

  setup_opencl(argc, argv);

  printf(" Size: %4dx%4dx%4d\n", 
      grid_points[0], grid_points[1], grid_points[2]);
  printf(" Iterations: %4d", niter);
  if (num_devices != MAXCELLS*MAXCELLS) 
    printf(" WARNING: compiled for %5d devices \n", MAXCELLS*MAXCELLS);
  printf(" Number of active devices: %5d\n\n", num_devices);

  make_set();

  for (i = 0; i < t_last; i++) {
    timer_clear(i);
  }

  set_constants();

  initialize();

  lhsinit();

  exact_rhs();

  compute_buffer_size(5);

  set_kernel_args();

  //---------------------------------------------------------------------
  // do one time step to touch all code, and reinitialize
  //---------------------------------------------------------------------
#ifdef MINIMD_SNUCL_OPTIMIZATIONS
  // set cmd queue property
  for(i = 0; i < num_devices; i++) {
  	clSetCommandQueueProperty(cmd_queue[i], 
			CL_QUEUE_AUTO_DEVICE_SELECTION | 
			//CL_QUEUE_ITERATIVE | 
			CL_QUEUE_COMPUTE_INTENSIVE,
			true,
			NULL);
  }
#endif
  adi();
#ifdef MINIMD_SNUCL_OPTIMIZATIONS
  for(i = 0; i < num_devices; i++) {
  	clSetCommandQueueProperty(cmd_queue[i], 
			0,
			true,
			NULL);
  }
#endif

  initialize();

  //---------------------------------------------------------------------
  // Synchronize before placing time stamp
  //---------------------------------------------------------------------
  for (i = 0; i < t_last; i++) {
    timer_clear(i);
  }

  timer_clear(0);
  timer_start(0);

  for (step = 1; step <= niter; step++) {

    if ((step % 20) == 0 || step == 1) {
      printf(" Time step %4d\n", step);
    }

    adi();

  }

  timer_stop(0);
  t = timer_read(0);

  verify(niter, &class, &verified);

  tmax = t;

  if( tmax != 0.0 ) {
    mflops = (881.174*(double)( PROBLEM_SIZE*PROBLEM_SIZE*PROBLEM_SIZE )
             -4683.91*(double)( PROBLEM_SIZE*PROBLEM_SIZE )
             +11484.5*(double)( PROBLEM_SIZE )
             -19272.4) * (double)( niter ) / (tmax*1000000.0);
  } else {
    mflops = 0.0;
  }

  c_print_results("SP", class, grid_points[0], 
      grid_points[1], grid_points[2], niter,
      tmax, mflops, "          floating point", 
      verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5, 
      CS6, CS7, clu_GetDeviceTypeName(device_type), device_name, num_devices);

  if (timeron) {
/*
    for (i = 0; i < t_last; i++) {
      t1[i] = timer_read(i);
    }
    t1[t_xsolve] = t1[t_xsolve] - t1[t_xcomm];
    t1[t_ysolve] = t1[t_ysolve] - t1[t_ycomm];
    t1[t_zsolve] = t1[t_zsolve] - t1[t_zcomm];
    t1[t_last+2] = t1[t_xcomm]+t1[t_ycomm]+t1[t_zcomm]+t1[t_exch];
    t1[t_last+1] = t1[t_total]  - t1[t_last+2];

    MPI_Reduce(&t1, tsum,  t_last+2, dp_type, MPI_SUM, 0, comm_setup);
    MPI_Reduce(&t1, tming, t_last+2, dp_type, MPI_MIN, 0, comm_setup);
    MPI_Reduce(&t1, tmaxg, t_last+2, dp_type, MPI_MAX, 0, comm_setup);

    if (node == 0) {
      printf(" nprocs =%6d           minimum     maximum     average\n",
          total_nodes);
      for (i = 0; i < t_last+2; i++) {
        tsum[i] = tsum[i] / total_nodes;
          printf(" timer %2d(%8s) :  %10.4f  %10.4f  %10.4f\n",
              i+1, t_recs[i], tming[i], tmaxg[i], tsum[i]);
      }
    }
*/
  }

  release_opencl();

  return 0;
}
Esempio n. 2
0
int main(int argc, char *argv[])
{
  int i, niter, step, n3;
  double mflops, t, tmax, trecs[t_last+1];
  logical verified;
  char Class;
  char *t_names[t_last+1];

  //---------------------------------------------------------------------
  // Read input file (if it exists), else take
  // defaults from parameters
  //---------------------------------------------------------------------
  FILE *fp;
  if ((fp = fopen("timer.flag", "r")) != NULL) {
    timeron = true;
    t_names[t_total] = "total";
    t_names[t_rhsx] = "rhsx";
    t_names[t_rhsy] = "rhsy";
    t_names[t_rhsz] = "rhsz";
    t_names[t_rhs] = "rhs";
    t_names[t_xsolve] = "xsolve";
    t_names[t_ysolve] = "ysolve";
    t_names[t_zsolve] = "zsolve";
    t_names[t_rdis1] = "redist1";
    t_names[t_rdis2] = "redist2";
    t_names[t_tzetar] = "tzetar";
    t_names[t_ninvr] = "ninvr";
    t_names[t_pinvr] = "pinvr";
    t_names[t_txinvr] = "txinvr";
    t_names[t_add] = "add";
    fclose(fp);
  } else {
    timeron = false;
  }

  printf("\n\n NAS Parallel Benchmarks (NPB3.3-SER-C) - SP Benchmark\n\n");

  if ((fp = fopen("inputsp.data", "r")) != NULL) {
    int result;
    printf(" Reading from input file inputsp.data\n");
    result = fscanf(fp, "%d", &niter);
    while (fgetc(fp) != '\n');
    result = fscanf(fp, "%lf", &dt);
    while (fgetc(fp) != '\n');
    result = fscanf(fp, "%d%d%d", &grid_points[0], &grid_points[1], &grid_points[2]);
    fclose(fp);
  } else {
    printf(" No input file inputsp.data. Using compiled defaults\n");
    niter = NITER_DEFAULT;
    dt    = DT_DEFAULT;
    grid_points[0] = PROBLEM_SIZE;
    grid_points[1] = PROBLEM_SIZE;
    grid_points[2] = PROBLEM_SIZE;
  }

  printf(" Size: %4dx%4dx%4d\n", 
      grid_points[0], grid_points[1], grid_points[2]);
  printf(" Iterations: %4d    dt: %10.6f\n", niter, dt);
  printf("\n");

  if ((grid_points[0] > IMAX) ||
      (grid_points[1] > JMAX) ||
      (grid_points[2] > KMAX) ) {
    printf(" %d, %d, %d\n", grid_points[0], grid_points[1], grid_points[2]);
    printf(" Problem size too big for compiled array sizes\n");
    return 0;
  }
  nx2 = grid_points[0] - 2;
  ny2 = grid_points[1] - 2;
  nz2 = grid_points[2] - 2;

  set_constants();

  for (i = 1; i <= t_last; i++) {
    timer_clear(i);
  }

  exact_rhs();

  initialize();

  //---------------------------------------------------------------------
  // do one time step to touch all code, and reinitialize
  //---------------------------------------------------------------------
  adi();
  initialize();

  for (i = 1; i <= t_last; i++) {
    timer_clear(i);
  }
  timer_start(1);

  for (step = 1; step <= niter; step++) {
    if ((step % 20) == 0 || step == 1) {
      printf(" Time step %4d\n", step);
    }

    adi();
  }

  timer_stop(1);
  tmax = timer_read(1);

  verify(niter, &Class, &verified);

  if (tmax != 0.0) {
    n3 = grid_points[0]*grid_points[1]*grid_points[2];
    t = (grid_points[0]+grid_points[1]+grid_points[2])/3.0;
    mflops = (881.174 * (double)n3
             - 4683.91 * (t * t)
             + 11484.5 * t
             - 19272.4) * (double)niter / (tmax*1000000.0);
  } else {
    mflops = 0.0;
  }

  print_results("SP", Class, grid_points[0], 
                grid_points[1], grid_points[2], niter, 
                tmax, mflops, "          floating point", 
                verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5, 
                CS6, "(none)");

  //---------------------------------------------------------------------
  // More timers
  //---------------------------------------------------------------------
  if (timeron) {
    for (i = 1; i <= t_last; i++) {
      trecs[i] = timer_read(i);
    }
    if (tmax == 0.0) tmax = 1.0;

    printf("  SECTION   Time (secs)\n");
    for (i = 1; i <= t_last; i++) {
      printf("  %-8s:%9.3f  (%6.2f%%)\n", 
          t_names[i], trecs[i], trecs[i]*100./tmax);
      if (i == t_rhs) {
        t = trecs[t_rhsx] + trecs[t_rhsy] + trecs[t_rhsz];
        printf("    --> %8s:%9.3f  (%6.2f%%)\n", "sub-rhs", t, t*100./tmax);
        t = trecs[t_rhs] - t;
        printf("    --> %8s:%9.3f  (%6.2f%%)\n", "rest-rhs", t, t*100./tmax);
      } else if (i == t_zsolve) {
        t = trecs[t_zsolve] - trecs[t_rdis1] - trecs[t_rdis2];
        printf("    --> %8s:%9.3f  (%6.2f%%)\n", "sub-zsol", t, t*100./tmax);
      } else if (i == t_rdis2) {
        t = trecs[t_rdis1] + trecs[t_rdis2];
        printf("    --> %8s:%9.3f  (%6.2f%%)\n", "redist", t, t*100./tmax);
      }
    }
  }

  return 0;
}
Esempio n. 3
0
int main(int argc, char *argv[])
{
  int i, niter, step;
  double navg, mflops, n3;

  double tmax, t, trecs[t_last+1];
  logical verified;
  char Class;
  char *t_names[t_last+1];

  //---------------------------------------------------------------------
  // Root node reads input file (if it exists) else takes
  // defaults from parameters
  //---------------------------------------------------------------------
  FILE *fp;
  if ((fp = fopen("timer.flag", "r")) != NULL) {
    timeron = true;
    t_names[t_total] = "total";
    t_names[t_rhsx] = "rhsx";
    t_names[t_rhsy] = "rhsy";
    t_names[t_rhsz] = "rhsz";
    t_names[t_rhs] = "rhs";
    t_names[t_xsolve] = "xsolve";
    t_names[t_ysolve] = "ysolve";
    t_names[t_zsolve] = "zsolve";
    t_names[t_rdis1] = "redist1";
    t_names[t_rdis2] = "redist2";
    t_names[t_add] = "add";
    fclose(fp);
  } else {
    timeron = false;
  }

  printf("\n\n NAS Parallel Benchmarks (NPB3.3-OMP-C) - BT Benchmark\n\n");

  if ((fp = fopen("inputbt.data", "r")) != NULL) {
    int result;
    printf(" Reading from input file inputbt.data\n");
    result = fscanf(fp, "%d", &niter);
    while (fgetc(fp) != '\n');
    result = fscanf(fp, "%lf", &dt);
    while (fgetc(fp) != '\n');
    result = fscanf(fp, "%d%d%d\n", 
        &grid_points[0], &grid_points[1], &grid_points[2]);
    fclose(fp);
  } else {
    printf(" No input file inputbt.data. Using compiled defaults\n");
    niter = NITER_DEFAULT;
    dt    = DT_DEFAULT;
    grid_points[0] = PROBLEM_SIZE;
    grid_points[1] = PROBLEM_SIZE;
    grid_points[2] = PROBLEM_SIZE;
  }

  printf(" Size: %4dx%4dx%4d\n",
      grid_points[0], grid_points[1], grid_points[2]);
  printf(" Iterations: %4d       dt: %11.7f\n", niter, dt);
  printf(" Number of available threads: %5d\n", omp_get_max_threads());
  printf("\n");

  if ( (grid_points[0] > IMAX) ||
       (grid_points[1] > JMAX) ||
       (grid_points[2] > KMAX) ) {
    printf(" %d, %d, %d\n", grid_points[0], grid_points[1], grid_points[2]);
    printf(" Problem size too big for compiled array sizes\n");
    return 0;
  }

  set_constants();

  for (i = 1; i <= t_last; i++) {
    timer_clear(i);
  }

  initialize();

  exact_rhs();

  //---------------------------------------------------------------------
  // do one time step to touch all code, and reinitialize
  //---------------------------------------------------------------------
  adi();
  initialize();

  for (i = 1; i <= t_last; i++) {
    timer_clear(i);
  }
  timer_start(1);

// Do not do inlining, to avoid huge loops, scops are kept separate and are
// distributed among files.
//#pragma scop
  for (step = 1; step <= niter; step++) {
    adi();
  }
//#pragma endscop

  timer_stop(1);
  tmax = timer_read(1);

  verify(niter, &Class, &verified);

  n3 = 1.0*grid_points[0]*grid_points[1]*grid_points[2];
  navg = (grid_points[0]+grid_points[1]+grid_points[2])/3.0;
  if(tmax != 0.0) {
    mflops = 1.0e-6 * (double)niter *
      (3478.8 * n3 - 17655.7 * (navg*navg) + 28023.7 * navg)
      / tmax;
  } else {
    mflops = 0.0;
  }
  print_results("BT", Class, grid_points[0], 
                grid_points[1], grid_points[2], niter,
                tmax, mflops, "          floating point", 
                verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5, 
                CS6, "(none)");

  //---------------------------------------------------------------------
  // More timers
  //---------------------------------------------------------------------
  if (timeron) {
    for (i = 1; i <= t_last; i++) {
      trecs[i] = timer_read(i);
    }
    if (tmax == 0.0) tmax = 1.0;

    printf("  SECTION   Time (secs)\n");
    for (i = 1; i <= t_last; i++) {
      printf("  %-8s:%9.3f  (%6.2f%%)\n", 
          t_names[i], trecs[i], trecs[i]*100./tmax);
      if (i == t_rhs) {
        t = trecs[t_rhsx] + trecs[t_rhsy] + trecs[t_rhsz];
        printf("    --> %8s:%9.3f  (%6.2f%%)\n", "sub-rhs", t, t*100./tmax);
        t = trecs[t_rhs] - t;
        printf("    --> %8s:%9.3f  (%6.2f%%)\n", "rest-rhs", t, t*100./tmax);
      } else if (i==t_zsolve) {
        t = trecs[t_zsolve] - trecs[t_rdis1] - trecs[t_rdis2];
        printf("    --> %8s:%9.3f  (%6.2f%%)\n", "sub-zsol", t, t*100./tmax);
      } else if (i==t_rdis2) {
        t = trecs[t_rdis1] + trecs[t_rdis2];
        printf("    --> %8s:%9.3f  (%6.2f%%)\n", "redist", t, t*100./tmax);
      }
    }
  }

  return 0;
}