示例#1
0
文件: ft.c 项目: CoryXie/BarrelfishOS
/*
 * Runs the OpenMP FT benchmark once and reports the elapsed time.
 *
 * carg: the requested OpenMP thread count, passed as an integer stored in
 *       the pointer value (it is converted back through uintptr_t).
 *
 * The problem is first run once untimed so that all data is touched; all
 * timers are then cleared and the timed run is performed: setup, forward
 * FFT, niter iterations of evolve / inverse FFT / checksum, and verify.
 *
 * Returns 0 once the results have been printed.
 */
static int realmain(void *carg)
{
    unsigned arg = (unsigned)(uintptr_t)carg;

    int i;

/*------------------------------------------------------------------
c u0, u1, u2 are the main arrays in the problem.
c Depending on the decomposition, these arrays will have different
c dimensions. To accommodate all possibilities, we allocate them as
c one-dimensional arrays and pass them to subroutines for different
c views
c  - u0 contains the (transformed) initial condition
c  - u1 and u2 are working arrays
c  - indexmap maps i,j,k of u0 to the correct i^2+j^2+k^2 for the
c    time evolution operator.
c-----------------------------------------------------------------*/

/*--------------------------------------------------------------------
c The large arrays are declared static so that they are not placed on
c the stack. They are not referenced directly anywhere else. Padding
c is to avoid accidental cache problems, since all array sizes are
c powers of two.
c-------------------------------------------------------------------*/
    static dcomplex u0[NZ][NY][NX];
    static dcomplex pad1[3];
    static dcomplex u1[NZ][NY][NX];
    static dcomplex pad2[3];
    static dcomplex u2[NZ][NY][NX];
    static dcomplex pad3[3];
    static int indexmap[NZ][NY][NX];

    int iter;
    int nthreads = 1;
    double total_time, mflops;
    boolean verified;
    char class;

    omp_set_num_threads(arg);
/*--------------------------------------------------------------------
c Run the entire problem once to make sure all data is touched.
c This reduces variable startup costs, which is important for such a
c short benchmark. The other NPB 2 implementations are similar.
c-------------------------------------------------------------------*/
    for (i = 0; i < T_MAX; i++) {
        timer_clear(i);
    }
    setup();
#pragma omp parallel
 {
    compute_indexmap(indexmap, dims[2]);
#pragma omp single
   {
    compute_initial_conditions(u1, dims[0]);
    fft_init (dims[0][0]);
   }
    fft(1, u1, u0);
 } /* end parallel */

/*--------------------------------------------------------------------
c Start over from the beginning. Note that all operations must
c be timed, in contrast to other benchmarks.
c-------------------------------------------------------------------*/
    for (i = 0; i < T_MAX; i++) {
        timer_clear(i);
    }

    timer_start(T_TOTAL);
    if (TIMERS_ENABLED == TRUE) timer_start(T_SETUP);

#pragma omp parallel private(iter) firstprivate(niter)
  {
    compute_indexmap(indexmap, dims[2]);

#pragma omp single
   {
    compute_initial_conditions(u1, dims[0]);

    fft_init (dims[0][0]);
   }

    /* Per-phase timers are driven by the master thread only. */
    if (TIMERS_ENABLED == TRUE) {
#pragma omp master
      timer_stop(T_SETUP);
    }
    if (TIMERS_ENABLED == TRUE) {
#pragma omp master
      timer_start(T_FFT);
    }
    fft(1, u1, u0);
    if (TIMERS_ENABLED == TRUE) {
#pragma omp master
      timer_stop(T_FFT);
    }

    for (iter = 1; iter <= niter; iter++) {
        if (TIMERS_ENABLED == TRUE) {
#pragma omp master
          timer_start(T_EVOLVE);
        }

        evolve(u0, u1, iter, indexmap, dims[0]);

        if (TIMERS_ENABLED == TRUE) {
#pragma omp master
          timer_stop(T_EVOLVE);
        }
        if (TIMERS_ENABLED == TRUE) {
#pragma omp master
          timer_start(T_FFT);
        }

        fft(-1, u1, u2);

        if (TIMERS_ENABLED == TRUE) {
#pragma omp master
          timer_stop(T_FFT);
        }
        if (TIMERS_ENABLED == TRUE) {
#pragma omp master
          timer_start(T_CHECKSUM);
        }

        checksum(iter, u2, dims[0]);

        if (TIMERS_ENABLED == TRUE) {
#pragma omp master
          timer_stop(T_CHECKSUM);
        }
    }

#pragma omp single
    verify(NX, NY, NZ, niter, &verified, &class);

#if defined(_OPENMP)
#pragma omp master
    nthreads = omp_get_num_threads();
#endif /* _OPENMP */
  } /* end parallel */

    timer_stop(T_TOTAL);
    total_time = timer_read(T_TOTAL);

    /*
     * mflops, nthreads, verified and class are only consumed by the
     * disabled c_print_results call below; they are still computed so the
     * call can be re-enabled without further changes.
     */
    if (total_time != 0.0) {
        mflops = 1.0e-6*(double)(NTOTAL) *
            (14.8157+7.19641*log((double)(NTOTAL))
             +  (5.23518+7.21113*log((double)(NTOTAL)))*niter)
            /total_time;
    } else {
        mflops = 0.0;
    }
#ifdef BOMP
    backend_create_time(arg);
#endif
    /* arg is unsigned, so it is printed with %u rather than %d. */
    printf("Computetime %u %f\n", arg, total_time);
    printf("client done\n");
/*     c_print_results("FT", class, NX, NY, NZ, niter, nthreads, */
/* 		    total_time, mflops, "          floating point", verified,  */
/* 		    NPBVERSION, COMPILETIME, */
/* 		    CS1, CS2, CS3, CS4, CS5, CS6, CS7); */
    if (TIMERS_ENABLED == TRUE) print_timers();

    /* The function is declared int; give callers a defined result. */
    return 0;
}
示例#2
0
文件: ft.c 项目: ashwinma/multicl
int main(int argc, char *argv[])
{
  int i;
  int iter;
  double total_time, mflops;
  logical verified;
  char Class;

  if (argc == 1) {
    fprintf(stderr, "Usage: %s <kernel directory>\n", argv[0]);
    exit(-1);
  }

  //---------------------------------------------------------------------
  // Run the entire problem once to make sure all data is touched. 
  // This reduces variable startup costs, which is important for such a 
  // short benchmark. The other NPB 2 implementations are similar. 
  //---------------------------------------------------------------------
  for (i = 1; i <= T_max; i++) {
    timer_clear(i);
  }
  setup();
  setup_opencl(argc, argv);
  init_ui(&m_u0, &m_u1, &m_twiddle, dims[0], dims[1], dims[2]);
  compute_indexmap(&m_twiddle, dims[0], dims[1], dims[2]);
  compute_initial_conditions(&m_u1, dims[0], dims[1], dims[2]);
  fft_init(dims[0]);
  fft(1, &m_u1, &m_u0);

  //---------------------------------------------------------------------
  // Start over from the beginning. Note that all operations must
  // be timed, in contrast to other benchmarks. 
  //---------------------------------------------------------------------
  for (i = 1; i <= T_max; i++) {
    timer_clear(i);
  }

  timer_start(T_total);
  if (timers_enabled) timer_start(T_setup);

  DTIMER_START(T_compute_im);
  compute_indexmap(&m_twiddle, dims[0], dims[1], dims[2]);
  DTIMER_STOP(T_compute_im);

  DTIMER_START(T_compute_ics);
  compute_initial_conditions(&m_u1, dims[0], dims[1], dims[2]);
  DTIMER_STOP(T_compute_ics);

  DTIMER_START(T_fft_init);
  fft_init(dims[0]);
  DTIMER_STOP(T_fft_init);

  if (timers_enabled) timer_stop(T_setup);
  if (timers_enabled) timer_start(T_fft);
  fft(1, &m_u1, &m_u0);
  if (timers_enabled) timer_stop(T_fft);

  for (iter = 1; iter <= niter; iter++) {
    if (timers_enabled) timer_start(T_evolve);
    evolve(&m_u0, &m_u1, &m_twiddle, dims[0], dims[1], dims[2]);
    if (timers_enabled) timer_stop(T_evolve);
    if (timers_enabled) timer_start(T_fft);
    fft(-1, &m_u1, &m_u1);
    if (timers_enabled) timer_stop(T_fft);
    if (timers_enabled) timer_start(T_checksum);
    checksum(iter, &m_u1, dims[0], dims[1], dims[2]);
    if (timers_enabled) timer_stop(T_checksum);
  }

  verify(NX, NY, NZ, niter, &verified, &Class);

  timer_stop(T_total);
  total_time = timer_read(T_total);

  if (total_time != 0.0) {
    mflops = 1.0e-6 * (double)NTOTAL *
            (14.8157 + 7.19641 * log((double)NTOTAL)
            + (5.23518 + 7.21113 * log((double)NTOTAL)) * niter)
            / total_time;
  } else {
    mflops = 0.0;
  }
  c_print_results("FT", Class, NX, NY, NZ, niter,
                  total_time, mflops, "          floating point", verified, 
                  NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, CS7,
                  clu_GetDeviceTypeName(device_type),
                  device_name);
  if (timers_enabled) print_timers();

  release_opencl();

  fflush(stdout);

  return 0;
}
示例#3
0
文件: appft.c 项目: ashwinma/multicl
/*
 * Runs the FT computation and reports its total time.
 *
 * niter:      number of evolve / inverse-FFT / checksum iterations.
 * total_time: receives the reading of timer 1 (the timed section).
 * verified:   passed through to verify(), which reports the outcome.
 *
 * A warm-up pass (timer 2) computes the initial conditions, the three
 * exponent tables and one forward FFT. The timed section then fills the
 * twiddle array, recomputes the initial conditions, performs a forward
 * FFT, runs the iterations and verifies the checksums. When timers are
 * enabled, a per-timer report is printed at the end.
 */
void appft(int niter, double *total_time, logical *verified)
{
  /* Labels and timer numbers for the final report, in print order. */
  static const struct {
    const char *label;
    int timer;
  } report[] = {
    { "FT total                  ", 1 },
    { "WarmUp time               ", 2 },
    { "fftXYZ body               ", 3 },
    { "Swarztrauber              ", 4 },
    { "X time                    ", 7 },
    { "Y time                    ", 8 },
    { "Z time                    ", 9 },
    { "CalculateChecksum         ", 10 },
    { "evolve                    ", 11 },
    { "compute_initial_conditions", 12 },
    { "twiddle                   ", 13 },
    { "verify                    ", 14 },
    { "fftXYZ                    ", 15 },
  };
  const int report_len = (int)(sizeof report / sizeof report[0]);

  int t, step, r;
  int ix, iy, iz;
  int half_nx, half_ny, half_nz;
  double ap;

  dcomplex exp1[NX], exp2[NY], exp3[NZ];

  for (t = 1; t <= 15; t++) {
    timer_clear(t);
  }

  /* Warm-up pass, accounted to timer 2. */
  timer_start(2);
  compute_initial_conditions(NX, NY, NZ, xnt);

  CompExp(NX, exp1);
  CompExp(NY, exp2);
  CompExp(NZ, exp3);
  fftXYZ(1, NX, NY, NZ, xnt, (dcomplex *)y, exp1, exp2, exp3);
  timer_stop(2);

  /* Timed section, accounted to timer 1. */
  timer_start(1);
  if (timers_enabled) {
    timer_start(13);
  }

  half_nx = NX / 2;
  half_ny = NY / 2;
  half_nz = NZ / 2;
  ap = -4.0 * ALPHA * (PI * PI);

  /*
   * Fill the twiddle array. Each index has its extent subtracted once
   * for every whole multiple of the half extent it contains, and the
   * sum of the squared results feeds the exponential.
   */
  for (iz = 0; iz < NZ; iz++) {
    const int fz = iz - (iz / half_nz) * NZ;
    const int fz2 = fz * fz;
    for (iy = 0; iy < NY; iy++) {
      const int fy = iy - (iy / half_ny) * NY;
      const int fzy2 = fz2 + fy*fy;
      for (ix = 0; ix < NX; ix++) {
        const int fx = ix - (ix / half_nx) * NX;
        twiddle[iz][iy][ix] = exp(ap*(double)(fx*fx + fzy2));
      }
    }
  }

  if (timers_enabled) {
    timer_stop(13);
    timer_start(12);
  }
  compute_initial_conditions(NX, NY, NZ, xnt);
  if (timers_enabled) {
    timer_stop(12);
    timer_start(15);
  }
  fftXYZ(1, NX, NY, NZ, xnt, (dcomplex *)y, exp1, exp2, exp3);
  if (timers_enabled) {
    timer_stop(15);
  }

  for (step = 1; step <= niter; step++) {
    if (timers_enabled) {
      timer_start(11);
    }
    evolve(NX, NY, NZ, xnt, y, twiddle);
    if (timers_enabled) {
      timer_stop(11);
      timer_start(15);
    }
    /* Inverse transform, with xnt as both input and output. */
    fftXYZ(-1, NX, NY, NZ, xnt, (dcomplex *)xnt, exp1, exp2, exp3);
    if (timers_enabled) {
      timer_stop(15);
      timer_start(10);
    }
    CalculateChecksum(&sums[step], step, NX, NY, NZ, xnt);
    if (timers_enabled) {
      timer_stop(10);
    }
  }

  /* Verification test. */
  if (timers_enabled) {
    timer_start(14);
  }
  verify(NX, NY, NZ, niter, sums, verified);
  if (timers_enabled) {
    timer_stop(14);
  }
  timer_stop(1);

  *total_time = timer_read(1);

  if (timers_enabled) {
    printf(" FT subroutine timers \n");
    for (r = 0; r < report_len; r++) {
      printf(" %26s =%9.4f\n", report[r].label, timer_read(report[r].timer));
    }
    printf(" %26s =%9.4f\n", "Benchmark time            ", *total_time);
  }
}