Esempio n. 1
0
/**
 * Fill the local part of the real array `a` with samples of testcase().
 *
 * The local extents (isize) and global offsets (istart) are obtained from
 * accfft_local_size_dft_r2cf(). Local element (i,j,k) is written to
 * a[i*isize[1]*n[2] + j*n[2] + k], i.e. each row is n[2] floats long
 * (unpadded layout).
 *
 * @param a       destination buffer for the local samples
 * @param n       global grid size in each of the three dimensions
 * @param c_comm  communicator forwarded to accfft_local_size_dft_r2cf()
 */
void initialize(float *a, int *n, MPI_Comm c_comm)
{
  float pi=M_PI;
  int istart[3], isize[3], osize[3],ostart[3];
  accfft_local_size_dft_r2cf(n,isize,istart,osize,ostart,c_comm);

  // Widen the strides before multiplying: the flat index must not be
  // computed in int, where it can overflow for large local blocks.
  const long long int row_stride=n[2];
  const long long int slab_stride=static_cast<long long int>(isize[1])*n[2];

  // Loop variables and coordinates are declared inside the loop, so each
  // thread gets its own copies.
#pragma omp parallel for
  for (int i=0; i<isize[0]; i++){
    // Global index (local index + offset) mapped onto [0, 2*pi)
    const float X=2*pi/n[0]*(i+istart[0]);
    for (int j=0; j<isize[1]; j++){
      const float Y=2*pi/n[1]*(j+istart[1]);
      for (int k=0; k<isize[2]; k++){
        // NOTE(review): no istart[2] offset is applied here; presumably the
        // third dimension is never split across ranks -- confirm.
        const float Z=2*pi/n[2]*k;
        const long long int ptr=i*slab_stride+j*row_stride+k;
        a[ptr]=testcase(X,Y,Z);
      }
    }
  }
  return;
} // end initialize
Esempio n. 2
0
/**
 * Fill the local part of the real array `a` with samples of testcase(),
 * using the padded row length n2_ = (n[2]/2+1)*2.
 *
 * The local extents (isize) and global offsets (istart) are obtained from
 * accfft_local_size_dft_r2cf(). Local element (i,j,k) is written to
 * a[i*isize[1]*n2_ + j*n2_ + k].
 *
 * @param a       destination buffer for the local samples
 * @param n       global grid size in each of the three dimensions
 * @param c_comm  communicator forwarded to accfft_local_size_dft_r2cf()
 */
void initialize(float *a,int*n, MPI_Comm c_comm) {
    float pi=M_PI;
    // Note that n2_ is the padded version of n2 which is
    // the spatial size of the array. To access the spatial members
    // we must use n2_, because that is how it is written in memory!
    int n2_=(n[2]/2+1)*2;

    // Get the local pencil size and the allocation size
    int istart[3], isize[3], osize[3],ostart[3];
    accfft_local_size_dft_r2cf(n,isize,istart,osize,ostart,c_comm);

    // Widen the strides before multiplying: the flat index must not be
    // computed in int, where it can overflow for large local blocks.
    const long long int row_stride=n2_;
    const long long int slab_stride=static_cast<long long int>(isize[1])*n2_;

    // Loop variables and coordinates are declared inside the loop, so each
    // thread gets its own copies.
    #pragma omp parallel for
    for (int i=0; i<isize[0]; i++) {
        // Global index (local index + offset) mapped onto [0, 2*pi)
        const float X=2*pi/n[0]*(i+istart[0]);
        for (int j=0; j<isize[1]; j++) {
            const float Y=2*pi/n[1]*(j+istart[1]);
            for (int k=0; k<isize[2]; k++) {
                // NOTE(review): no istart[2] offset is applied here; presumably
                // the third dimension is never split across ranks -- confirm.
                const float Z=2*pi/n[2]*k;
                const long long int ptr=i*slab_stride+j*row_stride+k;
                a[ptr]=testcase(X,Y,Z);
            }
        }
    }

    return;
} // end initialize
Esempio n. 3
0
/**
 * Compare the local part of `a`, scaled by 1/(n[0]*n[1]*n[2]), against
 * testcase() and report the summed absolute (L1) error and the error
 * relative to the summed absolute reference values.
 *
 * `a` is read with the padded row length n2_ = (n[2]/2+1)*2. The per-rank
 * sums are combined with MPI_Reduce (MPI_SUM) onto rank 0 of c_comm and the
 * result is printed through PCOUT.
 *
 * @param a       local data to verify (presumably the output of a forward
 *                followed by an unnormalized inverse transform -- confirm)
 * @param n       global grid size in each of the three dimensions
 * @param c_comm  communicator the data is distributed over
 */
void check_err(float* a,int*n,MPI_Comm c_comm) {
    int nprocs, procid;
    MPI_Comm_rank(c_comm, &procid);
    MPI_Comm_size(c_comm,&nprocs);

    // Total number of global grid points; `a` is divided by this before comparing
    long long int size=n[0];
    size*=n[1];
    size*=n[2];
    float pi=M_PI;

    // Note that n2_ is the padded version of n2 which is
    // the spatial size of the array. To access the spatial members
    // we must use n2_, because that is how it is written in memory!
    int n2_=(n[2]/2+1)*2;
    // Get the local pencil size and the allocation size
    int istart[3], isize[3], osize[3],ostart[3];
    accfft_local_size_dft_r2cf(n,isize,istart,osize,ostart,c_comm);

    // Widen the strides before multiplying: the flat index must not be
    // computed in int, where it can overflow for large local blocks.
    const long long int row_stride=n2_;
    const long long int slab_stride=static_cast<long long int>(isize[1])*n2_;

    float err=0,norm=0;
    for (int i=0; i<isize[0]; i++) {
        const float X=2*pi/n[0]*(i+istart[0]);
        for (int j=0; j<isize[1]; j++) {
            const float Y=2*pi/n[1]*(j+istart[1]);
            for (int k=0; k<isize[2]; k++) {
                const float Z=2*pi/n[2]*k;
                const long long int ptr=i*slab_stride+j*row_stride+k;
                float numerical=a[ptr]/size;
                // Only NaN compares unequal to itself; count it as 0
                if(numerical!=numerical) numerical=0;
                err+=std::abs(numerical-testcase(X,Y,Z));
                norm+=std::abs(testcase(X,Y,Z));
                //std::cout<<"("<<i<<","<<j<<","<<k<<")  "<<numerical<<'\t'<<testcase(X,Y,Z)<<std::endl;
            }
        }
    }

    // Reduce over c_comm -- the communicator procid was taken from and the
    // data is distributed over -- so that root 0 is the rank with procid==0.
    float g_err=0,g_norm=0;
    MPI_Reduce(&err,&g_err,1, MPI_FLOAT, MPI_SUM,0, c_comm);
    MPI_Reduce(&norm,&g_norm,1, MPI_FLOAT, MPI_SUM,0, c_comm);
    PCOUT<<"\nL1 Error of iFF(a)-a: "<<g_err<<std::endl;
    PCOUT<<"Relative L1 Error of iFF(a)-a: "<<g_err/g_norm<<std::endl;
    // Braces are deliberate: PCOUT is a macro whose definition is not visible
    // here; if it expands to an unbraced `if (...) std::cout`, an unbraced
    // `else` would attach to the macro's `if` instead of this one.
    if (g_err/g_norm< 1e-5) {
        PCOUT<<"\nResults are CORRECT! (upto single precision)\n\n";
    } else {
        PCOUT<<"\nResults are NOT CORRECT!\n\n";
    }

    return;
} // end check_err
Esempio n. 4
0
/**
 * Compare the local part of `a`, scaled by 1/(n[0]*n[1]*n[2]), against
 * testcase() and report the summed absolute (L1) error and the error
 * relative to the summed absolute reference values.
 *
 * `a` is read with an unpadded row length of n[2] floats. The per-rank sums
 * are combined with MPI_Reduce (MPI_SUM) onto rank 0 of c_comm and the
 * result is printed through PCOUT.
 *
 * @param a       local data to verify (presumably the output of a forward
 *                followed by an unnormalized inverse transform -- confirm)
 * @param n       global grid size in each of the three dimensions
 * @param c_comm  communicator the data is distributed over
 */
void check_err(float* a, int*n, MPI_Comm c_comm) {
	int nprocs, procid;
	MPI_Comm_rank(c_comm, &procid);
	MPI_Comm_size(c_comm, &nprocs);
	// Total number of global grid points; `a` is divided by this before comparing
	long long int size = n[0];
	size *= n[1];
	size *= n[2];
	float pi = M_PI;

	int istart[3], isize[3], osize[3], ostart[3];
	accfft_local_size_dft_r2cf(n, isize, istart, osize, ostart, c_comm);

	// Widen the strides before multiplying: the flat index must not be
	// computed in int, where it can overflow for large local blocks.
	const long long int row_stride = n[2];
	const long long int slab_stride = static_cast<long long int>(isize[1]) * n[2];

	float err = 0, norm = 0;
	for (int i = 0; i < isize[0]; i++) {
		const float X = 2 * pi / n[0] * (i + istart[0]);
		for (int j = 0; j < isize[1]; j++) {
			const float Y = 2 * pi / n[1] * (j + istart[1]);
			for (int k = 0; k < isize[2]; k++) {
				const float Z = 2 * pi / n[2] * k;
				const long long int ptr = i * slab_stride + j * row_stride + k;
				float numerical_r = a[ptr] / size;
				// Only NaN compares unequal to itself; count it as 0
				if (numerical_r != numerical_r)
					numerical_r = 0;
				err += std::abs(numerical_r - testcase(X, Y, Z));
				norm += std::abs(testcase(X, Y, Z));
			}
		}
	}

	// Reduce over c_comm -- the communicator procid was taken from and the
	// data is distributed over -- so that root 0 is the rank with procid==0.
	float g_err = 0, g_norm = 0;
	MPI_Reduce(&err, &g_err, 1, MPI_FLOAT, MPI_SUM, 0, c_comm);
	MPI_Reduce(&norm, &g_norm, 1, MPI_FLOAT, MPI_SUM, 0, c_comm);
	PCOUT << "\nL1 Error of iFF(a)-a: " << g_err << std::endl;
	PCOUT << "Relative L1 Error of iFF(a)-a: " << g_err / g_norm << std::endl;
	// Braces are deliberate: PCOUT is a macro whose definition is not visible
	// here; if it expands to an unbraced `if (...) std::cout`, an unbraced
	// `else` would attach to the macro's `if` instead of this one.
	if (g_err / g_norm < 1e-5) {
		PCOUT << "\nResults are CORRECT! (upto single precision)\n\n";
	} else {
		PCOUT << "\nResults are NOT CORRECT!\n\n";
	}

	return;
} // end check_err
Esempio n. 5
0
/**
 * Compute the gradient of the testcase field with accfft_gradf(), check each
 * of the three components with check_err_grad(), and print the setup and
 * evaluation times (maximum over ranks).
 *
 * @param n         global grid size in each of the three dimensions
 * @param nthreads  thread count handed to accfft_init()
 */
void grad(int *n, int nthreads) {
  int nprocs, procid;
  MPI_Comm_rank(MPI_COMM_WORLD, &procid);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  /* Create Cartesian Communicator */
  int c_dims[2]={0};
  MPI_Comm c_comm;
  accfft_create_comm(MPI_COMM_WORLD,c_dims,&c_comm);

  float *data;
  Complexf *data_hat;
  double setup_time=0;
  int alloc_max=0;

  int isize[3],osize[3],istart[3],ostart[3];
  /* Get the local pencil size and the allocation size */
  alloc_max=accfft_local_size_dft_r2cf(n,isize,istart,osize,ostart,c_comm);

  //data=(float*)accfft_alloc(isize[0]*isize[1]*isize[2]*sizeof(float));
  data=(float*)accfft_alloc(alloc_max);
  data_hat=(Complexf*)accfft_alloc(alloc_max);

  accfft_init(nthreads);

  /* Create FFT plan */
  setup_time=-MPI_Wtime();
  accfft_planf * plan=accfft_plan_dft_3d_r2cf(n,data,(float*)data_hat,c_comm,ACCFFT_MEASURE);
  setup_time+=MPI_Wtime();


  /*  Initialize data */
  initialize(data,n,c_comm);
  MPI_Barrier(c_comm);

  /* Byte size of one gradient component. Accumulated in long long so the
     product of the three local extents cannot overflow int. */
  long long int grad_bytes=sizeof(float);
  grad_bytes*=isize[0];
  grad_bytes*=isize[1];
  grad_bytes*=isize[2];

  float * gradx=(float*)accfft_alloc(grad_bytes);
  float * grady=(float*)accfft_alloc(grad_bytes);
  float * gradz=(float*)accfft_alloc(grad_bytes);
  double timings[5]={0};

  /* All three bits set: request the x, y and z components */
  std::bitset<3> XYZ=0;
  XYZ[0]=1;
  XYZ[1]=1;
  XYZ[2]=1;
  double exec_time=-MPI_Wtime();
  accfft_gradf(gradx,grady,gradz,data,plan,&XYZ,timings);
  exec_time+=MPI_Wtime();
  /* Check err*/
  PCOUT<<">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"<<std::endl;
  PCOUT<<">>>>>>>>Checking Gradx>>>>>>>>"<<std::endl;
  check_err_grad(gradx,n,c_comm,0);
  PCOUT<<"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"<<std::endl;
  PCOUT<<"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"<<std::endl;

  PCOUT<<">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"<<std::endl;
  PCOUT<<">>>>>>>>Checking Grady>>>>>>>>"<<std::endl;
  check_err_grad(grady,n,c_comm,1);
  PCOUT<<"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"<<std::endl;
  PCOUT<<"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"<<std::endl;

  PCOUT<<">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"<<std::endl;
  PCOUT<<">>>>>>>>Checking Gradz>>>>>>>>"<<std::endl;
  check_err_grad(gradz,n,c_comm,2);
  PCOUT<<"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"<<std::endl;
  PCOUT<<"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"<<std::endl;

  /* Compute some timings statistics */
  double g_setup_time,g_timings[5],g_exec_time;

  /* NOTE(review): procid above is the rank in MPI_COMM_WORLD, while these
     reductions deliver to rank 0 of c_comm. If PCOUT prints on procid==0 and
     the two rank orderings differ, the printing rank would not hold the
     reduced values -- confirm against accfft_create_comm / PCOUT. */
  /* g_timings is reduced but not printed in this function */
  MPI_Reduce(timings,g_timings,5, MPI_DOUBLE, MPI_MAX,0, c_comm);
  MPI_Reduce(&setup_time,&g_setup_time,1, MPI_DOUBLE, MPI_MAX,0, c_comm);
  MPI_Reduce(&exec_time,&g_exec_time,1, MPI_DOUBLE, MPI_MAX,0, c_comm);

  PCOUT<<"Timing for Grad Computation for size "<<n[0]<<"*"<<n[1]<<"*"<<n[2]<<std::endl;
  PCOUT<<"Setup \t\t"<<g_setup_time<<std::endl;
  PCOUT<<"Evaluation \t"<<g_exec_time<<std::endl;

  accfft_free(data);
  accfft_free(data_hat);
  MPI_Barrier(c_comm);
  accfft_free(gradx);
  accfft_free(grady);
  accfft_free(gradz);
  accfft_destroy_plan(plan);
  accfft_cleanup();
  MPI_Comm_free(&c_comm);
  PCOUT<<"-------------------------------------------------------"<<std::endl;
  PCOUT<<"-------------------------------------------------------"<<std::endl;
  PCOUT<<"-------------------------------------------------------\n"<<std::endl;
  return ;

} // end grad
Esempio n. 6
0
/**
 * Run an in-place real-to-complex FFT followed by the complex-to-real
 * inverse on testcase data, check the round trip with check_err(), and
 * print the setup, forward and inverse times (maximum over ranks).
 *
 * @param n         global grid size in each of the three dimensions
 * @param nthreads  thread count handed to accfft_init()
 */
void step2(int *n, int nthreads) {
    int nprocs, procid;
    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* Create Cartesian Communicator */
    // Zero-initialised so accfft_create_comm never reads indeterminate values
    int c_dims[2]={0};
    MPI_Comm c_comm;
    accfft_create_comm(MPI_COMM_WORLD,c_dims,&c_comm);

    float *data;
    double f_time=0*MPI_Wtime(),i_time=0, setup_time=0;
    int alloc_max=0;

    int isize[3],osize[3],istart[3],ostart[3];
    /* Get the local pencil size and the allocation size */
    alloc_max=accfft_local_size_dft_r2cf(n,isize,istart,osize,ostart,c_comm);

    // A single buffer serves as both input and output of the transform
    data=(float*)accfft_alloc(alloc_max);

    accfft_init(nthreads);

    setup_time=-MPI_Wtime();
    /* Create FFT plan */
    accfft_planf * plan=accfft_plan_dft_3d_r2cf(n,data,data,c_comm,ACCFFT_MEASURE); // note that in and out are both data -> inplace plan
    setup_time+=MPI_Wtime();

    /* Warm Up */
    // Runs before initialize(), so the buffer contents are arbitrary here;
    // the results of these two calls are overwritten below.
    accfft_execute_r2cf(plan,data,(Complexf*)data);
    accfft_execute_r2cf(plan,data,(Complexf*)data);

    /*  Initialize data */
    initialize(data,n,c_comm); // special initialize plan for inplace transform -> difference in padding
    MPI_Barrier(c_comm);

    /* Perform forward FFT */
    f_time-=MPI_Wtime();
    accfft_execute_r2cf(plan,data,(Complexf*)data);
    f_time+=MPI_Wtime();

    MPI_Barrier(c_comm);

    /* Perform backward FFT */
    i_time-=MPI_Wtime();
    accfft_execute_c2rf(plan,(Complexf*)data,data);
    i_time+=MPI_Wtime();

    /* Check Error */
    check_err(data,n,c_comm);

    /* Compute some timings statistics */
    // Slowest rank determines each reported time (MPI_MAX onto world rank 0)
    double g_f_time, g_i_time, g_setup_time;
    MPI_Reduce(&f_time,&g_f_time,1, MPI_DOUBLE, MPI_MAX,0, MPI_COMM_WORLD);
    MPI_Reduce(&i_time,&g_i_time,1, MPI_DOUBLE, MPI_MAX,0, MPI_COMM_WORLD);
    MPI_Reduce(&setup_time,&g_setup_time,1, MPI_DOUBLE, MPI_MAX,0, MPI_COMM_WORLD);

    PCOUT<<"Timing for FFT of size "<<n[0]<<"*"<<n[1]<<"*"<<n[2]<<std::endl;
    PCOUT<<"Setup \t"<<g_setup_time<<std::endl;
    PCOUT<<"FFT \t"<<g_f_time<<std::endl;
    PCOUT<<"IFFT \t"<<g_i_time<<std::endl;

    accfft_free(data);
    accfft_destroy_plan(plan);
    accfft_cleanup();
    MPI_Comm_free(&c_comm);
    return ;

} // end step2