Esempio n. 1
0
int mcfft3_init(int pad1           /* padding on the first axis */,
	       int nx,   int ny,  int nz   /* input data size */, 
	       int *nx2, int *ny2, int *nz2 /* padded data size */,
               int *n_local, int *o_local /* local size & start */)
/*< initialize >*/
{
  /* Prepares the file-level state (n1, n2, n3, nk, cc, cfg, icfg, wt) for
     an in-place complex 3-D FFT distributed over MPI_COMM_WORLD.

     Outputs:
       *nx2, *ny2, *nz2  - padded sizes actually used for the transform
       *n_local          - local size reported by fftwf_mpi_local_size_3d
       *o_local          - local start reported by fftwf_mpi_local_size_3d
     Returns nk*n2*n3, the number of points of the padded grid. */

  /* The thread support has to be initialised before fftwf_mpi_init();
     a zero return from fftwf_init_threads() disables threading here. */
  if (threads_ok) threads_ok = fftwf_init_threads();

  fftwf_mpi_init();

  if (threads_ok)
    fftwf_plan_with_nthreads(omp_get_max_threads());

  /* axis 1 (the only padded axis) */
  nk = n1 = kiss_fft_next_fast_size(nx*pad1);
  /* axis 2 */
  n2 = kiss_fft_next_fast_size(ny);
  /* axis 3 */
  n3 = kiss_fft_next_fast_size(nz);

  /* alloc_local is the number of complex elements this process must
     allocate; it is what the work array is sized with below. */
  alloc_local = fftwf_mpi_local_size_3d(n3, n2, n1, MPI_COMM_WORLD, &local_n0, &local_0_start);

  cc = sf_complexalloc(alloc_local);

  /* Forward and backward plans both transform cc in place. */
  cfg  = fftwf_mpi_plan_dft_3d(n3,n2,n1,
                               (fftwf_complex *) cc,
                               (fftwf_complex *) cc,
                               MPI_COMM_WORLD,
                               FFTW_FORWARD, FFTW_MEASURE);

  icfg = fftwf_mpi_plan_dft_3d(n3,n2,n1,
                               (fftwf_complex *) cc, 
                               (fftwf_complex *) cc,
                               MPI_COMM_WORLD,
                               FFTW_BACKWARD, FFTW_MEASURE);

  if (NULL == cfg || NULL == icfg) sf_error("FFTW failure.");

  *nx2 = n1;
  *ny2 = n2;
  *nz2 = n3;
  *n_local = (int) local_n0;
  *o_local = (int) local_0_start;

  /* Normalisation weight 1/(n1*n2*n3). The product is formed in floating
     point: an int product overflows (undefined behaviour) once the grid
     holds more than INT_MAX points. */
  wt =  1.0/((double) n3 * (double) n2 * (double) n1);

  /* The point count is formed in a wide type for the same reason. The
     interface returns int, so counts above INT_MAX cannot be represented
     in the return value. */
  return (int) ((long long) nk * n2 * n3);
}
Esempio n. 2
0
// Setup variables for 2LPT initial condition
//
//   nc   : number of mesh cells per dimension
//   mem  : optional caller-provided work buffer; when null the buffers are
//          allocated here with fftwf_alloc_complex
//   size : capacity of mem in bytes (only checked when mem is not null)
//
// Sets the file-level buffers (cdisp/disp, cdisp2/disp2, cdigrad/digrad),
// the FFTW plans built on them, Local_nx, Local_x_start, Nmesh, Nsample
// and seedtable.
void lpt_init(const int nc, const void* mem, const size_t size)
{
  ptrdiff_t local_nx, local_x_start;
  const ptrdiff_t total_size=
    fftwf_mpi_local_size_3d(nc, nc, nc/2+1, MPI_COMM_WORLD,
			    &local_nx, &local_x_start);

  Local_nx= local_nx;
  Local_x_start= local_x_start;

  // Every buffer holds total_size complex elements; the float pointers
  // (disp, disp2, digrad) alias the same storage as their complex
  // counterparts, so the c2r/r2c plans below run in place.
  const size_t chunk= sizeof(fftwf_complex)*total_size;

  //
  // Allocate memory
  //

  if(mem == NULL) {
    // Count only what is actually requested: 3 + 3 + 6 = 12 chunks.
    size_t bytes= 0;
    int allocation_failed= 0;

    // 1&2 displacement
    for(int axes=0; axes<3; axes++) {
      cdisp[axes]= fftwf_alloc_complex(total_size);
      disp[axes]= (float*) cdisp[axes];

      cdisp2[axes]= fftwf_alloc_complex(total_size);
      disp2[axes]= (float*) cdisp2[axes];

      bytes += 2*chunk;
      if(cdisp[axes] == NULL || cdisp2[axes] == NULL)
	allocation_failed= 1;
    }

    // 2LPT
    for(int i=0; i<6; i++) {
      cdigrad[i]= fftwf_alloc_complex(total_size);
      digrad[i]= (float*) cdigrad[i];

      bytes += chunk;
      if(cdigrad[i] == NULL)
	allocation_failed= 1;
    }

    if(allocation_failed)
      msg_abort(2003, "Error: Failed to allocate memory for 2LPT."
		"Tried to allocate %d Mbytes\n", (int)(bytes/(1024*1024)));

    msg_printf(info, "%d Mbytes allocated for LPT\n", (int)(bytes/(1024*1024)));
  }
  else {
    // Layout of the caller's buffer, in chunks of total_size elements:
    //   [0..2]  cdisp,  [3..8]  cdigrad,  [9..11]  cdisp2
    // cdisp and cdigrad keep the offsets they always had; cdisp2 takes the
    // three chunks that the size check already required but that were
    // never handed out, which left Disp2_plan without valid buffers.
    const size_t bytes= 12*chunk;
    assert(bytes <= size);

    fftwf_complex* p= (fftwf_complex*) mem;

    for(int axes=0; axes<3; axes++) {
      cdisp[axes]= p;
      disp[axes]= (float*) p;
      p += total_size;
    }
    for(int i=0; i<6; i++) {
      cdigrad[i]= p;
      digrad[i]= (float*) p;
      p += total_size;
    }
    for(int axes=0; axes<3; axes++) {
      cdisp2[axes]= p;
      disp2[axes]= (float*) p;
      p += total_size;
    }
  }

  //
  // FFTW3 plans
  //
  for(int i=0; i<6; ++i)
    Inverse_plan[i]=
      fftwf_mpi_plan_dft_c2r_3d(nc, nc, nc, cdigrad[i], digrad[i],
				MPI_COMM_WORLD, FFTW_ESTIMATE);

  Forward_plan=
    fftwf_mpi_plan_dft_r2c_3d(nc, nc, nc, digrad[3], cdigrad[3],
			      MPI_COMM_WORLD, FFTW_ESTIMATE);

  for(int i=0; i<3; ++i) {
    Disp_plan[i]=
      fftwf_mpi_plan_dft_c2r_3d(nc, nc, nc, cdisp[i], disp[i],
				MPI_COMM_WORLD, FFTW_ESTIMATE);
    Disp2_plan[i]=
      fftwf_mpi_plan_dft_c2r_3d(nc, nc, nc, cdisp2[i], disp2[i],
				MPI_COMM_WORLD, FFTW_ESTIMATE);
  }

  // FFTW_MPI_TRANSPOSED_IN/FFTW_MPI_TRANSPOSED_OUT would be faster
  // FFTW_MEASURE is probably better for multiple realization

  // misc data
  Nmesh= nc;
  Nsample= nc;

  // One seed per (nc x nc) entry; the size is computed in size_t so the
  // product cannot overflow int.
  seedtable= malloc((size_t) nc * (size_t) nc * sizeof(unsigned int));
  assert(seedtable);
}