int mcfft3_init(int pad1 /* padding on the first axis */, int nx, int ny, int nz /* input data size */, int *nx2, int *ny2, int *nz2 /* padded data size */, int *n_local, int *o_local /* local size & start */) /*< initialize >*/ { int cpuid; MPI_Comm_rank(MPI_COMM_WORLD, &cpuid); if (threads_ok) threads_ok = fftwf_init_threads(); fftwf_mpi_init(); if (false) sf_warning("Using threaded FFTW3! \n"); if (threads_ok) fftwf_plan_with_nthreads(omp_get_max_threads()); /* axis 1 */ nk = n1 = kiss_fft_next_fast_size(nx*pad1); /* axis 2 */ n2 = kiss_fft_next_fast_size(ny); /* axis 3 */ n3 = kiss_fft_next_fast_size(nz); alloc_local = fftwf_mpi_local_size_3d(n3, n2, n1, MPI_COMM_WORLD, &local_n0, &local_0_start); //cc = sf_complexalloc3(n1,n2,n3); cc = sf_complexalloc(alloc_local); cfg = fftwf_mpi_plan_dft_3d(n3,n2,n1, (fftwf_complex *) cc, (fftwf_complex *) cc, MPI_COMM_WORLD, FFTW_FORWARD, FFTW_MEASURE); icfg = fftwf_mpi_plan_dft_3d(n3,n2,n1, (fftwf_complex *) cc, (fftwf_complex *) cc, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_MEASURE); if (NULL == cfg || NULL == icfg) sf_error("FFTW failure."); *nx2 = n1; *ny2 = n2; *nz2 = n3; *n_local = (int) local_n0; *o_local = (int) local_0_start; wt = 1.0/(n3*n2*n1); return (nk*n2*n3); }
// Setup variables for 2LPT initial condition void lpt_init(const int nc, const void* mem, const size_t size) { // nc: number of mesh per dimension ptrdiff_t local_nx, local_x_start; ptrdiff_t total_size= fftwf_mpi_local_size_3d(nc, nc, nc/2+1, MPI_COMM_WORLD, &local_nx, &local_x_start); Local_nx= local_nx; Local_x_start= local_x_start; // // Allocate memory // if(mem == 0) { // allocate memory here size_t bytes= sizeof(fftwf_complex)*total_size; int allocation_failed= 0; // 1&2 displacement for(int axes=0; axes < 3; axes++) { cdisp[axes]= fftwf_alloc_complex(total_size); disp[axes] = (float*) cdisp[axes]; cdisp2[axes]= fftwf_alloc_complex(total_size); disp2[axes] = (float*) cdisp2[axes]; bytes += 2*sizeof(fftwf_complex)*total_size; allocation_failed = allocation_failed || (cdisp[axes] == 0) || (cdisp2[axes] == 0); } // 2LPT for(int i=0; i<6; i++) { cdigrad[i] = (fftwf_complex *) fftwf_alloc_complex(total_size); digrad[i] = (float*) cdigrad[i]; bytes += sizeof(fftwf_complex)*total_size; allocation_failed = allocation_failed || (digrad[i] == 0); } if(allocation_failed) msg_abort(2003, "Error: Failed to allocate memory for 2LPT." "Tried to allocate %d Mbytes\n", (int)(bytes/(1024*1024))); msg_printf(info, "%d Mbytes allocated for LPT\n", (int)(bytes/(1024*1024))); } else { size_t bytes= 0; fftwf_complex* p= (fftwf_complex*) mem; for(int axes=0; axes<3; axes++) { cdisp[axes]= p; disp[axes]= (float*) p; bytes += sizeof(fftwf_complex)*total_size*2; p += total_size; } for(int i=0; i<6; i++) { cdigrad[i]= p; digrad[i]= (float*) p; bytes += sizeof(fftwf_complex)*total_size; p += total_size; } assert(bytes <= size); } // // FFTW3 plans // for(int i=0; i<6; ++i) Inverse_plan[i]= fftwf_mpi_plan_dft_c2r_3d(nc, nc, nc, cdigrad[i], digrad[i], MPI_COMM_WORLD, FFTW_ESTIMATE); Forward_plan= fftwf_mpi_plan_dft_r2c_3d(nc, nc, nc, digrad[3], cdigrad[3], MPI_COMM_WORLD, FFTW_ESTIMATE); for(int i=0; i<3; ++i) { Disp_plan[i]= fftwf_mpi_plan_dft_c2r_3d(nc, nc, nc, cdisp[i], disp[i], MPI_COMM_WORLD, FFTW_ESTIMATE); Disp2_plan[i]= fftwf_mpi_plan_dft_c2r_3d(nc, nc, nc, cdisp2[i], disp2[i], MPI_COMM_WORLD, FFTW_ESTIMATE); } // FFTW_MPI_TRANSPOSED_IN/FFTW_MPI_TRANSPOSED_OUT would be faster // FFTW_MEASURE is probably better for multiple realization // misc data Nmesh= nc; Nsample= nc; seedtable = malloc(Nmesh * Nmesh * sizeof(unsigned int)); assert(seedtable); }