/* * === FUNCTION ====================================================================== * Name: makeNewGrid * Arguments: int gridDim - Dimensions dimXdim of new grid. * Returns: Pointer to new grid object. * Description: Makes new grid object and sets all data points to 0. * ===================================================================================== */ Grid * makeNewGrid(int gridDimX, int gridDimY) { fftw_mpi_init() ; Grid * newGrid = malloc(sizeof(Grid)) ; ptrdiff_t localDimY, globalOffset, localDimX ; newGrid->allocScheme = fftw_mpi_local_size_2d(gridDimY, gridDimX, MPI_COMM_WORLD, &localDimY, &globalOffset); newGrid->localGridDimY = localDimY ; newGrid->localGridDimX = gridDimX ; newGrid->globalGridDimY = gridDimY ; newGrid->globalGridDimX = gridDimX ; newGrid->globalOffset = globalOffset ; newGrid->gridPoints = fftw_alloc_real(newGrid->allocScheme) ; return newGrid ; } /* ----- end of function makeNewGrid ----- */
/*
 * Program entry point.
 *
 * Starts MPI and FFTW's MPI layer, calls example_2d(), example_3d(),
 * hypre_example_2d() and hypre_example_3d() in that order, then shuts
 * MPI down again.
 */
int main(int argc, char* argv[])
{
    /* Start-up: MPI must be initialised before FFTW's MPI layer. */
    MPI_Init(&argc, &argv);
    fftw_mpi_init();

    /* Run the example drivers. */
    example_2d();
    example_3d();
    hypre_example_2d();
    hypre_example_3d();

    /* Shut-down. */
    MPI_Finalize();

    return 0;
}
int main(int argc,char **args) { PetscErrorCode ierr; PetscMPIInt rank,size; PetscInt N0=50,N1=20,N=N0*N1,DIM; PetscRandom rdm; PetscScalar a; PetscReal enorm; Vec x,y,z; PetscBool view=PETSC_FALSE,use_interface=PETSC_TRUE; ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires complex numbers"); #endif ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex143");CHKERRQ(ierr); ierr = PetscOptionsBool("-vec_view draw", "View the vectors", "ex143", view, &view, NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex143",use_interface, &use_interface, NULL);CHKERRQ(ierr); ierr = PetscOptionsEnd();CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,"-use_FFTW_interface",&use_interface,NULL);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); if (!use_interface) { /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */ /*---------------------------------------------------------*/ fftw_plan fplan,bplan; fftw_complex *data_in,*data_out,*data_out2; ptrdiff_t alloc_local,local_n0,local_0_start; DIM = 2; if (!rank) { ierr = PetscPrintf(PETSC_COMM_SELF,"Use FFTW without PETSc-FFTW interface, DIM %D\n",DIM);CHKERRQ(ierr); } fftw_mpi_init(); N = N0*N1; alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start); data_in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); data_out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) 
x, "Real Space vector");CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr); fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE); bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE); ierr = VecSetRandom(x, rdm);CHKERRQ(ierr); if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} fftw_execute(fplan); if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} fftw_execute(bplan); /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ a = 1.0/(PetscReal)N; ierr = VecScale(z,a);CHKERRQ(ierr); if (view) {ierr = VecView(z, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr); ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-11 && !rank) { ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %g\n",(double)enorm);CHKERRQ(ierr); } /* Free spaces */ fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); fftw_free(data_in); ierr = VecDestroy(&x);CHKERRQ(ierr); fftw_free(data_out); ierr = VecDestroy(&y);CHKERRQ(ierr); fftw_free(data_out2);ierr = VecDestroy(&z);CHKERRQ(ierr); } else { /* Use PETSc-FFTW interface */ /*-------------------------------------------*/ PetscInt i,*dim,k; Mat A; N=1; for (i=1; i<5; i++) { DIM = i; ierr = PetscMalloc1(i,&dim);CHKERRQ(ierr); for (k=0; k<i; k++) { dim[k]=30; } N *= dim[i-1]; /* Create FFTW object */ if (!rank) printf("Use PETSc-FFTW interface...%d-DIM: %d\n",(int)DIM,(int)N); ierr = MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A);CHKERRQ(ierr); /* Create vectors that are 
compatible with parallel layout of A - must call MatCreateVecs()! */ ierr = MatCreateVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr); /* Set values of space vector x */ ierr = VecSetRandom(x,rdm);CHKERRQ(ierr); if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} /* Apply FFTW_FORWARD and FFTW_BACKWARD */ ierr = MatMult(A,x,y);CHKERRQ(ierr); if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr); /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ a = 1.0/(PetscReal)N; ierr = VecScale(z,a);CHKERRQ(ierr); if (view) {ierr = VecView(z,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr); ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-9 && !rank) { ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %e\n",enorm);CHKERRQ(ierr); } ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&z);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFree(dim);CHKERRQ(ierr); } } ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
void init_fft2d_(void ) { int i,j,k; double vm2,vm1,v,vp1,vp2,vb; commx = MPI_Comm_f2c(topo_.commxc); MPI_Comm_rank(commx, &irankx); MPI_Comm_size(commx, &isizex); commyz = MPI_Comm_f2c(topo_.commyzc); MPI_Comm_rank(commyz, &irankyz); MPI_Comm_size(commyz, &isizeyz); fftw_mpi_init(); howmany = topo_.mxlc; //*********** alloc_ly = fftw_mpi_local_size_2d(my, mz, commx, &ly, &lys); /* alloc_ly=fftw_mpi_local_size_many(rnk, myz, howmany, FFTW_MPI_DEFAULT_BLOCK, commx, &ly, &lys); */ //*********** if(((ly-topo_.mylc)!=0) || topo_.npzc>1) { printf("Error,npz should equal to 1, or %d\t%d\n",irankx,ly-topo_.mylc); MPI_Abort(commx,1); } minp = fftw_alloc_complex(alloc_ly); mout = fftw_alloc_complex(alloc_ly); if( !(freq = r3tensor(topo_.mxlc, topo_.mylc*topo_.mzlc, 2)) ) printf("Malloc error!\n"); if( !(data = r3tensor(topo_.mxlc, topo_.mylc*topo_.mzlc, 2)) ) printf("Malloc error!\n"); /* if( !(dar = r3tensor(topo_.mxlc, topo_.mylc, topo_.mzlc)) ) printf("Malloc error!\n"); if( !(dai = r3tensor(topo_.mxlc, topo_.mylc, topo_.mzlc)) ) printf("Malloc error!\n"); */ //*********** mplanF = fftw_mpi_plan_dft_2d(my, mz, minp, mout, commx, FFTW_FORWARD, FFTW_MEASURE); mplanR = fftw_mpi_plan_dft_2d(my, mz, minp, mout, commx, FFTW_BACKWARD, FFTW_MEASURE); /* mplanF = fftw_mpi_plan_many_dft(rnk, myz, howmany, FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, minp, mout, commx, FFTW_FORWARD, FFTW_MEASURE); mplanR = fftw_mpi_plan_many_dft(rnk, myz, howmany, FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK, minp, mout, commx, FFTW_BACKWARD, FFTW_MEASURE); */ //*********** //***** Solver part ****** dxs = topo_.dx0*topo_.dx0; dys = topo_.dy0*topo_.dy0; dzs = topo_.dz0*topo_.dz0; vm2=-1.0/12.0; vm1=16.0/12.0; v =-30.0/12.0; vp1=16.0/12.0; vp2=-1.0/12.0; MatCreateMPIAIJ(commyz, PETSC_DECIDE, PETSC_DECIDE, mx, mx, 5, PETSC_NULL, 5, PETSC_NULL, &A); ierr = MatGetOwnershipRange(A,&Istart,&Iend); for (Ii=Istart; Ii<Iend; Ii++) { i = Ii; j = Ii; if ((i>1)&&(i<mx-2)) { J = Ii - 2; 
MatSetValues(A,1,&Ii,1,&J,&vm2,INSERT_VALUES); J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&vm1,INSERT_VALUES);CHKERRQ(ierr); J = Ii; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&vp1,INSERT_VALUES);CHKERRQ(ierr); J = Ii + 2; ierr = MatSetValues(A,1,&Ii,1,&J,&vp2,INSERT_VALUES);CHKERRQ(ierr); } if (i==0) { J = Ii; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i==1) { J = Ii - 1; vb = 11.0/12.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);CHKERRQ(ierr); J = Ii ; vb = -5.0/3.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES); J = Ii + 1; vb = 0.5; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES); J = Ii + 2; vb = 1.0/3.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES); J = Ii + 3; vb = -1.0/12.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES); } if (i==mx-2) { J = Ii + 1; vb = 11.0/12.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);CHKERRQ(ierr); J = Ii ; vb = -5.0/3.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES); J = Ii - 1; vb = 0.5; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES); J = Ii - 2; vb = 1.0/3.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES); J = Ii - 3; vb = -1.0/12.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES); } if (i==mx-1) {J = Ii; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = VecCreate(commyz,&br);CHKERRQ(ierr); ierr = VecSetSizes(br,PETSC_DECIDE,mx);CHKERRQ(ierr); ierr = VecSetFromOptions(br);CHKERRQ(ierr); ierr = VecDuplicate(br,&xr);CHKERRQ(ierr); ierr = VecDuplicate(br,&bi);CHKERRQ(ierr); ierr = VecDuplicate(br,&xi);CHKERRQ(ierr); ierr = KSPCreate(commyz,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,A,A,SAME_PRECONDITIONER);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); ierr = KSPGetPC(ksp,&pc); PCSetType(pc,PCJACOBI); ierr = 
KSPSetTolerances(ksp,1.e-7,1.e-50,PETSC_DEFAULT, PETSC_DEFAULT);CHKERRQ(ierr); //******* End ******* }
/*
 * init_field - set up a field_info structure for an n_d-dimensional real FFT.
 *
 * Parameters:
 *   n_d  number of dimensions
 *   n    grid size in each dimension (n_d entries)
 *   L    box length in each dimension (n_d entries)
 *   FFT  structure to initialise; every array hanging off it is allocated here
 *
 * What it does, in order:
 *   1. logs the grid shape and allocates the per-dimension bookkeeping arrays,
 *      defaulting every local extent to the full grid (last k-dimension is
 *      n/2+1);
 *   2. creates forward/inverse plans and the local storage size for whichever
 *      FFTW variant is compiled in (FFTW_V2 / USE_MPI / USE_DOUBLE);
 *   3. applies the slab decomposition reported by FFTW (MPI builds) or the
 *      full-grid defaults (serial builds);
 *   4. allocates and clears the field, then builds the real-space and k-space
 *      coordinate arrays;
 *   5. records slab extents and the nearest ranks to the left/right that own
 *      at least one slice.
 */
void init_field(int n_d, int *n, double *L, field_info *FFT) {
    int flag_active;
    int n_active;
    int min_size, max_size;

    SID_log("Initializing ", SID_LOG_OPEN);
    for(ptrdiff_t i_d = 0; i_d < n_d; i_d++) {
        if(i_d < (n_d - 1))
            SID_log("%dx", SID_LOG_CONTINUE, n[i_d]);
        else
            SID_log("%d element %d-d FFT ", SID_LOG_CONTINUE, n[i_d], n_d);
    }
    SID_log("(%d byte precision)...", SID_LOG_CONTINUE, (int)sizeof(GBPREAL));

    // Initialize FFT sizes
    FFT->n_d             = n_d;
    FFT->n               = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->L               = (double *)SID_calloc(sizeof(double) * FFT->n_d);
    FFT->n_k_local       = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->n_R_local       = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_R_start_local = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_k_start_local = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_R_stop_local  = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_k_stop_local  = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->n[i_d]               = n[i_d];
        FFT->L[i_d]               = L[i_d];
        FFT->i_R_start_local[i_d] = 0;
        FFT->i_k_start_local[i_d] = 0;
        FFT->n_R_local[i_d]       = FFT->n[i_d];
        FFT->n_k_local[i_d]       = FFT->n[i_d];
    }
    FFT->n_k_local[FFT->n_d - 1] = FFT->n[FFT->n_d - 1] / 2 + 1;

    // Slab decomposition as seen by this rank.  Start from the full-grid
    //    defaults stored above so that the serial build paths (which never
    //    query FFTW for a decomposition) do not read uninitialised values;
    //    the MPI paths below overwrite all four.
    ptrdiff_t n_x_local                 = FFT->n_R_local[0];
    ptrdiff_t i_x_start_local           = 0;
    ptrdiff_t n_y_transpose_local       = (FFT->n_d > 1) ? FFT->n_k_local[1] : 0;
    ptrdiff_t i_y_start_transpose_local = 0;

    // Initialize FFTW

    // Create an integer version of FFT->n[] to pass to ..._create_plan
    int *n_int = (int *)SID_malloc(sizeof(int) * FFT->n_d);
    for(int i_d = 0; i_d < FFT->n_d; i_d++)
        n_int[i_d] = (int)FFT->n[i_d];

    // NOTE(review): the FFTW3 branches below hand FFT->field_local and
    //    FFT->cfield_local to the planner before they are assigned further
    //    down ("Allocate field") -- confirm the plans are only ever run
    //    through FFTW's new-array execute functions.
#if FFTW_V2
#if USE_MPI
    int total_local_size_int;
    int n_x_local_int;
    int i_x_start_local_int;
    int n_y_transpose_local_int;
    int i_y_start_transpose_local_int;
    FFT->plan  = rfftwnd_mpi_create_plan(SID.COMM_WORLD->comm, FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE);
    FFT->iplan = rfftwnd_mpi_create_plan(SID.COMM_WORLD->comm, FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE);
    rfftwnd_mpi_local_sizes(FFT->plan,
                            &(n_x_local_int),
                            &(i_x_start_local_int),
                            &(n_y_transpose_local_int),
                            &(i_y_start_transpose_local_int),
                            &total_local_size_int);
    n_x_local                 = (ptrdiff_t)n_x_local_int;
    i_x_start_local           = (ptrdiff_t)i_x_start_local_int;
    n_y_transpose_local       = (ptrdiff_t)n_y_transpose_local_int;
    i_y_start_transpose_local = (ptrdiff_t)i_y_start_transpose_local_int;
    FFT->total_local_size     = (size_t)total_local_size_int;
#else
    FFT->total_local_size = 1;
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        if(i_d < FFT->n_d - 1)
            FFT->total_local_size *= FFT->n[i_d];
        else
            FFT->total_local_size *= 2 * (FFT->n[i_d] / 2 + 1);
    }
#if USE_DOUBLE
    FFT->plan  = fftwnd_create_plan(FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
    FFT->iplan = fftwnd_create_plan(FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
#else
    FFT->plan  = rfftwnd_create_plan(FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
    FFT->iplan = rfftwnd_create_plan(FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
#endif
#endif
#else
#if USE_MPI
#if USE_DOUBLE
    fftw_mpi_init();
    FFT->total_local_size = fftw_mpi_local_size_many_transposed(FFT->n_d,
                                                                FFT->n,
                                                                1,
                                                                FFTW_MPI_DEFAULT_BLOCK,
                                                                FFTW_MPI_DEFAULT_BLOCK,
                                                                SID_COMM_WORLD->comm,
                                                                &(n_x_local),
                                                                &(i_x_start_local),
                                                                &(n_y_transpose_local),
                                                                &(i_y_start_transpose_local));
    FFT->plan  = fftw_mpi_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
    FFT->iplan = fftw_mpi_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
#else
    fftwf_mpi_init();
    FFT->total_local_size = fftwf_mpi_local_size_many_transposed(FFT->n_d,
                                                                 FFT->n,
                                                                 1,
                                                                 FFTW_MPI_DEFAULT_BLOCK,
                                                                 FFTW_MPI_DEFAULT_BLOCK,
                                                                 SID_COMM_WORLD->comm,
                                                                 &(n_x_local),
                                                                 &(i_x_start_local),
                                                                 &(n_y_transpose_local),
                                                                 &(i_y_start_transpose_local));
    FFT->plan  = fftwf_mpi_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
    FFT->iplan = fftwf_mpi_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
#endif
#else
    FFT->total_local_size = 1;
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        if(i_d < FFT->n_d - 1)
            FFT->total_local_size *= FFT->n[i_d];
        else
            FFT->total_local_size *= 2 * (FFT->n[i_d] / 2 + 1);
    }
#if USE_DOUBLE
    FFT->plan  = fftw_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, FFTW_ESTIMATE);
    FFT->iplan = fftw_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, FFTW_ESTIMATE);
#else
    FFT->plan  = fftwf_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, FFTW_ESTIMATE);
    FFT->iplan = fftwf_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, FFTW_ESTIMATE);
#endif
#endif
#endif

    SID_free(SID_FARG n_int);

    // Set empty slabs to start at 0 to make ignoring them simple.
    if(n_x_local == 0)
        i_x_start_local = 0;
    if(n_y_transpose_local == 0)
        i_y_start_transpose_local = 0;

    // Modify the local slab dimensions according to what FFTW chose.
    FFT->i_R_start_local[0] = i_x_start_local;
    FFT->n_R_local[0]       = n_x_local;
    if(FFT->n_d > 1) {
        FFT->i_k_start_local[1] = i_y_start_transpose_local;
        FFT->n_k_local[1]       = n_y_transpose_local;
    }

    // Allocate field
#if USE_FFTW3
    FFT->field_local = (gbpFFT_real *)fftwf_alloc_real(FFT->total_local_size);
#else
    FFT->field_local = (gbpFFT_real *)SID_malloc(sizeof(gbpFFT_real) * FFT->total_local_size);
#endif
    // The complex view shares storage with the real field.
    FFT->cfield_local = (gbpFFT_complex *)FFT->field_local;

    // Upper limits of slab decomposition
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->i_R_stop_local[i_d] = FFT->i_R_start_local[i_d] + FFT->n_R_local[i_d] - 1;
        FFT->i_k_stop_local[i_d] = FFT->i_k_start_local[i_d] + FFT->n_k_local[i_d] - 1;
    }

    // FFTW padding sizes
    if(FFT->n_d > 1) {
        FFT->pad_size_R = 2 * (FFT->n_R_local[FFT->n_d - 1] / 2 + 1) - FFT->n_R_local[FFT->n_d - 1];
        FFT->pad_size_k = 0;
    } else {
        FFT->pad_size_R = 0;
        FFT->pad_size_k = 0;
    }

    // Number of elements (global and local) in the FFT
    FFT->n_field         = 1;
    FFT->n_field_R_local = 1;
    FFT->n_field_k_local = 1;
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->n_field *= (size_t)FFT->n[i_d];
        FFT->n_field_R_local *= (size_t)FFT->n_R_local[i_d];
        FFT->n_field_k_local *= (size_t)FFT->n_k_local[i_d];
    }

    // Clear the field
    clear_field(FFT);

    // Initialize the FFT's real-space grid
    //    (R_field is an array of pointers; dR holds doubles, so it is sized with sizeof(double))
    FFT->R_field = (double **)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->dR      = (double *)SID_malloc(sizeof(double) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        // One extra entry so that R_field[i_d][n] holds the upper box edge.
        FFT->R_field[i_d] = (double *)SID_malloc(sizeof(double) * (FFT->n[i_d] + 1));
        FFT->dR[i_d]      = FFT->L[i_d] / (double)(FFT->n[i_d]);
        for(ptrdiff_t i_i = 0; i_i < FFT->n[i_d]; i_i++)
            FFT->R_field[i_d][i_i] = FFT->L[i_d] * ((double)i_i / (double)(FFT->n[i_d]));
        FFT->R_field[i_d][FFT->n[i_d]] = FFT->L[i_d];
    }

    // Initialize the FFT's k-space grid
    //    (dk and k_Nyquist hold doubles, so they are sized with sizeof(double))
    FFT->k_field   = (double **)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->dk        = (double *)SID_malloc(sizeof(double) * FFT->n_d);
    FFT->k_Nyquist = (double *)SID_malloc(sizeof(double) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->k_field[i_d]   = (double *)SID_malloc(sizeof(double) * FFT->n[i_d]);
        FFT->dk[i_d]        = TWO_PI / FFT->L[i_d];
        FFT->k_Nyquist[i_d] = TWO_PI * (double)(FFT->n[i_d]) / FFT->L[i_d] / 2.;
        for(ptrdiff_t i_i = 0; i_i < FFT->n[i_d]; i_i++) {
            // Indices at or above n/2 are stored as negative frequencies.
            if(i_i >= FFT->n[i_d] / 2)
                FFT->k_field[i_d][i_i] = TWO_PI * (double)(i_i - FFT->n[i_d]) / FFT->L[i_d];
            else
                FFT->k_field[i_d][i_i] = TWO_PI * (double)(i_i) / FFT->L[i_d];
        }
    }

    // Flags
    FFT->flag_padded = GBP_FALSE;

    // Slab info
    FFT->slab.n_x_local       = FFT->n_R_local[0];
    FFT->slab.i_x_start_local = FFT->i_R_start_local[0];
    FFT->slab.i_x_stop_local  = FFT->i_R_stop_local[0];
    FFT->slab.x_min_local     = FFT->R_field[0][FFT->i_R_start_local[0]];
    if(FFT->slab.n_x_local > 0)
        FFT->slab.x_max_local = FFT->R_field[0][FFT->i_R_stop_local[0] + 1];
    else
        FFT->slab.x_max_local = FFT->slab.x_min_local;
    SID_Allreduce(&(FFT->slab.x_max_local), &(FFT->slab.x_max), 1, SID_DOUBLE, SID_MAX, SID_COMM_WORLD);

#if USE_MPI
    // All ranks are not necessarily assigned any slices, so
    //   we need to figure out what ranks are to the right and the left for
    //   buffer exchanges.  The per-rank slab sizes are kept as int so that
    //   the buffers really have the SID_INT type used in the calls below.
    int *n_x_rank         = (int *)SID_malloc(sizeof(int) * SID.n_proc);
    n_x_rank[SID.My_rank] = (int)FFT->slab.n_x_local;
    if(n_x_rank[SID.My_rank] > 0)
        flag_active = GBP_TRUE;
    else
        flag_active = GBP_FALSE;
    SID_Allreduce(&flag_active, &n_active, 1, SID_INT, SID_SUM, SID_COMM_WORLD);
    SID_Allreduce(&n_x_rank[SID.My_rank], &min_size, 1, SID_INT, SID_MIN, SID_COMM_WORLD);
    SID_Allreduce(&n_x_rank[SID.My_rank], &max_size, 1, SID_INT, SID_MAX, SID_COMM_WORLD);
    for(int i_rank = 0; i_rank < SID.n_proc; i_rank++)
        SID_Bcast(&(n_x_rank[i_rank]), 1, SID_INT, i_rank, SID_COMM_WORLD);

    // Nearest rank above this one (wrapping around) that owns a slice.
    FFT->slab.rank_to_right = -1;
    for(int i_rank = SID.My_rank + 1; i_rank < SID.My_rank + SID.n_proc && FFT->slab.rank_to_right < 0; i_rank++) {
        int j_rank = i_rank % SID.n_proc;
        if(n_x_rank[j_rank] > 0)
            FFT->slab.rank_to_right = j_rank;
    }
    if(FFT->slab.rank_to_right < 0)
        FFT->slab.rank_to_right = SID.My_rank;

    // Nearest rank below this one (wrapping around) that owns a slice.
    FFT->slab.rank_to_left = -1;
    for(int i_rank = SID.My_rank - 1; i_rank > SID.My_rank - SID.n_proc && FFT->slab.rank_to_left < 0; i_rank--) {
        int j_rank = i_rank;
        if(i_rank < 0)
            j_rank = i_rank + SID.n_proc;
        if(n_x_rank[j_rank] > 0)
            FFT->slab.rank_to_left = j_rank;
    }
    if(FFT->slab.rank_to_left < 0)
        FFT->slab.rank_to_left = SID.My_rank;

    // Allocated with SID_malloc, so release through SID_free.
    SID_free(SID_FARG n_x_rank);
    SID_log("(%d cores unused, min/max slab size=%d/%d)...", SID_LOG_CONTINUE, SID.n_proc - n_active, min_size, max_size);
#else
    FFT->slab.rank_to_right = SID.My_rank;
    FFT->slab.rank_to_left  = SID.My_rank;
    if(FFT->slab.n_x_local > 0) {
        flag_active = GBP_TRUE;
        n_active    = 1;
        min_size    = FFT->slab.n_x_local;
        max_size    = FFT->slab.n_x_local;
    } else {
        flag_active = GBP_FALSE;
        n_active    = 0;
        min_size    = 0;
        max_size    = 0;
    }
#endif

    SID_log("Done.", SID_LOG_CLOSE);
}
void init_common(void) { /* This routine will initialize everything */ int i,j,k; DEBUG_START_FUNC; #ifdef MPI_SUPPORT #ifdef FFTW3_MPI_SUPPORT fftw_mpi_init(); #endif #endif #ifdef _OPENMP if( !(fftw_init_threads()) ) ERROR_HANDLER( ERROR_CRITICAL, "Threads initialisation failed"); #endif /* We start with the coordinate system */ kx = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (kx == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kx allocation"); ky = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (ky == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for ky allocation"); kz = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (kz == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kz allocation"); kxt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (kxt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kxt allocation"); kyt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (kyt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kyt allocation"); kzt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (kzt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kzt allocation"); k2t = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (k2t == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for k2t allocation"); ik2t = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (ik2t == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for ik2t allocation"); for( i = 0; i < NX_COMPLEX / NPROC; i++) { for( j = 0; j < NY_COMPLEX; j++) { for( k = 0; k < NZ_COMPLEX; k++) { kx[ IDX3D ] = (2.0 * M_PI) / param.lx * (fmod( NX_COMPLEX * rank / NPROC + i + (NX_COMPLEX / 2) , NX_COMPLEX ) - NX_COMPLEX / 2 ); #ifdef WITH_2D ky[ IDX3D ] = (2.0 * M_PI) / param.ly * j; kz[ IDX3D ] = 0.0; #else ky[ IDX3D ] = (2.0 * M_PI) / param.ly * (fmod( j + (NY_COMPLEX / 2) , NY_COMPLEX ) - NY_COMPLEX / 2 ); kz[ IDX3D ] = (2.0 * M_PI) / param.lz * k; #endif kxt[ IDX3D ]= kx[IDX3D]; 
kyt[ IDX3D ]= ky[IDX3D]; kzt[ IDX3D ]= kz[IDX3D]; k2t[ IDX3D ] = kxt[IDX3D] * kxt[IDX3D] + kyt[IDX3D] * kyt[IDX3D] + kzt[IDX3D] * kzt[IDX3D]; if ( k2t[IDX3D] == 0.0 ) ik2t[IDX3D] = 1.0; else ik2t[IDX3D] = 1.0 / k2t[IDX3D]; } } } kxmax = 2.0 * M_PI/ param.lx * ( (NX / 2) - 1); kymax = 2.0 * M_PI/ param.ly * ( (NY / 2) - 1); kzmax = 2.0 * M_PI/ param.lz * ( (NZ / 2) - 1); #ifdef WITH_2D kzmax = 0.0; #endif kmax=pow(kxmax*kxmax+kymax*kymax+kzmax*kzmax,0.5); /* Initialize the dealiazing mask Or the nyquist frequency mask (in case dealiasing is not required) */ mask = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (mask == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for mask allocation"); for( i = 0; i < NX_COMPLEX/NPROC; i++) { for( j = 0; j < NY_COMPLEX; j++) { for( k = 0; k < NZ_COMPLEX; k++) { mask[ IDX3D ] = 1.0; if(param.antialiasing) { if( fabs( kx[ IDX3D ] ) > 2.0/3.0 * kxmax) mask[ IDX3D ] = 0.0; if( fabs( ky[ IDX3D ] ) > 2.0/3.0 * kymax) mask[ IDX3D ] = 0.0; #ifndef WITH_2D if( fabs( kz[ IDX3D ] ) > 2.0/3.0 * kzmax) mask[ IDX3D ] = 0.0; #endif } else { if ( NX_COMPLEX / NPROC * rank + i == NX_COMPLEX / 2 ) mask[ IDX3D ] = 0.0; if ( j == NY_COMPLEX / 2 ) mask[ IDX3D ] = 0.0; #ifndef WITH_2D if ( k == NZ_COMPLEX ) mask[ IDX3D ] = 0.0; #endif } } } } if(param.antialiasing) { kxmax = kxmax * 2.0 / 3.0; kymax = kymax * 2.0 / 3.0; kzmax = kzmax * 2.0 / 3.0; kmax = kmax * 2.0 / 3.0; } // Allocate fields // Complex fields w1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w1 allocation"); w2 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w2 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w2 allocation"); w3 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w3 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w3 allocation"); w4 = (double complex *) fftw_malloc( sizeof(double complex) * 
NTOTAL_COMPLEX); if (w4 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w4 allocation"); w5 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w5 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w5 allocation"); w6 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w6 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w6 allocation"); w7 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w7 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w7 allocation"); w8 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w8 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w8 allocation"); w9 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w9 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w9 allocation"); w10 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w10 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w10 allocation"); w11 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w11 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w11 allocation"); w12 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w12 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w12 allocation"); w13 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w13 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w13 allocation"); w14 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w14 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w14 allocation"); w15 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w15 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation"); w16 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w16 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 
allocation"); w17 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w17 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation"); w18 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w18 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation"); wh1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (wh1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh1 allocation"); wh2 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (wh2 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh2 allocation"); wh3 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)); if (wh3 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh3 allocation"); wh4 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)*NZ); if (wh4 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh4 allocation"); wh5 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)*NZ); if (wh5 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh5 allocation"); // Initialize wh1,wh2,wh3; for(i=0;i<NX*(NY/2+1);i++) {wh1[i]=0; wh2[i]=0; wh3[i]=0;} /* Will use the same memory space for real and complex fields */ wr1 = (double *) w1; wr2 = (double *) w2; wr3 = (double *) w3; wr4 = (double *) w4; wr5 = (double *) w5; wr6 = (double *) w6; wr7 = (double *) w7; wr8 = (double *) w8; wr9 = (double *) w9; wr10 = (double *) w10; wr11 = (double *) w11; wr12 = (double *) w12; wr13 = (double *) w13; wr14 = (double *) w14; wr15 = (double *) w15; wr16 = (double *) w16; wr17 = (double *) w17; wr18 = (double *) w18; wrh1 = (double *) wh1; wrh2 = (double *) wh2; wrh3 = (double *) wh3; wrh4 = (double *) wh4; wrh5 = (double *) wh5; // Physic initialisation // init_real_mask(); //set Reynolds numbers using input powers AJB 08/03/12 param.reynolds = pow(10.0,param.reynolds); nu = 1.0 / param.reynolds; #ifdef BOUSSINESQ 
param.reynolds_th = pow(10.0,param.reynolds_th); nu_th = 1.0 / param.reynolds_th; #endif #ifdef MHD param.reynolds_m = pow(10.0,param.reynolds_m); eta = 1.0 / param.reynolds_m; #endif DEBUG_END_FUNC; return; }
int main(int narg, char **args) { MPI_Init(&narg, &args); #ifdef _USE_FFTW_FILTER fftw_mpi_init(); #endif Json::Value root; jsonParser::parseJsonInputFile(root, narg, args); int dim = jsonParser::getDimensionality(root, DEFAULT_DIMENSIONALITY); GRID grid(dim); EM_FIELD myfield; CURRENT current; std::vector<SPECIE*> species; std::vector<SPECIE*>::const_iterator spec_iterator; my_rng_generator rng; //*******************************************BEGIN GRID DEFINITION******************************************************* jsonParser::setXrange(root, &grid); jsonParser::setYrange(root, &grid); jsonParser::setZrange(root, &grid); jsonParser::setNCells(root, &grid); jsonParser::setNprocs(root, &grid); jsonParser::setStretchedGrid(root, &grid); jsonParser::setBoundaryConditions(root, &grid); jsonParser::setRadiationFriction(root, &grid); jsonParser::setMasterProc(root, &grid); grid.mpi_grid_initialize(&narg, args); jsonParser::setCourantFactor(root, &grid); jsonParser::setSimulationTime(root, &grid); jsonParser::setMovingWindow(root, &grid); srand(time(NULL)); grid.initRNG(rng, RANDOM_NUMBER_GENERATOR_SEED); grid.finalize(); jsonParser::setDumpControl(root, &grid); grid.visualDiag(); //********************************************END GRID DEFINITION******************************************************** //*******************************************BEGIN FIELD DEFINITION********************************************************* myfield.allocate(&grid); myfield.setAllValuesToZero(); jsonParser::setLaserPulses(root, &myfield); myfield.boundary_conditions(); current.allocate(&grid); current.setAllValuesToZero(); //*******************************************END FIELD DEFINITION*********************************************************** //******************** BEGIN TO READ OF user defined INPUT - PARAMETERS **************************************** bool isThereSpecial = false; bool areThereSpheres = false; std::string fileSpheresName; Json::Value special; SPHERES myspheres; if 
(isThereSpecial = jsonParser::setValue(special, root, "special")) { if (areThereSpheres = jsonParser::setString(&fileSpheresName, special, "spheresFile")) { readAndAllocateSpheres(myspheres, fileSpheresName, grid); selectSpheres(myspheres, grid); } } std::map<std::string, PLASMA*>::iterator pIterator; //******************** END READ OF "SPECIAL" (user defined) INPUT - PARAMETERS **************************************** //*******************************************BEGIN SPECIES DEFINITION********************************************************* std::map<std::string, PLASMA*> plasmas; jsonParser::setPlasmas(root, plasmas); if (areThereSpheres) { for (pIterator = plasmas.begin(); pIterator != plasmas.end(); pIterator++) { (pIterator)->second->params.spheres = &myspheres; } } jsonParser::setSpecies(root, species, plasmas, &grid, rng); uint64_t totPartNum = 0; for (spec_iterator = species.begin(); spec_iterator != species.end(); spec_iterator++) { totPartNum += (*spec_iterator)->printParticleNumber(); } if (grid.myid == grid.master_proc) { std::cout << "Total particle number: " << totPartNum << std::endl; } if (areThereSpheres) { delete[] myspheres.coords; } //*******************************************END SPECIES DEFINITION*********************************************************** //*******************************************BEGIN DIAG DEFINITION************************************************** std::map<std::string, outDomain*> outDomains; OUTPUT_MANAGER manager(&grid, &myfield, ¤t, species); jsonParser::setDomains(root, outDomains); jsonParser::setOutputRequests(root, manager, outDomains, species); jsonParser::setOutputDirPath(root, manager); manager.initialize(); //*******************************************END DIAG DEFINITION************************************************** grid.setDumpPath(DIRECTORY_DUMP); //@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ MAIN CYCLE (DO NOT MODIFY) @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ if (grid.myid == grid.master_proc) { 
printf("----- START temporal cicle -----\n"); fflush(stdout); } int dumpID = 1; grid.istep = 0; if (grid.dumpControl.doRestart) { dumpID = grid.dumpControl.restartFromDump; restartFromDump(&dumpID, &grid, &myfield, species); } while (grid.istep <= grid.getTotalNumberOfTimesteps()) { #ifdef NO_ALLOCATION manager.close(); MPI_Finalize(); exit(0); #endif grid.printTStepEvery(FREQUENCY_STDOUT_STATUS); manager.callDiags(grid.istep); myfield.openBoundariesE_1(); myfield.new_halfadvance_B(); myfield.boundary_conditions(); current.setAllValuesToZero(); for (spec_iterator = species.begin(); spec_iterator != species.end(); spec_iterator++) { (*spec_iterator)->current_deposition_standard(¤t); } current.pbc(); for (spec_iterator = species.begin(); spec_iterator != species.end(); spec_iterator++) { (*spec_iterator)->position_parallel_pbc(); } myfield.openBoundariesB(); myfield.new_advance_E(¤t); myfield.boundary_conditions(); #ifdef _USE_FFTW_FILTER myfield.fftw_filter_Efield(); myfield.boundary_conditions(); #endif myfield.openBoundariesE_2(); if (!(grid.istep % 20)) { //myfield.applyFilter(fltr_Ex|fltr_Ey, dir_x|dir_y); } myfield.new_halfadvance_B(); myfield.boundary_conditions(); for (spec_iterator = species.begin(); spec_iterator != species.end(); spec_iterator++) { if (grid.isRadiationFrictionEnabled()) { (*spec_iterator)->momenta_advance_with_friction(&myfield, grid.getLambda0()); } else { (*spec_iterator)->momenta_advance(&myfield); } } grid.time += grid.dt; moveWindow(&grid, &myfield, species); grid.istep++; if (grid.dumpControl.doDump) { if (grid.istep != 0 && !(grid.istep % ((int)(grid.dumpControl.dumpEvery / grid.dt)))) { dumpFilesForRestart(&dumpID, &grid, &myfield, species); } } } manager.close(); MPI_Finalize(); exit(0); }
int main(int argc, char **argv) { // Set up MPI // ========== ierr = MPI_Init(&argc, &argv); ierr = MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask); ierr = MPI_Comm_size(MPI_COMM_WORLD, &NTask); #ifdef SINGLE_PRECISION fftwf_mpi_init(); #else fftw_mpi_init(); #endif if(argc < 2) { if(ThisTask == 0) { fprintf(stdout, "Input parameters not found\n"); fprintf(stdout, "Call with <ParameterFile>\n"); } ierr = MPI_Finalize(); exit(0); } // Read the run parameters and setup code // ====================================== int stepDistr; int subtractLPT; double da=0; read_parameterfile(argv[1]); if (UseCOLA == 1){ subtractLPT = 1; stepDistr = 0; StdDA = 0; } else{ subtractLPT = 0; stepDistr = 1; StdDA = 2; } if (StdDA == 0){ fullT = 1; nLPT = -2.5; } filter = 0; // Whether or not to smooth the forces Scale = 2.*M_PI/Box; // The force smoothing scale if(ThisTask == 0) { printf("Run Parameters\n"); printf("==============\n"); printf("Cosmology:\n"); printf(" Omega Matter(z=0) = %lf\n",Omega); printf(" Omega Baryon(z=0) = %lf\n",OmegaBaryon); printf(" Hubble Parameter(z=0) = %lf\n",HubbleParam); printf(" Sigma8(z=0) = %lf\n",Sigma8); #ifndef GAUSSIAN printf(" F_nl = %lf\n",Fnl); #endif printf(" Primordial Index = %lf\n",PrimordialIndex); printf(" Initial Redshift = %lf\n",Init_Redshift); printf(" Final Redshift = %lf\n",Final_Redshift); #ifndef GAUSSIAN printf(" F_nl Redshift = %lf\n",Fnl_Redshift); #endif printf("Simulation:\n"); printf(" Nmesh = %d\n", Nmesh); printf(" Nsample = %d\n", Nsample); printf(" Boxsize = %lf\n", Box); printf(" Buffer Size = %lf\n", Buffer); switch(WhichSpectrum) { case 0: switch (WhichTransfer) { case 1: printf(" Using Eisenstein & Hu Transfer Function\n"); break; case 2: printf(" Using Tabulated Transfer Function\n"); break; default: printf(" Using Efstathiou Transfer Function\n"); break; } break; case 1: printf(" Using Eisenstein & Hu Power Spectrum\n"); break; case 2: printf(" Using Tabulated Power Spectrum\n"); break; default: printf(" Using Efstathiou 
Power Spectrum\n"); break; } printf(" Number of Timesteps = %d\n",nsteps); if (UseCOLA) { printf(" Using COLA method\n\n"); } else { printf(" Using Standard PM method\n\n"); } fflush(stdout); } // Initial and final scale factors: double ai=1.0/(1.0+Init_Redshift); double af=1.0/(1.0+Final_Redshift); if (stepDistr == 0) da=(af-ai)/((double)nsteps); if (stepDistr == 1) da=(log(af)-log(ai))/((double)nsteps); if (stepDistr == 2) da=(CosmoTime(af)-CosmoTime(ai))/((double)nsteps); set_units(); if (ThisTask == 0) { printf("Initialising Transfer Function/Power Spectrum\n"); printf("=============================================\n"); } initialize_transferfunction(); initialize_powerspectrum(); initialize_ffts(); initialize_parts(); if(ThisTask == 0) { printf("Creating initial conditions\n"); printf("===========================\n"); fflush(stdout); } // Create the calculate the Zeldovich and 2LPT displacements and create the initial conditions // =========================================================================================== int i, j, k, m; unsigned int n, coord; double A=ai; // This is the scale factor which we'll be advancing below. double Di=growthD(1.0, A); // initial growth factor double Di2=growthD2(A); // initial 2nd order growth factor double Dv=DprimeQ(A,1.0); // T[D_{za}]=dD_{za}/dy double Dv2=growthD2v(A); // T[D_{2lpt}]=dD_{2lpt}/dy displacement_fields(); P = (struct part_data *) malloc((int)(ceil(NumPart*Buffer))*sizeof(struct part_data)); // Generate the initial particle positions and velocities // If subtractLPT = 0 (non-COLA), then velocity is ds/dy, which is simply the 2LPT IC. // Else set vel = 0 if we subtract LPT. This is the same as the action of the operator L_- from TZE, as initial velocities are in 2LPT. 
for(i=0; i<Local_np; i++) { for (j=0; j<Nsample; j++) { for (k=0; k<Nsample; k++) { coord = (i * Nsample + j) * Nsample + k; P[coord].ID = ((i + Local_p_start) * Nsample + j) * Nsample + k; for (m=0; m<3; m++) { P[coord].Dz[m] = ZA[m][coord]; P[coord].D2[m] = LPT[m][coord]; if (subtractLPT == 0) { P[coord].Vel[m]=P[coord].Dz[m]*Dv+P[coord].D2[m]*Dv2; } else { P[coord].Vel[m] = 0.0; } } P[coord].Pos[0] = periodic_wrap((i+Local_p_start)*(Box/Nsample)+P[coord].Dz[0]*Di+P[coord].D2[0]*Di2); P[coord].Pos[1] = periodic_wrap(j*(Box/Nsample)+P[coord].Dz[1]*Di+P[coord].D2[1]*Di2); P[coord].Pos[2] = periodic_wrap(k*(Box/Nsample)+P[coord].Dz[2]*Di+P[coord].D2[2]*Di2); } } } for (i=0; i<3; i++) { free(ZA[i]); free(LPT[i]); } // Now, we get to the N-Body part where we evolve with time via the Kick-Drift-Kick Method // ======================================================================================= int timeStep; double AF=0,AI,AC,AFF=0; double growth1 = Di; double growth1L2 = Di2; // The density grid and force grids and associated fftw plans #ifndef MEMORY_MODE density = (float_kind *)calloc(2*Total_size,sizeof(float_kind)); N11 = (float_kind *)calloc(2*Total_size,sizeof(float_kind)); N12 = (float_kind *)calloc(2*Total_size,sizeof(float_kind)); N13 = (float_kind *)calloc(2*Total_size,sizeof(float_kind)); P3D = (complex_kind*)density; FN11 = (complex_kind*)N11; FN12 = (complex_kind*)N12; FN13 = (complex_kind*)N13; #ifdef SINGLE_PRECISION plan = fftwf_mpi_plan_dft_r2c_3d(Nmesh,Nmesh,Nmesh,density,P3D,MPI_COMM_WORLD,FFTW_ESTIMATE); p11 = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN11,N11,MPI_COMM_WORLD,FFTW_ESTIMATE); p12 = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN12,N12,MPI_COMM_WORLD,FFTW_ESTIMATE); p13 = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN13,N13,MPI_COMM_WORLD,FFTW_ESTIMATE); #else plan = fftw_mpi_plan_dft_r2c_3d(Nmesh,Nmesh,Nmesh,density,P3D,MPI_COMM_WORLD,FFTW_ESTIMATE); p11 = 
fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN11,N11,MPI_COMM_WORLD,FFTW_ESTIMATE); p12 = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN12,N12,MPI_COMM_WORLD,FFTW_ESTIMATE); p13 = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN13,N13,MPI_COMM_WORLD,FFTW_ESTIMATE); #endif #endif if(ThisTask == 0) { printf("Beginning timestepping\n"); printf("======================\n"); fflush(stdout); } // AI stores the scale factor to which the velocities have been kicked to. Initially it's just A. AI=A; for (timeStep=0;timeStep<=nsteps;timeStep++){ // AFF is the scale factor to which we should drift the particle positions. // AF is the scale factor to which we should kick the particle velocities. if (stepDistr == 0) AFF=A+da; if (stepDistr == 1) AFF=A*exp(da); if (stepDistr == 2) AFF=AofTime(CosmoTime(A)+da); // half time-step for final kick if (timeStep == nsteps) { AF=A; } else { // Set to mid-point of interval. In the infinitesimal timestep limit, these choices are identical. // How one chooses the mid-point when not in that limit is really an extra degree of freedom in the code // but Tassev et al. report negligible effects from the different choices below. // Hence, this is not exported as an extra switch at this point. 
if (stepDistr == 0) AF=A+da*0.5; if (stepDistr == 1) AF=A*exp(da*0.5); if (stepDistr == 2) AF=AofTime((CosmoTime(AFF)+CosmoTime(A))*0.5); } if (ThisTask == 0) { printf("Iteration = %d\n------------------\n",timeStep+1); printf("a = %lf\n",A); printf("z = %lf\n",1.0/A-1.0); fflush(stdout); } // First we check whether all the particles are on the correct processor after the last time step/ // original 2LPT displacement and move them if not if (ThisTask == 0) printf("Moving particles across task boundaries...\n"); MoveParticles(); #ifdef MEMORY_MODE density = (float_kind *)calloc(2*Total_size,sizeof(float_kind)); P3D = (complex_kind*)density; #ifdef SINGLE_PRECISION plan = fftwf_mpi_plan_dft_r2c_3d(Nmesh,Nmesh,Nmesh,density,P3D,MPI_COMM_WORLD,FFTW_ESTIMATE); #else plan = fftw_mpi_plan_dft_r2c_3d(Nmesh,Nmesh,Nmesh,density,P3D,MPI_COMM_WORLD,FFTW_ESTIMATE); #endif #endif // Then we do the Cloud-in-Cell assignment to get the density grid and FFT it. if (ThisTask == 0) printf("Calculating density using Cloud-in-Cell...\n"); PtoMesh(); #ifdef MEMORY_MODE N11 = (float_kind *)calloc(2*Total_size,sizeof(float_kind)); N12 = (float_kind *)calloc(2*Total_size,sizeof(float_kind)); N13 = (float_kind *)calloc(2*Total_size,sizeof(float_kind)); FN11 = (complex_kind*)N11; FN12 = (complex_kind*)N12; FN13 = (complex_kind*)N13; #ifdef SINGLE_PRECISION p11 = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN11,N11,MPI_COMM_WORLD,FFTW_ESTIMATE); p12 = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN12,N12,MPI_COMM_WORLD,FFTW_ESTIMATE); p13 = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN13,N13,MPI_COMM_WORLD,FFTW_ESTIMATE); #else p11 = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN11,N11,MPI_COMM_WORLD,FFTW_ESTIMATE); p12 = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN12,N12,MPI_COMM_WORLD,FFTW_ESTIMATE); p13 = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN13,N13,MPI_COMM_WORLD,FFTW_ESTIMATE); #endif #endif // This returns N11,N12,N13 which hold the components of // the vector (grad grad^{-2} 
density) on a grid. if (ThisTask == 0) printf("Calculating forces...\n"); Forces(); #ifdef MEMORY_MODE free(density); for (i=0; i<3; i++) Disp[i] = (float *)malloc(NumPart*sizeof(float)); #ifdef SINGLE_PRECISION fftwf_destroy_plan(plan); #else fftw_destroy_plan(plan); #endif #else for (i=0; i<3; i++) Disp[i] = (float_kind *)malloc(NumPart*sizeof(float_kind)); #endif // Now find the accelerations at the particle positions using 3-linear interpolation. if (ThisTask == 0) printf("Calculating accelerations...\n"); MtoParticles(); #ifdef MEMORY_MODE free(N11); free(N12); free(N13); #ifdef SINGLE_PRECISION fftwf_destroy_plan(p11); fftwf_destroy_plan(p12); fftwf_destroy_plan(p13); #else fftw_destroy_plan(p11); fftw_destroy_plan(p12); fftw_destroy_plan(p13); #endif #endif // Calculate the mean displacement and subtract later. if (ThisTask == 0) printf("Calculating mean of displacements...\n"); double sumDx=0,sumDy=0,sumDz=0; for(n=0; n<NumPart; n++) { sumDx += Disp[0][n]; sumDy += Disp[1][n]; sumDz += Disp[2][n]; } // Make sumDx, sumDy and sumDz global averages ierr = MPI_Allreduce(MPI_IN_PLACE,&sumDx,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); ierr = MPI_Allreduce(MPI_IN_PLACE,&sumDy,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); ierr = MPI_Allreduce(MPI_IN_PLACE,&sumDz,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); sumDx /= (double)TotNumPart; // We will subtract these below to conserve momentum. 
sumDy /= (double)TotNumPart; sumDz /= (double)TotNumPart; if (ThisTask == 0) { printf("Kicking the particles...\n"); fflush(stdout); } // Kick // =============== double dda; double q1,q2; double ax,ay,az; double sumx=0,sumy=0,sumz=0; double Om143=pow(Omega/(Omega+(1-Omega)*A*A*A),1./143.); if (StdDA == 0) { dda=Sphi(AI,AF,A); } else if (StdDA == 1) { dda=(AF-AI)*A/Qfactor(A); } else { dda=SphiStd(AI,AF); } q2=1.5*Omega*growth1*growth1*(1.0+7./3.*Om143)*A; // T^2[D_{2lpt}]=d^2 D_{2lpt}/dy^2 q1=1.5*Omega*growth1*A; // T^2[D_{ZA}]=d^2 D_{ZA}/dy^2 for(n=0; n<NumPart; n++) { Disp[0][n] -= sumDx; Disp[1][n] -= sumDy; Disp[2][n] -= sumDz; ax=-1.5*Omega*Disp[0][n]-subtractLPT*(P[n].Dz[0]*q1+P[n].D2[0]*q2)/A; ay=-1.5*Omega*Disp[1][n]-subtractLPT*(P[n].Dz[1]*q1+P[n].D2[1]*q2)/A; az=-1.5*Omega*Disp[2][n]-subtractLPT*(P[n].Dz[2]*q1+P[n].D2[2]*q2)/A; P[n].Vel[0] += ax*dda; P[n].Vel[1] += ay*dda; P[n].Vel[2] += az*dda; sumx += P[n].Vel[0]; sumy += P[n].Vel[1]; sumz += P[n].Vel[2]; } for (i=0; i<3; i++) free(Disp[i]); // Make sumx, sumy and sumz global averages ierr = MPI_Allreduce(MPI_IN_PLACE,&sumx,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); ierr = MPI_Allreduce(MPI_IN_PLACE,&sumy,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); ierr = MPI_Allreduce(MPI_IN_PLACE,&sumz,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); sumx /= (double)TotNumPart; // We will subtract these below to conserve momentum. sumy /= (double)TotNumPart; // Should be conserved, but just in case 3-linear interpolation makes a problem. sumz /= (double)TotNumPart; // Never checked whether this makes a difference. if (timeStep == nsteps) { if (ThisTask == 0) { printf("Iteration %d finished\n------------------\n\n", timeStep+1); printf("Timestepping finished\n\n"); fflush(stdout); } // At final timestep, add back LPT velocities if we had subtracted them. // This corresponds to L_+ operator in TZE. 
Dv = DprimeQ(A,1.0); // dD_{za}/dy Dv2 = growthD2v(A); // dD_{2lpt}/dy for(n=0; n<NumPart; n++) { P[n].Vel[0] += -sumx+(P[n].Dz[0]*Dv+P[n].D2[0]*Dv2)*subtractLPT; P[n].Vel[1] += -sumy+(P[n].Dz[1]*Dv+P[n].D2[1]*Dv2)*subtractLPT; P[n].Vel[2] += -sumz+(P[n].Dz[2]*Dv+P[n].D2[2]*Dv2)*subtractLPT; } goto finalize; // Sorry for "goto" :) } if (ThisTask == 0) { printf("Drifting the particles...\n"); fflush(stdout); } // Drift // ============= double dyyy; double da1,da2; AC = AF; AF = AFF; if (StdDA == 0) { dyyy=Sq(A,AF,AC); } else if (StdDA == 1) { dyyy=(AF-A)/Qfactor(AC); } else { dyyy=SqStd(A,AF); } da1=growthD(1.0, AF)-growth1; // change in D da2=growthD2(AF)-growth1L2; // change in D_{2lpt} for(n=0; n<NumPart; n++) { P[n].Pos[0] += (P[n].Vel[0]-sumx)*dyyy; P[n].Pos[1] += (P[n].Vel[1]-sumy)*dyyy; P[n].Pos[2] += (P[n].Vel[2]-sumz)*dyyy; P[n].Pos[0] = periodic_wrap(P[n].Pos[0]+subtractLPT*(P[n].Dz[0]*da1+P[n].D2[0]*da2)); P[n].Pos[1] = periodic_wrap(P[n].Pos[1]+subtractLPT*(P[n].Dz[1]*da1+P[n].D2[1]*da2)); P[n].Pos[2] = periodic_wrap(P[n].Pos[2]+subtractLPT*(P[n].Dz[2]*da1+P[n].D2[2]*da2)); } // Step in time // ================ A = AF; // WRT to the above name change, A = AFF AI = AC; // WRT to the above name change, AI = AF growth1 = growthD(1.0, A); growth1L2 = growthD2(A); if (ThisTask == 0) { printf("Iteration %d finished\n------------------\n\n", timeStep+1); fflush(stdout); } ierr = MPI_Barrier(MPI_COMM_WORLD); } // Here is the last little bit // =========================== finalize: if (ThisTask == 0) { printf("Finishing up\n"); printf("============\n"); fflush(stdout); } // Now convert velocities to v_{rsd}\equiv (ds/d\eta)/(a H(a)) velRSD(A); // Output a slice just for the sake of doing something with P. 
if (ThisTask == 0) { printf("Converting to RSD velocities...\n"); printf("Outputting particles...\n"); } slice(); print_spec(); fflush(stdout); free_powertable(); free_transfertable(); #ifdef GENERIC_FNL free(KernelTable); #endif free(P); free(Slab_to_task); free(Part_to_task); free(Local_nx_table); free(Local_np_table); #ifndef MEMORY_MODE free(density); free(N11); free(N12); free(N13); #ifdef SINGLE_PRECISION fftwf_destroy_plan(plan); fftwf_destroy_plan(p11); fftwf_destroy_plan(p12); fftwf_destroy_plan(p13); #else fftw_destroy_plan(plan); fftw_destroy_plan(p11); fftw_destroy_plan(p12); fftw_destroy_plan(p13); #endif #endif #ifdef SINGLE_PRECISION fftwf_mpi_cleanup(); #else fftw_mpi_cleanup(); #endif if (ThisTask == 0) printf("Done :)\n"); MPI_Finalize(); return 0; }
/*! This routines generates the FFTW-plans to carry out the parallel FFTs * later on. Some auxiliary variables are also initialized. */ void pm_init_periodic(void) { int i; int slab_to_task_local[PMGRID]; All.Asmth[0] = ASMTH * All.BoxSize / PMGRID; All.Rcut[0] = RCUT * All.Asmth[0]; /* Initialize FFTW MPI */ #ifdef FFTW3 fftw_mpi_init(); #endif #ifdef FFTW3 /* If using FFTW3, don't create plans yet, just figure out the local array sizes */ fftsize_complex = fftw_mpi_local_size_3d_transposed(PMGRID, PMGRID, 0.5*PMGRID2, MPI_COMM_WORLD, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y); fftsize_real = 2.*fftsize_complex; fftw_plan_exists = false; #else /* Set up the FFTW plan files. */ fft_forward_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, PMGRID, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE); fft_inverse_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, PMGRID, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); /* Workspace out the ranges on each processor. 
*/ rfftwnd_mpi_local_sizes(fft_forward_plan, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y, &fftsize); #endif for(i = 0; i < PMGRID; i++) slab_to_task_local[i] = 0; for(i = 0; i < nslab_x; i++) slab_to_task_local[slabstart_x + i] = ThisTask; MPI_Allreduce(slab_to_task_local, slab_to_task, PMGRID, MPI_INT, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&nslab_x, &smallest_slab, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); slabs_per_task = malloc(NTask * sizeof(int)); MPI_Allgather(&nslab_x, 1, MPI_INT, slabs_per_task, 1, MPI_INT, MPI_COMM_WORLD); if(ThisTask == 0) { for(i = 0; i < NTask; i++) printf("Task=%d FFT-Slabs=%d\n", i, slabs_per_task[i]); } first_slab_of_task = malloc(NTask * sizeof(int)); MPI_Allgather(&slabstart_x, 1, MPI_INT, first_slab_of_task, 1, MPI_INT, MPI_COMM_WORLD); meshmin_list = malloc(3 * NTask * sizeof(int)); meshmax_list = malloc(3 * NTask * sizeof(int)); to_slab_fac = PMGRID / All.BoxSize; MPI_Allreduce(&fftsize, &maxfftsize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); }