int main(int argc, char **argv) { int np[3]; ptrdiff_t n[4], N[4]; ptrdiff_t alloc_local; ptrdiff_t local_ni[4], local_i_start[4]; ptrdiff_t local_no[4], local_o_start[4]; double err, *in, *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_3d; pfft_r2r_kind kinds_forw[4], kinds_back[4]; /* Set size of FFT and process mesh */ n[0] = 13; n[1] = 14; n[2] = 19; n[3] = 17; np[0] = 2; np[1] = 2; np[2] = 2; /* Set FFTW kinds of 1d R2R trafos */ kinds_forw[0] = PFFT_REDFT00; kinds_back[0] = PFFT_REDFT00; kinds_forw[1] = PFFT_REDFT01; kinds_back[1] = PFFT_REDFT10; kinds_forw[2] = PFFT_RODFT00; kinds_back[2] = PFFT_RODFT00; kinds_forw[3] = PFFT_RODFT10; kinds_back[3] = PFFT_RODFT01; /* Set logical DFT sizes corresponding to FFTW manual: * for REDFT00 N=2*(n-1), for RODFT00 N=2*(n+1), otherwise N=2*n */ N[0] = 2*(n[0]-1); N[1] = 2*n[1]; N[2] = 2*(n[2]+1); N[3] = 2*n[3]; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */ if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]*np[2]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_r2r(4, n, comm_cart_3d, PFFT_TRANSPOSED_NONE, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfft_alloc_real(alloc_local); out = pfft_alloc_real(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_r2r( 4, n, in, out, comm_cart_3d, kinds_forw, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Plan parallel backward FFT */ plan_back = pfft_plan_r2r( 4, n, out, in, comm_cart_3d, kinds_back, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Initialize input with random numbers */ pfft_init_input_real(4, n, local_ni, local_i_start, in); /* execute parallel forward FFT */ pfft_execute(plan_forw); /* clear the 
old input */ pfft_clear_input_real(4, n, local_ni, local_i_start, in); /* execute parallel backward FFT */ pfft_execute(plan_back); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2] * local_ni[3]; l++) in[l] /= (N[0]*N[1]*N[2]*N[3]); /* Print error of back transformed data */ MPI_Barrier(MPI_COMM_WORLD); err = pfft_check_output_real(4, n, local_ni, local_i_start, in, comm_cart_3d); pfft_printf(comm_cart_3d, "Error after one forward and backward trafo of size n=(%td, %td, %td, %td):\n", n[0], n[1], n[2], n[3]); pfft_printf(comm_cart_3d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_3d); pfft_free(in); pfft_free(out); MPI_Finalize(); return 0; }
int main(int argc, char **argv){ ptrdiff_t n[3], gc_below[3], gc_above[3]; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_no[3], local_o_start[3]; ptrdiff_t local_ngc[3], local_gc_start[3]; ptrdiff_t alloc_local, alloc_local_gc; int np[3], rnk_self, size, patience, verbose; unsigned pfft_flags=0; double err; MPI_Comm comm_cart_3d; pfft_complex *data; pfft_gcplan ths; MPI_Init(&argc, &argv); pfft_init(); MPI_Comm_rank(MPI_COMM_WORLD, &rnk_self); MPI_Comm_size(MPI_COMM_WORLD, &size); /* default values */ n[0] = n[1] = n[2] = 2; /* n[0] = 3; n[1] = 5; n[2] = 7;*/ np[0]=1; np[1]=1; np[2] = 3; verbose = 1; for(int t=0; t<3; t++){ gc_below[t] = 0; gc_above[t] = 0; } gc_below[0] = 0; gc_above[0] = 2; /* set values by commandline */ init_parameters(argc, argv, n, np, gc_below, gc_above, &patience, &verbose); switch(patience){ case 0: pfft_flags = PFFT_ESTIMATE; break; case 2: pfft_flags = PFFT_PATIENT; break; case 3: pfft_flags = PFFT_EXHAUSTIVE; break; default: pfft_flags = PFFT_MEASURE; } /* Create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */ if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]*np[2]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_dft_3d(n, comm_cart_3d, PFFT_TRANSPOSED_NONE, local_ni, local_i_start, local_no, local_o_start); alloc_local_gc = pfft_local_size_gc_3d( local_ni, local_i_start, alloc_local, gc_below, gc_above, local_ngc, local_gc_start); /* Allocate memory */ data = pfft_alloc_complex(alloc_local_gc); /* Plan parallel ghost cell send */ ths = pfft_plan_cgc_3d(n, gc_below, gc_above, data, comm_cart_3d, PFFT_GC_NONTRANSPOSED); /* Initialize input with random numbers */ pfft_init_input_complex_3d(n, local_ni, local_i_start, data); /* check gcell input */ if(verbose) pfft_apr_complex_3d(data, local_ni, local_i_start, "gcell input", 
comm_cart_3d); /* Execute parallel ghost cell send */ pfft_exchange(ths); /* check output */ if(verbose) pfft_apr_complex_3d(data, local_ngc, local_gc_start, "exchanged gcells", comm_cart_3d); /* Execute adjoint parallel ghost cell send */ pfft_reduce(ths); /* check input */ if(verbose) pfft_apr_complex_3d(data, local_no, local_o_start, "reduced gcells", comm_cart_3d); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++) data[l] /= 4; /* Print error of back transformed data */ MPI_Barrier(comm_cart_3d); err = pfft_check_output_complex_3d(n, local_ni, local_i_start, data, comm_cart_3d); pfft_printf(comm_cart_3d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); pfft_printf(comm_cart_3d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_gcplan(ths); MPI_Comm_free(&comm_cart_3d); pfft_free(data); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int np[3]; ptrdiff_t n[4]; ptrdiff_t alloc_local; ptrdiff_t local_ni[4], local_i_start[4]; ptrdiff_t local_no[4], local_o_start[4]; double err; pfft_complex *in, *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_3d; /* Set size of FFT and process mesh */ n[0] = 13; n[1] = 14; n[2] = 19; n[3] = 17; np[0] = 2; np[1] = 2; np[2] = 2; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */ if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]*np[2]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_dft(4, n, comm_cart_3d, PFFT_TRANSPOSED_NONE, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfft_alloc_complex(alloc_local); out = pfft_alloc_complex(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_dft( 4, n, in, out, comm_cart_3d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Plan parallel backward FFT */ plan_back = pfft_plan_dft( 4, n, out, in, comm_cart_3d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Initialize input with random numbers */ pfft_init_input_complex(4, n, local_ni, local_i_start, in); /* execute parallel forward FFT */ pfft_execute(plan_forw); /* clear the old input */ pfft_clear_input_complex(4, n, local_ni, local_i_start, in); /* execute parallel backward FFT */ pfft_execute(plan_back); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2] * local_ni[3]; l++) in[l] /= (n[0]*n[1]*n[2]*n[3]); /* Print error of back transformed data */ err = pfft_check_output_complex(4, n, local_ni, local_i_start, in, comm_cart_3d); pfft_printf(comm_cart_3d, "Error after one forward and backward trafo of size n=(%td, %td, %td, 
%td):\n", n[0], n[1], n[2], n[3]); pfft_printf(comm_cart_3d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_3d); pfft_free(in); pfft_free(out); MPI_Finalize(); return 0; }
/*
 * Initialize a PM object from the settings in `init`.
 *
 * The function
 *   - copies *init into pm->init and records rank / size of `comm`,
 *   - chooses the Nx x Ny process mesh and creates pm->Comm2D,
 *   - fills in mesh size, box size, cell size, Norm and Volume,
 *   - queries the local input / output regions and derives their strides,
 *   - builds the per-dimension domain tables in pm->Grid,
 *   - creates the r2c and c2r plans (FFTW or PFFT, per init->use_fftw),
 *   - builds the mesh index -> wavenumber tables pm->MeshtoK.
 *
 * pm   : object to initialize.
 * init : settings; copied by value, so the caller keeps ownership.
 * comm : communicator the process mesh is built on.
 *
 * Errors are reported through fastpm_raise. The tables in pm->Grid and
 * pm->MeshtoK are allocated with malloc here; where they are released is
 * not visible in this function.
 */
void pm_init(PM * pm, PMInit * init, MPI_Comm comm) {

    pm->init = *init;
    pm->mem = _libfastpm_get_gmem();

    /* initialize the domain */
    MPI_Comm_rank(comm, &pm->ThisTask);
    MPI_Comm_size(comm, &pm->NTask);

    int Ny = init->NprocY;
    int Nx;
    if(Ny <= 0) {
        /* No usable NprocY given: pick one. With FFTW Ny stays 1. */
        Ny = 1;
        if(!init->use_fftw) {
            /* smallest Ny with Ny * Ny >= NTask ... */
            for(; Ny * Ny < pm->NTask; Ny ++) continue;
            /* ... then step down to the first value that divides NTask */
            for(; Ny >= 1; Ny--) {
                if (pm->NTask % Ny == 0) break;
            }
        }
    } else {
        if(pm->NTask % Ny != 0) {
            fastpm_raise(-1, "NprocY(%d) and NTask(%d) is incompatible\n", Ny, pm->NTask);
        }
    }
    Nx = pm->NTask / Ny;
    pm->Nproc[0] = Nx;
    pm->Nproc[1] = Ny;

    if(init->use_fftw) {
        if(Ny != 1) {
            fastpm_raise(-1, "FFTW requires Ny == 1; Ny = %d\n", Ny);
        }
    }

    int d;

    pm->Norm = 1.0;
    pm->Volume = 1.0;
    for(d = 0; d < 3; d ++) {
        pm->Nmesh[d] = init->Nmesh;
        pm->BoxSize[d] = init->BoxSize;

        pm->Below[d] = 0;
        pm->Above[d] = 1;

        pm->CellSize[d] = pm->BoxSize[d] / pm->Nmesh[d];
        pm->InvCellSize[d] = 1.0 / pm->CellSize[d];
        pm->Norm *= pm->Nmesh[d];
        pm->Volume *= pm->BoxSize[d];
    }

    /* A non-zero return signals that the process mesh could not be created. */
    if(pfft_create_procmesh(2, comm, pm->Nproc, &pm->Comm2D)) {
        fastpm_raise(-1, "Failed to create a %d x %d process mesh on %d tasks\n",
                Nx, Ny, pm->NTask);
    }

    /* Local region sizes / offsets; the factor 2 is applied to the size
     * returned by the library call. */
    if(init->use_fftw) {
        pm->allocsize = 2 * fftw_local_size_dft_r2c(
                3, pm->Nmesh, pm->Comm2D,
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_OUT:0),
                pm->IRegion.size, pm->IRegion.start,
                pm->ORegion.size, pm->ORegion.start);
    } else {
        pm->allocsize = 2 * pfft_local_size_dft_r2c(
                3, pm->Nmesh, pm->Comm2D,
                (pm->init.transposed?PFFT_TRANSPOSED_OUT:0)
                | PFFT_PADDED_R2C,
                pm->IRegion.size, pm->IRegion.start,
                pm->ORegion.size, pm->ORegion.start);
    }

    /* Note that we need to fix up the padded size of the real data;
     * and transpose with strides , */

    /* The strides are computed from the still padded size[2] ... */
    pm->IRegion.strides[2] = 1;
    pm->IRegion.strides[1] = pm->IRegion.size[2];
    pm->IRegion.strides[0] = pm->IRegion.size[1] * pm->IRegion.strides[1];
    pm->IRegion.total = pm->IRegion.size[0] * pm->IRegion.strides[0];

    /* ... and only then is the padding removed from the view */
    pm->IRegion.size[2] = pm->Nmesh[2];

    if(pm->init.transposed) {
        if(pm->init.use_fftw) {
            /* FFTW transposed, y, x, z */
            pm->ORegion.strides[2] = 1;
            pm->ORegion.strides[0] = pm->ORegion.size[2];
            pm->ORegion.strides[1] = pm->ORegion.size[0] * pm->ORegion.strides[0];
            pm->ORegion.total = pm->ORegion.size[1] * pm->ORegion.strides[1];
        } else {
            /* PFFT transposed, y, z, x */
            pm->ORegion.strides[0] = 1;
            pm->ORegion.strides[2] = pm->ORegion.size[0];
            pm->ORegion.strides[1] = pm->ORegion.size[2] * pm->ORegion.strides[2];
            pm->ORegion.total = pm->ORegion.size[1] * pm->ORegion.strides[1];
        }
    } else {
        /* non-transposed */
        pm->ORegion.strides[2] = 1;
        pm->ORegion.strides[1] = pm->ORegion.size[2];
        pm->ORegion.strides[0] = pm->ORegion.size[1] * pm->ORegion.strides[1];
        pm->ORegion.total = pm->ORegion.size[0] * pm->ORegion.strides[0];
    }

    /* Domain tables for the two decomposed dimensions */
    for(d = 0; d < 2; d ++) {
        MPI_Comm projected;
        int remain_dims[2] = {0, 0};
        remain_dims[d] = 1;

        pm->Grid.edges_int[d] =
            malloc(sizeof(pm->Grid.edges_int[0][0]) * (pm->Nproc[d] + 1));
        pm->Grid.edges_float[d] =
            malloc(sizeof(pm->Grid.edges_float[0][0]) * (pm->Nproc[d] + 1));
        pm->Grid.MeshtoCart[d] = malloc(sizeof(int) * pm->Nmesh[d]);

        if(!pm->Grid.edges_int[d]
        || !pm->Grid.edges_float[d]
        || !pm->Grid.MeshtoCart[d]) {
            fastpm_raise(-1, "Out of memory allocating the domain tables of dimension %d\n", d);
        }

        /* Gather the start offset of every task along dimension d */
        MPI_Cart_sub(pm->Comm2D, remain_dims, &projected);
        MPI_Allgather(&pm->IRegion.start[d], 1, MPI_PTRDIFF,
            pm->Grid.edges_int[d], 1, MPI_PTRDIFF, projected);
        MPI_Comm_free(&projected);

        int j;
        for(j = 0; j < pm->Nproc[d]; j ++) {
            pm->Grid.edges_float[d][j] = 1.0 * pm->Grid.edges_int[d][j] / pm->Nmesh[d] * pm->BoxSize[d];
        }
        /* Last edge is at the edge of the box (j == Nproc[d] here) */
        pm->Grid.edges_float[d][j] = pm->BoxSize[d];
        pm->Grid.edges_int[d][j] = pm->Nmesh[d];

        /* fill in the look up table */
        for(j = 0; j < pm->Nproc[d]; j ++) {
            int i;
            for(i = pm->Grid.edges_int[d][j]; i < pm->Grid.edges_int[d][j+1]; i ++) {
                pm->Grid.MeshtoCart[d][i] = j;
            }
        }
    }

    /* Scratch buffers that are only needed while planning */
    FastPMFloat * canvas = pm_alloc(pm);
    FastPMFloat * workspace = pm_alloc(pm);

    if(pm->init.use_fftw) {
        pm->r2c = plan_dft_r2c_fftw(
                3, pm->Nmesh, (void*) workspace, (void*) canvas,
                pm->Comm2D,
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_OUT:0)
                | FFTW_ESTIMATE
                | FFTW_DESTROY_INPUT
                );

        pm->c2r = plan_dft_c2r_fftw(
                3, pm->Nmesh, (void*) canvas, (void*) canvas,
                pm->Comm2D,
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_IN:0)
                | FFTW_ESTIMATE
                | FFTW_DESTROY_INPUT
                );
    } else {
        /* Planned with PFFT_ESTIMATE | PFFT_TUNE; PFFT_MEASURE is not enabled. */
        pm->r2c = plan_dft_r2c(
                3, pm->Nmesh, (void*) workspace, (void*) canvas,
                pm->Comm2D,
                PFFT_FORWARD,
                (pm->init.transposed?PFFT_TRANSPOSED_OUT:0)
                | PFFT_PADDED_R2C
                | PFFT_ESTIMATE
                | PFFT_TUNE
                | PFFT_DESTROY_INPUT
                );

        /* Planned with PFFT_ESTIMATE | PFFT_TUNE; PFFT_MEASURE is not enabled. */
        pm->c2r = plan_dft_c2r(
                3, pm->Nmesh, (void*) workspace, (void*) workspace,
                pm->Comm2D,
                PFFT_BACKWARD,
                (pm->init.transposed?PFFT_TRANSPOSED_IN:0)
                | PFFT_PADDED_C2R
                | PFFT_ESTIMATE
                | PFFT_TUNE
                | PFFT_DESTROY_INPUT
                );
    }

    /* Released in reverse order of allocation */
    pm_free(pm, workspace);
    pm_free(pm, canvas);

    /* Mesh index -> wavenumber table for every dimension */
    for(d = 0; d < 3; d++) {
        pm->MeshtoK[d] = malloc(pm->Nmesh[d] * sizeof(double));
        if(!pm->MeshtoK[d]) {
            fastpm_raise(-1, "Out of memory allocating the wavenumber table of dimension %d\n", d);
        }
        int i;
        for(i = 0; i < pm->Nmesh[d]; i++) {
            int ii = i;
            /* Indices above (Nmesh - 1) / 2 represent negative frequencies.
             * For even Nmesh this is the same split as ii >= Nmesh / 2; for
             * odd Nmesh it keeps index (Nmesh - 1) / 2 positive, so that
             * indices i and Nmesh - i map to wavenumbers of opposite sign. */
            if(ii > (pm->Nmesh[d] - 1) / 2) {
                ii -= pm->Nmesh[d];
            }
            pm->MeshtoK[d][i] = ii * 2 * M_PI / pm->BoxSize[d];
        }
    }
}