int main(int argc, char **argv) { int np[2]; ptrdiff_t n[4]; ptrdiff_t alloc_local; ptrdiff_t local_ni[4], local_i_start[4]; ptrdiff_t local_no[4], local_o_start[4]; double err, *in; pfft_complex *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_2d; /* Set size of FFT and process mesh */ n[0] = 13; n[1] = 14; n[2] = 19; n[3] = 17; np[0] = 2; np[1] = 2; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_dft_r2c(4, n, comm_cart_2d, PFFT_TRANSPOSED_OUT, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfft_alloc_real(2 * alloc_local); out = pfft_alloc_complex(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_dft_r2c( 4, n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Plan parallel backward FFT */ plan_back = pfft_plan_dft_c2r( 4, n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Initialize input with random numbers */ pfft_init_input_real(4, n, local_ni, local_i_start, in); /* execute parallel forward FFT */ pfft_execute(plan_forw); /* execute parallel backward FFT */ pfft_execute(plan_back); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2] * local_ni[3]; l++) in[l] /= (n[0]*n[1]*n[2]*n[3]); /* Print error of back transformed data */ MPI_Barrier(MPI_COMM_WORLD); err = pfft_check_output_real(4, n, local_ni, local_i_start, in, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td, %td):\n", n[0], n[1], n[2], n[3]); pfft_printf(comm_cart_2d, "maxerror = 
%6.2e;\n", err); /* free mem and finalize */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_2d); pfft_free(in); pfft_free(out); MPI_Finalize(); return 0; }
/*
 * Initialize the particle-mesh object `pm` from the parameters in `init`,
 * using the tasks of communicator `comm`.
 *
 * Steps, in order:
 *   1. copy *init into pm->init and record rank / size of `comm`;
 *   2. choose the Nx x Ny process mesh (pm->Nproc);
 *   3. fill the per-axis mesh geometry (Nmesh, BoxSize, CellSize, ...),
 *      plus pm->Norm (product of Nmesh) and pm->Volume (product of BoxSize);
 *   4. create the 2-d process mesh and query the local r2c data layout;
 *   5. derive strides / totals of the real (IRegion) and complex (ORegion)
 *      views;
 *   6. build, for the two decomposed axes, the domain edge tables and the
 *      mesh-index -> mesh-coordinate lookup table;
 *   7. create the forward (r2c) and backward (c2r) plans on scratch buffers;
 *   8. tabulate the wave number of every mesh index (pm->MeshtoK).
 *
 * Failures (incompatible NprocY, FFTW with Ny != 1, process-mesh creation,
 * out of memory) are reported through fastpm_raise().
 */
void pm_init(PM * pm, PMInit * init, MPI_Comm comm)
{
    pm->init = *init;
    pm->mem = _libfastpm_get_gmem();

    /* initialize the domain */
    MPI_Comm_rank(comm, &pm->ThisTask);
    MPI_Comm_size(comm, &pm->NTask);

    int Ny = init->NprocY;

    if(Ny <= 0) {
        /* No usable Ny requested: FFTW gets Ny = 1; otherwise take the
         * largest divisor of NTask that does not exceed ceil(sqrt(NTask)). */
        Ny = 1;
        if(!init->use_fftw) {
            while(Ny * Ny < pm->NTask) {
                Ny ++;
            }
            /* Ny == 1 always divides NTask, so this stops at Ny >= 1 */
            while(Ny >= 1 && pm->NTask % Ny != 0) {
                Ny --;
            }
        }
    } else if(pm->NTask % Ny != 0) {
        fastpm_raise(-1, "NprocY(%d) and NTask(%d) is incompatible\n", Ny, pm->NTask);
    }

    int Nx = pm->NTask / Ny;

    pm->Nproc[0] = Nx;
    pm->Nproc[1] = Ny;

    if(init->use_fftw && Ny != 1) {
        fastpm_raise(-1, "FFTW requires Ny == 1; Ny = %d\n", Ny);
    }

    int d;

    pm->Norm = 1.0;
    pm->Volume = 1.0;
    for(d = 0; d < 3; d ++) {
        pm->Nmesh[d] = init->Nmesh;
        pm->BoxSize[d] = init->BoxSize;

        pm->Below[d] = 0;
        pm->Above[d] = 1;

        pm->CellSize[d] = pm->BoxSize[d] / pm->Nmesh[d];
        pm->InvCellSize[d] = 1.0 / pm->CellSize[d];
        pm->Norm *= pm->Nmesh[d];
        pm->Volume *= pm->BoxSize[d];
    }

    /* A non-zero return is treated as failure, matching how the return of
     * pfft_create_procmesh_2d() is interpreted elsewhere in this file. */
    if(pfft_create_procmesh(2, comm, pm->Nproc, &pm->Comm2D)) {
        fastpm_raise(-1, "Failed to create a %d x %d process mesh from %d tasks\n",
                Nx, Ny, pm->NTask);
    }

    /* allocsize is twice the value returned by the local-size query */
    if(init->use_fftw) {
        pm->allocsize = 2 * fftw_local_size_dft_r2c(
                3, pm->Nmesh, pm->Comm2D,
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_OUT:0),
                pm->IRegion.size, pm->IRegion.start,
                pm->ORegion.size, pm->ORegion.start);
    } else {
        pm->allocsize = 2 * pfft_local_size_dft_r2c(
                3, pm->Nmesh, pm->Comm2D,
                (pm->init.transposed?PFFT_TRANSPOSED_OUT:0)
                | PFFT_PADDED_R2C,
                pm->IRegion.size, pm->IRegion.start,
                pm->ORegion.size, pm->ORegion.start);
    }

    /* Note that we need to fix up the padded size of the real data;
     * and transpose with strides.
     * The strides are computed from the size[2] reported by the local-size
     * call, before size[2] is reset to Nmesh[2] below. */
    pm->IRegion.strides[2] = 1;
    pm->IRegion.strides[1] = pm->IRegion.size[2];
    pm->IRegion.strides[0] = pm->IRegion.size[1] * pm->IRegion.strides[1];
    pm->IRegion.total = pm->IRegion.size[0] * pm->IRegion.strides[0];

    /* remove padding from the view */
    pm->IRegion.size[2] = pm->Nmesh[2];

    if(pm->init.transposed) {
        if(pm->init.use_fftw) {
            /* FFTW transposed, y, x, z */
            pm->ORegion.strides[2] = 1;
            pm->ORegion.strides[0] = pm->ORegion.size[2];
            pm->ORegion.strides[1] = pm->ORegion.size[0] * pm->ORegion.strides[0];
            pm->ORegion.total = pm->ORegion.size[1] * pm->ORegion.strides[1];
        } else {
            /* PFFT transposed, y, z, x */
            pm->ORegion.strides[0] = 1;
            pm->ORegion.strides[2] = pm->ORegion.size[0];
            pm->ORegion.strides[1] = pm->ORegion.size[2] * pm->ORegion.strides[2];
            pm->ORegion.total = pm->ORegion.size[1] * pm->ORegion.strides[1];
        }
    } else {
        /* non-transposed */
        pm->ORegion.strides[2] = 1;
        pm->ORegion.strides[1] = pm->ORegion.size[2];
        pm->ORegion.strides[0] = pm->ORegion.size[1] * pm->ORegion.strides[1];
        pm->ORegion.total = pm->ORegion.size[0] * pm->ORegion.strides[0];
    }

    /* Domain edges and lookup tables along the two decomposed axes */
    for(d = 0; d < 2; d ++) {
        MPI_Comm projected;
        int remain_dims[2] = {0, 0};
        remain_dims[d] = 1;

        /* Nproc[d] + 1 entries: one start offset per task plus the closing edge */
        pm->Grid.edges_int[d] =
            malloc(sizeof(pm->Grid.edges_int[0][0]) * (pm->Nproc[d] + 1));
        pm->Grid.edges_float[d] =
            malloc(sizeof(pm->Grid.edges_float[0][0]) * (pm->Nproc[d] + 1));
        pm->Grid.MeshtoCart[d] =
            malloc(sizeof(pm->Grid.MeshtoCart[0][0]) * pm->Nmesh[d]);

        if(pm->Grid.edges_int[d] == NULL
        || pm->Grid.edges_float[d] == NULL
        || pm->Grid.MeshtoCart[d] == NULL) {
            fastpm_raise(-1, "Failed to allocate the domain tables of axis %d\n", d);
        }

        /* Collect the start offset of every task along mesh direction d */
        MPI_Cart_sub(pm->Comm2D, remain_dims, &projected);
        MPI_Allgather(&pm->IRegion.start[d], 1, MPI_PTRDIFF,
            pm->Grid.edges_int[d], 1, MPI_PTRDIFF, projected);
        MPI_Comm_free(&projected);

        int j;
        for(j = 0; j < pm->Nproc[d]; j ++) {
            pm->Grid.edges_float[d][j] = 1.0 * pm->Grid.edges_int[d][j] / pm->Nmesh[d] * pm->BoxSize[d];
        }
        /* Last edge is at the edge of the box */
        pm->Grid.edges_float[d][pm->Nproc[d]] = pm->BoxSize[d];
        pm->Grid.edges_int[d][pm->Nproc[d]] = pm->Nmesh[d];

        /* fill in the look up table: every mesh index inside
         * [edges_int[j], edges_int[j+1]) maps to mesh coordinate j */
        for(j = 0; j < pm->Nproc[d]; j ++) {
            int i;
            for(i = pm->Grid.edges_int[d][j]; i < pm->Grid.edges_int[d][j+1]; i ++) {
                pm->Grid.MeshtoCart[d][i] = j;
            }
        }
    }

    /* Scratch buffers used only for planning; released again below */
    FastPMFloat * canvas = pm_alloc(pm);
    FastPMFloat * workspace = pm_alloc(pm);

    if(pm->init.use_fftw) {
        pm->r2c = plan_dft_r2c_fftw(
                3, pm->Nmesh, (void*) workspace, (void*) canvas,
                pm->Comm2D,
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_OUT:0)
                | FFTW_ESTIMATE
                | FFTW_DESTROY_INPUT
                );

        pm->c2r = plan_dft_c2r_fftw(
                3, pm->Nmesh, (void*) canvas, (void*) canvas,
                pm->Comm2D,
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_IN:0)
                | FFTW_ESTIMATE
                | FFTW_DESTROY_INPUT
                );
    } else {
        /* PFFT_MEASURE is not enabled for either plan */
        pm->r2c = plan_dft_r2c(
                3, pm->Nmesh, (void*) workspace, (void*) canvas,
                pm->Comm2D,
                PFFT_FORWARD,
                (pm->init.transposed?PFFT_TRANSPOSED_OUT:0)
                | PFFT_PADDED_R2C
                | PFFT_ESTIMATE
                | PFFT_TUNE
                | PFFT_DESTROY_INPUT
                );

        pm->c2r = plan_dft_c2r(
                3, pm->Nmesh, (void*) workspace, (void*) workspace,
                pm->Comm2D,
                PFFT_BACKWARD,
                (pm->init.transposed?PFFT_TRANSPOSED_IN:0)
                | PFFT_PADDED_C2R
                | PFFT_ESTIMATE
                | PFFT_TUNE
                | PFFT_DESTROY_INPUT
                );
    }

    /* Release in reverse order of allocation */
    pm_free(pm, workspace);
    pm_free(pm, canvas);

    /* Wave number of every mesh index: indices at or above Nmesh/2 are
     * shifted down by Nmesh, i.e. mapped to negative frequencies */
    for(d = 0; d < 3; d++) {
        pm->MeshtoK[d] = malloc(pm->Nmesh[d] * sizeof(double));
        if(pm->MeshtoK[d] == NULL) {
            fastpm_raise(-1, "Failed to allocate the wave number table of axis %d\n", d);
        }

        int i;
        for(i = 0; i < pm->Nmesh[d]; i++) {
            int ii = i;
            if(ii >= pm->Nmesh[d] / 2) {
                ii -= pm->Nmesh[d];
            }
            pm->MeshtoK[d][i] = ii * 2 * M_PI / pm->BoxSize[d];
        }
    }
}