/* Placeholder for a parallel complex-to-real transform.
 *
 * As written, this routine does not transform anything: it only asks
 * gmx_parallel_3dfft_limits() for the local slab limits of the setup and
 * then reports success.
 *
 * pfft_setup  Parallel 3D FFT setup, handed to gmx_parallel_3dfft_limits().
 * data        Not used by the current implementation.
 *
 * Returns 0.
 */
int
gmx_parallel_3dfft_complex2real(gmx_parallel_3dfft_t    pfft_setup,
                                void *                  data)
{
    int local_x_start, local_nx;
    int local_y_start, local_ny;

    /* The data buffer is never read or written here. */
    (void)data;

    /* The limits are queried but not consumed yet; the call is kept so the
     * routine still touches the setup exactly as callers may expect.
     */
    gmx_parallel_3dfft_limits(pfft_setup,
                              &local_x_start,
                              &local_nx,
                              &local_y_start,
                              &local_ny);

    return 0;
}
/* Parallel 3D FFT between a real-space grid and its complex transform.
 *
 * Grid layout (nzc is the complex length of the z dimension, nz/2+1):
 *  - in-place (in_data == out_data): the real grid is embedded in z so
 *    that there is room for the complex data; the real-space _grid_
 *    (not data) z dimension is then nzr = 2*nzc;
 *  - out-of-place: the real grid is simply nx*ny*nz, so nzr = nz and no
 *    embedding is used;
 *  - the complex grid is always ny*nx*nzc (note the x/y transpose).
 *
 * pfft_setup  Parallel FFT setup providing the grid sizes, the yz and x
 *             FFT plans, the work buffers and the transpose/communication
 *             data.
 * dir         GMX_FFT_REAL_TO_COMPLEX or GMX_FFT_COMPLEX_TO_REAL; any other
 *             value is reported through gmx_fatal().
 * in_data     Input grid.
 * out_data    Output grid; may be the same pointer as in_data.
 *
 * Returns 0.
 */
int
gmx_parallel_3dfft(gmx_parallel_3dfft_t    pfft_setup,
                   enum gmx_fft_direction  dir,
                   void *                  in_data,
                   void *                  out_data)
{
    const int    nx       = pfft_setup->nx;
    const int    ny       = pfft_setup->ny;
    const int    nz       = pfft_setup->nz;
    const int    nzc      = pfft_setup->nzc;
    const int    in_place = (in_data == out_data);
    const int    nzr      = in_place ? 2*nzc : nz;

    /* Element counts of one yz slice (real and complex) and one xz slice. */
    const int    yz_real  = ny*nzr;
    const int    yz_cplx  = ny*nzc;
    const int    xz_cplx  = nzc*nx;

    t_complex *  scratch  = pfft_setup->work;
    t_complex *  cgrid;
    t_complex *  cbuf;
    real *       rgrid;
    int          x_start, x_len;
    int          y_start, y_len;
    int          n, n_lines;

    gmx_parallel_3dfft_limits(pfft_setup, &x_start, &x_len, &y_start, &y_len);

    /* Number of 1D transforms along x in the local y range. */
    n_lines = y_len*nzc;

    switch(dir)
    {
    case GMX_FFT_REAL_TO_COMPLEX:
        rgrid = (real *)in_data       + x_start*yz_real;
        cgrid = (t_complex *)out_data + x_start*yz_cplx;

        /* Local 2D real-to-complex FFTs, one per yz slice. With embedded
         * (in-place) input this step has to be in-place as well so the
         * embedding comes out right; rgrid and cgrid then alias.
         */
        for(n = 0; n < x_len; n++)
        {
            gmx_fft_2d_real(pfft_setup->fft_yz,
                            GMX_FFT_REAL_TO_COMPLEX,
                            rgrid + n*yz_real,
                            cgrid + n*yz_cplx);
        }

        /* X/Y transpose into the temporary work array. */
        gmx_parallel_transpose_xy(cgrid,
                                  scratch,
                                  nx,
                                  ny,
                                  pfft_setup->local_slab,
                                  pfft_setup->slab2grid_x,
                                  pfft_setup->slab2grid_y,
                                  nzc,
                                  pfft_setup->nnodes,
                                  pfft_setup->node2slab,
                                  pfft_setup->aav,
                                  pfft_setup->comm);

        /* The output offset changes when nx or ny is not divisible by
         * nnodes, so it is recomputed from the local y start.
         */
        cgrid = (t_complex *)out_data + y_start*xz_cplx;

        /* Work array (YXZ order) -> output array (YZX order). */
        for(n = 0; n < y_len; n++)
        {
            gmx_fft_transpose_2d(scratch + n*xz_cplx,
                                 cgrid   + n*xz_cplx,
                                 nx,
                                 nzc);
        }

        /* y_len*nzc complex forward FFTs along x. */
        for(n = 0; n < n_lines; n++)
        {
            gmx_fft_1d(pfft_setup->fft_x,
                       GMX_FFT_FORWARD,
                       cgrid   + n*nx,
                       scratch + n*nx);
        }

        /* Back from YZX to YXZ, ending up in the output array. */
        for(n = 0; n < y_len; n++)
        {
            gmx_fft_transpose_2d(scratch + n*xz_cplx,
                                 cgrid   + n*xz_cplx,
                                 nzc,
                                 nx);
        }
        break;

    case GMX_FFT_COMPLEX_TO_REAL:
        cgrid = (t_complex *)in_data + y_start*xz_cplx;
        rgrid = (real *)out_data     + x_start*yz_real;

        /* In-place we are free to overwrite the input; otherwise a second
         * temporary workspace array is used.
         */
        cbuf = in_place ? cgrid : pfft_setup->work2;

        /* YXZ -> YZX into the work array. */
        for(n = 0; n < y_len; n++)
        {
            gmx_fft_transpose_2d(cgrid   + n*xz_cplx,
                                 scratch + n*xz_cplx,
                                 nx,
                                 nzc);
        }

        /* y_len*nzc complex backward FFTs along x. */
        for(n = 0; n < n_lines; n++)
        {
            gmx_fft_1d(pfft_setup->fft_x,
                       GMX_FFT_BACKWARD,
                       scratch + n*nx,
                       cbuf    + n*nx);
        }

        /* YZX -> YXZ, back into the work array. */
        for(n = 0; n < y_len; n++)
        {
            gmx_fft_transpose_2d(cbuf    + n*xz_cplx,
                                 scratch + n*xz_cplx,
                                 nzc,
                                 nx);
        }

        if(in_place)
        {
            /* The offset changes when nx or ny is not divisible by nnodes. */
            cbuf = (t_complex *)in_data + x_start*yz_cplx;
        }

        gmx_parallel_transpose_xy(scratch,
                                  cbuf,
                                  ny,
                                  nx,
                                  pfft_setup->local_slab,
                                  pfft_setup->slab2grid_y,
                                  pfft_setup->slab2grid_x,
                                  nzc,
                                  pfft_setup->nnodes,
                                  pfft_setup->node2slab,
                                  pfft_setup->aav,
                                  pfft_setup->comm);

        /* Local 2D complex-to-real FFTs, one per yz slice. For an in-place
         * 3D FFT this step is in-place too, which keeps the data
         * organization right.
         */
        for(n = 0; n < x_len; n++)
        {
            gmx_fft_2d_real(pfft_setup->fft_yz,
                            GMX_FFT_COMPLEX_TO_REAL,
                            cbuf  + n*yz_cplx,
                            rgrid + n*yz_real);
        }
        break;

    default:
        gmx_fatal(FARGS,"Incorrect FFT direction.");
        break;
    }

    /* The YX back-transpose is skipped to save communication, so the
     * grid is now in YXZ order.
     */
    return 0;
}
/* Create an FFT grid of nx*ny*nz points together with its FFT setup.
 *
 * The grid structure is allocated with snew(), its dimension and
 * leading-dimension fields are filled in, either the parallel (MPI) or the
 * serial 3D real FFT is initialized, and the aligned data (ptr) and
 * workspace buffers are allocated.
 *
 * nx, ny, nz     Grid dimensions.
 * node2slab      Forwarded to gmx_parallel_3dfft_init() (parallel MPI
 * slab2grid_x    setup only).
 * cr             Communication record; when it is NULL, has a single node,
 *                or MPI is not compiled in, a serial grid is made.
 * bReproducible  Forwarded to the parallel setup, or selects
 *                GMX_FFT_FLAG_CONSERVATIVE instead of GMX_FFT_FLAG_NONE for
 *                the serial setup.
 *
 * Returns the newly created grid.
 */
t_fftgrid *mk_fftgrid(int          nx,
                      int          ny,
                      int          nz,
                      int          *node2slab,
                      int          *slab2grid_x,
                      t_commrec *  cr,
                      bool         bReproducible)
{
    /* parallel runs with non-parallel ffts haven't been tested yet */
    t_fftgrid *grid;
    int        nnodes = 1;
    int        extra_x, extra_y;

#ifdef GMX_MPI
    if (cr && cr->nnodes > 1)
    {
        MPI_Comm_size(cr->mpi_comm_mygroup, &nnodes);
    }
#endif

    snew(grid, 1);

    grid->nx        = nx;
    grid->ny        = ny;
    grid->nz        = nz;
    grid->nxyz      = nx*ny*nz;
    grid->bParallel = (nnodes > 1);

    /* Leading dimensions: the parallel real grid is padded in z to twice
     * the complex length, and the parallel complex grid is x/y transposed.
     */
    grid->la2c  = (nz/2 + 1);
    grid->la2r  = grid->bParallel ? (nz/2 + 1)*2 : nz;
    grid->la12r = ny*grid->la2r;
    grid->la12c = (grid->bParallel ? nx : ny)*grid->la2c;

    /* This code assumes that, when the grid is not divisible by nnodes,
     * the local grid sizes differ by at most 1.
     */
    extra_x = (nx % nnodes != 0) ? 1 : 0;
    extra_y = (ny % nnodes != 0) ? 1 : 0;

    grid->nptr = (nx + extra_x)*(ny + extra_y)*grid->la2c*2;

    if (!grid->bParallel)
    {
        gmx_fft_init_3d_real(&grid->fft_setup, nx, ny, nz,
                             bReproducible ? GMX_FFT_FLAG_CONSERVATIVE
                                           : GMX_FFT_FLAG_NONE);
    }
    else
    {
#ifdef GMX_MPI
        gmx_parallel_3dfft_init(&grid->mpi_fft_setup, nx, ny, nz,
                                node2slab, slab2grid_x,
                                cr->mpi_comm_mygroup,
                                bReproducible);

        gmx_parallel_3dfft_limits(grid->mpi_fft_setup,
                                  &(grid->pfft.local_x_start),
                                  &(grid->pfft.local_nx),
                                  &(grid->pfft.local_y_start_after_transpose),
                                  &(grid->pfft.local_ny_after_transpose));
#else
        gmx_fatal(FARGS,"Parallel FFT supported with MPI only!");
#endif
    }

    grid->ptr = (real *)gmx_alloc_aligned(grid->nptr*sizeof(*(grid->ptr)));

#ifdef GMX_MPI
    if (grid->bParallel)
    {
        int maxlocalsize;

        if (debug)
        {
            print_parfft(debug,"Plan", &grid->pfft);
        }

        /* The workspace must hold the larger of the local x-slab and the
         * local y-slab.
         */
        maxlocalsize = max((nx/nnodes + extra_x)*ny*grid->la2c*2,
                           (ny/nnodes + extra_y)*nx*grid->la2c*2);
        grid->workspace =
            (real *)gmx_alloc_aligned(maxlocalsize*sizeof(*(grid->workspace)));
    }
    else
#endif
    {
        /* Serial grid (always the case without MPI): the workspace is as
         * large as the grid itself.
         */
        grid->workspace =
            (real *)gmx_alloc_aligned(grid->nptr*sizeof(*(grid->workspace)));
    }

    return grid;
}