int gmx_parallel_3dfft(gmx_parallel_3dfft_t pfft_setup, enum gmx_fft_direction dir, void * in_data, void * out_data) { int i,j,k; int nx,ny,nz,nzc,nzr; int local_x_start,local_nx; int local_y_start,local_ny; t_complex * work; real * rdata; t_complex * cdata; t_complex * ctmp; work = pfft_setup->work; /* When we do in-place FFTs the data need to be embedded in the z-dimension, * so there is room for the complex data. This means the direct space * _grid_ (not data) dimensions will be nx*ny*(nzc*2), where nzc=nz/2+1. * If we do out-of-place transforms the direct space dimensions are simply * nx*ny*nz, and no embedding is used. * The complex dimensions are always ny*nx*nzc (note the transpose). * * The direct space _grid_ dimension is nzr. */ nx = pfft_setup->nx; ny = pfft_setup->ny; nz = pfft_setup->nz; nzc = pfft_setup->nzc; if(in_data == out_data) { nzr = 2*nzc; } else { nzr = nz; } gmx_parallel_3dfft_limits(pfft_setup, &local_x_start, &local_nx, &local_y_start, &local_ny); if(dir == GMX_FFT_REAL_TO_COMPLEX) { rdata = (real *)in_data + local_x_start*ny*nzr; cdata = (t_complex *)out_data + local_x_start*ny*nzc; /* Perform nx local 2D real-to-complex FFTs in the yz slices. * When the input data is "embedded" for 3D-in-place transforms, this * must also be done in-place to get the data embedding right. * * Note that rdata==cdata when we work in-place. */ for(i=0;i<local_nx;i++) { gmx_fft_2d_real(pfft_setup->fft_yz, GMX_FFT_REAL_TO_COMPLEX, rdata + i*ny*nzr, cdata + i*ny*nzc); } /* Transpose to temporary work array */ gmx_parallel_transpose_xy(cdata, work, nx, ny, pfft_setup->local_slab, pfft_setup->slab2grid_x, pfft_setup->slab2grid_y, nzc, pfft_setup->nnodes, pfft_setup->node2slab, pfft_setup->aav, pfft_setup->comm); /* Transpose from temporary work array in order YXZ to * the output array in order YZX. */ /* output cdata changes when nx or ny not divisible by nnodes */ cdata = (t_complex *)out_data + local_y_start*nx*nzc; for(j=0;j<local_ny;j++) { gmx_fft_transpose_2d(work + j*nzc*nx, cdata + j*nzc*nx, nx, nzc); } /* Perform local_ny*nzc complex FFTs along the x dimension */ for(i=0;i<local_ny*nzc;i++) { gmx_fft_1d(pfft_setup->fft_x, GMX_FFT_FORWARD, cdata + i*nx, work + i*nx); } /* Transpose back from YZX to YXZ. */ for(j=0;j<local_ny;j++) { gmx_fft_transpose_2d(work + j*nzc*nx, cdata + j*nzc*nx, nzc, nx); } } else if(dir == GMX_FFT_COMPLEX_TO_REAL) { cdata = (t_complex *)in_data + local_y_start*nx*nzc; rdata = (real *)out_data + local_x_start*ny*nzr; /* If we are working in-place it doesn't matter that we destroy * input data. Otherwise we use an extra temporary workspace array. */ if(in_data == out_data) { ctmp = cdata; } else { ctmp = pfft_setup->work2; } /* Transpose from YXZ to YZX. */ for(j=0;j<local_ny;j++) { gmx_fft_transpose_2d(cdata + j*nzc*nx, work + j*nzc*nx, nx, nzc); } /* Perform local_ny*nzc complex FFTs along the x dimension */ for(i=0;i<local_ny*nzc;i++) { gmx_fft_1d(pfft_setup->fft_x, GMX_FFT_BACKWARD, work + i*nx, ctmp + i*nx); } /* Transpose from YZX to YXZ. */ for(j=0;j<local_ny;j++) { gmx_fft_transpose_2d(ctmp + j*nzc*nx, work + j*nzc*nx, nzc, nx); } if(in_data == out_data) { /* output cdata changes when nx or ny not divisible by nnodes */ ctmp = (t_complex *)in_data + local_x_start*ny*nzc; } gmx_parallel_transpose_xy(work, ctmp, ny, nx, pfft_setup->local_slab, pfft_setup->slab2grid_y, pfft_setup->slab2grid_x, nzc, pfft_setup->nnodes, pfft_setup->node2slab, pfft_setup->aav, pfft_setup->comm); /* Perform nx local 2D complex-to-real FFTs in the yz slices. * The 3D FFT is done in-place, so we need to do this in-place too in order * to get the data organization right. */ for(i=0;i<local_nx;i++) { gmx_fft_2d_real(pfft_setup->fft_yz, GMX_FFT_COMPLEX_TO_REAL, ctmp + i*ny*nzc, rdata + i*ny*nzr); } } else { gmx_fatal(FARGS,"Incorrect FFT direction."); } /* Skip the YX backtranspose to save communication! Grid is now YXZ */ return 0; }
int gmx_fft_2d_real (gmx_fft_t fft, enum gmx_fft_direction dir, void * in_data, void * out_data) { int i, j, nx, ny, nyc; t_complex * data; real * work; real * p1; real * p2; nx = fft->n; ny = fft->next->n; /* Number of complex elements in y direction */ nyc = (ny/2+1); work = fft->work+4*nx; if (dir == GMX_FFT_REAL_TO_COMPLEX) { /* If we are doing an in-place transform the 2D array is already * properly padded by the user, and we are all set. * * For out-of-place there is no array padding, but FFTPACK only * does in-place FFTs internally, so we need to start by copying * data from the input to the padded (larger) output array. */ if (in_data != out_data) { p1 = (real *)in_data; p2 = (real *)out_data; for (i = 0; i < nx; i++) { for (j = 0; j < ny; j++) { p2[i*nyc*2+j] = p1[i*ny+j]; } } } data = static_cast<t_complex *>(out_data); /* y real-to-complex FFTs */ for (i = 0; i < nx; i++) { gmx_fft_1d_real(fft->next, GMX_FFT_REAL_TO_COMPLEX, data+i*nyc, data+i*nyc); } /* Transform to get X data in place */ gmx_fft_transpose_2d(data, data, nx, nyc); /* Complex-to-complex X FFTs */ for (i = 0; i < nyc; i++) { gmx_fft_1d(fft, GMX_FFT_FORWARD, data+i*nx, data+i*nx); } /* Transpose back */ gmx_fft_transpose_2d(data, data, nyc, nx); } else if (dir == GMX_FFT_COMPLEX_TO_REAL) { /* An in-place complex-to-real transform is straightforward, * since the output array must be large enough for the padding to fit. * * For out-of-place complex-to-real transforms we cannot just copy * data to the output array, since it is smaller than the input. * In this case there's nothing to do but employing temporary work data, * starting at work+4*nx and using nx*nyc*2 elements. */ if (in_data != out_data) { memcpy(work, in_data, sizeof(t_complex)*nx*nyc); data = reinterpret_cast<t_complex *>(work); } else { /* in-place */ data = reinterpret_cast<t_complex *>(out_data); } /* Transpose to get X arrays */ gmx_fft_transpose_2d(data, data, nx, nyc); /* Do X iFFTs */ for (i = 0; i < nyc; i++) { gmx_fft_1d(fft, GMX_FFT_BACKWARD, data+i*nx, data+i*nx); } /* Transpose to get Y arrays */ gmx_fft_transpose_2d(data, data, nyc, nx); /* Do Y iFFTs */ for (i = 0; i < nx; i++) { gmx_fft_1d_real(fft->next, GMX_FFT_COMPLEX_TO_REAL, data+i*nyc, data+i*nyc); } if (in_data != out_data) { /* Output (pointed to by data) is now in padded format. * Pack it into out_data if we were doing an out-of-place transform. */ p1 = (real *)data; p2 = (real *)out_data; for (i = 0; i < nx; i++) { for (j = 0; j < ny; j++) { p2[i*ny+j] = p1[i*nyc*2+j]; } } } } else { gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction."); return EINVAL; } return 0; }