Ejemplo n.º 1
0
int
gmx_parallel_3dfft(gmx_parallel_3dfft_t    pfft_setup,
                   enum gmx_fft_direction  dir,
                   void *                  in_data,
                   void *                  out_data)
{
    int          i,j,k;
    int          nx,ny,nz,nzc,nzr;
    int          local_x_start,local_nx;
    int          local_y_start,local_ny;    
    t_complex *  work;
    real *       rdata;
    t_complex *  cdata;
    t_complex *  ctmp;
    
    work    = pfft_setup->work;
    
    /* When we do in-place FFTs the data need to be embedded in the z-dimension,
     * so there is room for the complex data. This means the direct space
     * _grid_ (not data) dimensions will be nx*ny*(nzc*2), where nzc=nz/2+1.
     * If we do out-of-place transforms the direct space dimensions are simply
     * nx*ny*nz, and no embedding is used.
     * The complex dimensions are always ny*nx*nzc (note the transpose).
     *
     * The direct space _grid_ dimension is nzr.
     */
    
    nx  = pfft_setup->nx;
    ny  = pfft_setup->ny;
    nz  = pfft_setup->nz;
    nzc = pfft_setup->nzc;
    
    if(in_data == out_data)
    {
        nzr = 2*nzc;
    }
    else
    {
        nzr = nz;
    }

    gmx_parallel_3dfft_limits(pfft_setup,
                              &local_x_start,
                              &local_nx,
                              &local_y_start,
                              &local_ny);

    if(dir == GMX_FFT_REAL_TO_COMPLEX)
    {
        rdata =      (real *)in_data  + local_x_start*ny*nzr;
        cdata = (t_complex *)out_data + local_x_start*ny*nzc;
        
        /* Perform nx local 2D real-to-complex FFTs in the yz slices.
         * When the input data is "embedded" for 3D-in-place transforms, this
         * must also be done in-place to get the data embedding right.
         * 
         * Note that rdata==cdata when we work in-place. 
         */
        for(i=0;i<local_nx;i++)
        {
            gmx_fft_2d_real(pfft_setup->fft_yz,
                            GMX_FFT_REAL_TO_COMPLEX,
                            rdata + i*ny*nzr,
                            cdata + i*ny*nzc);
        }
        
        /* Transpose to temporary work array */
        gmx_parallel_transpose_xy(cdata,
                                  work,
                                  nx,
                                  ny,
                                  pfft_setup->local_slab,
                                  pfft_setup->slab2grid_x,
                                  pfft_setup->slab2grid_y,
                                  nzc,
                                  pfft_setup->nnodes,
                                  pfft_setup->node2slab,
                                  pfft_setup->aav,
                                  pfft_setup->comm);

        /* Transpose from temporary work array in order YXZ to
         * the output array in order YZX. 
         */ 
        /* output cdata changes when nx or ny not divisible by nnodes */
        cdata = (t_complex *)out_data + local_y_start*nx*nzc;
        for(j=0;j<local_ny;j++)
        {
            gmx_fft_transpose_2d(work  + j*nzc*nx,
                                 cdata + j*nzc*nx,
                                 nx,
                                 nzc);
        }

        /* Perform local_ny*nzc complex FFTs along the x dimension */
        for(i=0;i<local_ny*nzc;i++)
        {
            gmx_fft_1d(pfft_setup->fft_x,
                       GMX_FFT_FORWARD,
                       cdata + i*nx,
                       work  + i*nx);
        }    
    
        /* Transpose back from YZX to YXZ. */
        for(j=0;j<local_ny;j++)
        {
            gmx_fft_transpose_2d(work  + j*nzc*nx,
                                 cdata + j*nzc*nx,
                                 nzc,
                                 nx);
        }
    }
    else if(dir == GMX_FFT_COMPLEX_TO_REAL)
    {
        cdata = (t_complex *)in_data  + local_y_start*nx*nzc;
        rdata =      (real *)out_data + local_x_start*ny*nzr;
        
        /* If we are working in-place it doesn't matter that we destroy
         * input data. Otherwise we use an extra temporary workspace array.
         */
        if(in_data == out_data)
        {
            ctmp = cdata;
        }
        else
        {
            ctmp = pfft_setup->work2;
        }
                
        /* Transpose from YXZ to YZX. */
        for(j=0;j<local_ny;j++)
        {
            gmx_fft_transpose_2d(cdata + j*nzc*nx,
                                 work  + j*nzc*nx,
                                 nx,
                                 nzc);
        }
        
        /* Perform local_ny*nzc complex FFTs along the x dimension */
        for(i=0;i<local_ny*nzc;i++)
        {
            gmx_fft_1d(pfft_setup->fft_x,
                       GMX_FFT_BACKWARD,
                       work + i*nx,
                       ctmp + i*nx);
        }    
        
        /* Transpose from YZX to YXZ. */
        for(j=0;j<local_ny;j++)
        {
            gmx_fft_transpose_2d(ctmp + j*nzc*nx,
                                 work + j*nzc*nx,
                                 nzc,
                                 nx);
        }
        
        if(in_data == out_data)
        {
            /* output cdata changes when nx or ny not divisible by nnodes */
           ctmp = (t_complex *)in_data + local_x_start*ny*nzc;
        }
        gmx_parallel_transpose_xy(work,
                                  ctmp,
                                  ny,
                                  nx,
                                  pfft_setup->local_slab,
                                  pfft_setup->slab2grid_y,
                                  pfft_setup->slab2grid_x,
                                  nzc,
                                  pfft_setup->nnodes,
                                  pfft_setup->node2slab,
                                  pfft_setup->aav,
                                  pfft_setup->comm);
        
        
        /* Perform nx local 2D complex-to-real FFTs in the yz slices.
         * The 3D FFT is done in-place, so we need to do this in-place too in order
         * to get the data organization right.
         */
        for(i=0;i<local_nx;i++)
        {
            gmx_fft_2d_real(pfft_setup->fft_yz,
                            GMX_FFT_COMPLEX_TO_REAL,
                            ctmp  + i*ny*nzc,
                            rdata + i*ny*nzr);
        }
    }
    else
    {
        gmx_fatal(FARGS,"Incorrect FFT direction.");
    }
    
    /* Skip the YX backtranspose to save communication! Grid is now YXZ */
    return 0;
}
Ejemplo n.º 2
0
int
gmx_fft_2d_real          (gmx_fft_t                  fft,
                          enum gmx_fft_direction     dir,
                          void *                     in_data,
                          void *                     out_data)
{
    int                i, j, nx, ny, nyc;
    t_complex     *    data;
    real       *       work;
    real       *       p1;
    real       *       p2;

    nx = fft->n;
    ny = fft->next->n;
    /* Number of complex elements in y direction */
    nyc = (ny/2+1);

    work = fft->work+4*nx;

    if (dir == GMX_FFT_REAL_TO_COMPLEX)
    {
        /* If we are doing an in-place transform the 2D array is already
         * properly padded by the user, and we are all set.
         *
         * For out-of-place there is no array padding, but FFTPACK only
         * does in-place FFTs internally, so we need to start by copying
         * data from the input to the padded (larger) output array.
         */
        if (in_data != out_data)
        {
            p1 = (real *)in_data;
            p2 = (real *)out_data;

            for (i = 0; i < nx; i++)
            {
                for (j = 0; j < ny; j++)
                {
                    p2[i*nyc*2+j] = p1[i*ny+j];
                }
            }
        }
        data = static_cast<t_complex *>(out_data);

        /* y real-to-complex FFTs */
        for (i = 0; i < nx; i++)
        {
            gmx_fft_1d_real(fft->next, GMX_FFT_REAL_TO_COMPLEX, data+i*nyc, data+i*nyc);
        }

        /* Transform to get X data in place */
        gmx_fft_transpose_2d(data, data, nx, nyc);

        /* Complex-to-complex X FFTs */
        for (i = 0; i < nyc; i++)
        {
            gmx_fft_1d(fft, GMX_FFT_FORWARD, data+i*nx, data+i*nx);
        }

        /* Transpose back */
        gmx_fft_transpose_2d(data, data, nyc, nx);

    }
    else if (dir == GMX_FFT_COMPLEX_TO_REAL)
    {
        /* An in-place complex-to-real transform is straightforward,
         * since the output array must be large enough for the padding to fit.
         *
         * For out-of-place complex-to-real transforms we cannot just copy
         * data to the output array, since it is smaller than the input.
         * In this case there's nothing to do but employing temporary work data,
         * starting at work+4*nx and using nx*nyc*2 elements.
         */
        if (in_data != out_data)
        {
            memcpy(work, in_data, sizeof(t_complex)*nx*nyc);
            data = reinterpret_cast<t_complex *>(work);
        }
        else
        {
            /* in-place */
            data = reinterpret_cast<t_complex *>(out_data);
        }

        /* Transpose to get X arrays */
        gmx_fft_transpose_2d(data, data, nx, nyc);

        /* Do X iFFTs */
        for (i = 0; i < nyc; i++)
        {
            gmx_fft_1d(fft, GMX_FFT_BACKWARD, data+i*nx, data+i*nx);
        }

        /* Transpose to get Y arrays */
        gmx_fft_transpose_2d(data, data, nyc, nx);

        /* Do Y iFFTs */
        for (i = 0; i < nx; i++)
        {
            gmx_fft_1d_real(fft->next, GMX_FFT_COMPLEX_TO_REAL, data+i*nyc, data+i*nyc);
        }

        if (in_data != out_data)
        {
            /* Output (pointed to by data) is now in padded format.
             * Pack it into out_data if we were doing an out-of-place transform.
             */
            p1 = (real *)data;
            p2 = (real *)out_data;

            for (i = 0; i < nx; i++)
            {
                for (j = 0; j < ny; j++)
                {
                    p2[i*ny+j] = p1[i*nyc*2+j];
                }
            }
        }
    }
    else
    {
        gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
        return EINVAL;
    }

    return 0;
}