示例#1
0
/* Stub for a parallel complex-to-real 3D transform.
 *
 * As written, this routine only reads the work pointer and the grid
 * dimensions (nx, ny, nzc) from pfft_setup, asks
 * gmx_parallel_3dfft_limits() for the local x/y slab limits, and returns.
 * No transform is performed and the contents of data are not touched.
 *
 * pfft_setup   Parallel FFT setup to query.
 * data         Grid data; only converted to a t_complex pointer here.
 *
 * Returns 0 unconditionally.
 */
int
gmx_parallel_3dfft_complex2real(gmx_parallel_3dfft_t    pfft_setup,
                                void *                  data)
{
    t_complex *  scratch = pfft_setup->work;
    t_complex *  cgrid   = data;
    int          dim_x   = pfft_setup->nx;
    int          dim_y   = pfft_setup->ny;
    int          dim_zc  = pfft_setup->nzc;
    int          x_first,x_count;
    int          y_first,y_count;

    gmx_parallel_3dfft_limits(pfft_setup,
                              &x_first,
                              &x_count,
                              &y_first,
                              &y_count);

    /* None of the values gathered above are consumed yet. */
    (void)scratch;
    (void)cgrid;
    (void)dim_x;
    (void)dim_y;
    (void)dim_zc;

    return 0;
}
示例#2
0
/* Perform one slab-decomposed 3D FFT, either real-to-complex or
 * complex-to-real, on the part of the grid this process works on.
 *
 * pfft_setup   Setup data. This routine reads its nx, ny, nz, nzc, work,
 *              work2, fft_yz, fft_x, local_slab, slab2grid_x, slab2grid_y,
 *              nnodes, node2slab, aav and comm members.
 * dir          GMX_FFT_REAL_TO_COMPLEX or GMX_FFT_COMPLEX_TO_REAL. Any
 *              other value ends up in gmx_fatal().
 * in_data      Input grid; accessed as real for real-to-complex and as
 *              t_complex for complex-to-real.
 * out_data     Output grid. Passing the same pointer as in_data selects
 *              the in-place layout, where the real grid z-dimension is
 *              2*nzc instead of nz.
 *
 * Returns 0 on every path that returns. The return values of the
 * gmx_fft_* and transpose calls are not inspected.
 */
int
gmx_parallel_3dfft(gmx_parallel_3dfft_t    pfft_setup,
                   enum gmx_fft_direction  dir,
                   void *                  in_data,
                   void *                  out_data)
{
    /* Loop indices; k is declared but not used in this routine. */
    int          i,j,k;
    int          nx,ny,nz,nzc,nzr;
    int          local_x_start,local_nx;
    int          local_y_start,local_ny;    
    t_complex *  work;
    real *       rdata;
    t_complex *  cdata;
    t_complex *  ctmp;
    
    work    = pfft_setup->work;
    
    /* When we do in-place FFTs the data need to be embedded in the z-dimension,
     * so there is room for the complex data. This means the direct space
     * _grid_ (not data) dimensions will be nx*ny*(nzc*2), where nzc=nz/2+1.
     * If we do out-of-place transforms the direct space dimensions are simply
     * nx*ny*nz, and no embedding is used.
     * The complex dimensions are always ny*nx*nzc (note the transpose).
     *
     * The direct space _grid_ dimension is nzr.
     */
    
    nx  = pfft_setup->nx;
    ny  = pfft_setup->ny;
    nz  = pfft_setup->nz;
    nzc = pfft_setup->nzc;
    
    /* Pointer equality of the two buffers is what selects in-place mode. */
    if(in_data == out_data)
    {
        nzr = 2*nzc;
    }
    else
    {
        nzr = nz;
    }

    /* Fetch the x-range and y-range this process is responsible for. */
    gmx_parallel_3dfft_limits(pfft_setup,
                              &local_x_start,
                              &local_nx,
                              &local_y_start,
                              &local_ny);

    if(dir == GMX_FFT_REAL_TO_COMPLEX)
    {
        /* Offset both views by local_x_start yz-slices: each slice holds
         * ny*nzr reals on input and ny*nzc complex values on output.
         */
        rdata =      (real *)in_data  + local_x_start*ny*nzr;
        cdata = (t_complex *)out_data + local_x_start*ny*nzc;
        
        /* Perform nx local 2D real-to-complex FFTs in the yz slices.
         * When the input data is "embedded" for 3D-in-place transforms, this
         * must also be done in-place to get the data embedding right.
         * 
         * Note that rdata==cdata when we work in-place. 
         */
        for(i=0;i<local_nx;i++)
        {
            gmx_fft_2d_real(pfft_setup->fft_yz,
                            GMX_FFT_REAL_TO_COMPLEX,
                            rdata + i*ny*nzr,
                            cdata + i*ny*nzc);
        }
        
        /* Transpose to temporary work array */
        gmx_parallel_transpose_xy(cdata,
                                  work,
                                  nx,
                                  ny,
                                  pfft_setup->local_slab,
                                  pfft_setup->slab2grid_x,
                                  pfft_setup->slab2grid_y,
                                  nzc,
                                  pfft_setup->nnodes,
                                  pfft_setup->node2slab,
                                  pfft_setup->aav,
                                  pfft_setup->comm);

        /* Transpose from temporary work array in order YXZ to
         * the output array in order YZX. 
         */ 
        /* output cdata changes when nx or ny not divisible by nnodes */
        /* From here on cdata is offset by local_y_start slices of nx*nzc. */
        cdata = (t_complex *)out_data + local_y_start*nx*nzc;
        for(j=0;j<local_ny;j++)
        {
            gmx_fft_transpose_2d(work  + j*nzc*nx,
                                 cdata + j*nzc*nx,
                                 nx,
                                 nzc);
        }

        /* Perform local_ny*nzc complex FFTs along the x dimension */
        /* Each transform reads nx values from cdata and writes them to work. */
        for(i=0;i<local_ny*nzc;i++)
        {
            gmx_fft_1d(pfft_setup->fft_x,
                       GMX_FFT_FORWARD,
                       cdata + i*nx,
                       work  + i*nx);
        }    
    
        /* Transpose back from YZX to YXZ. */
        /* The result of this step, in cdata, is the final output. */
        for(j=0;j<local_ny;j++)
        {
            gmx_fft_transpose_2d(work  + j*nzc*nx,
                                 cdata + j*nzc*nx,
                                 nzc,
                                 nx);
        }
    }
    else if(dir == GMX_FFT_COMPLEX_TO_REAL)
    {
        /* Input is offset by local_y_start slices of nx*nzc complex values,
         * output by local_x_start slices of ny*nzr reals.
         */
        cdata = (t_complex *)in_data  + local_y_start*nx*nzc;
        rdata =      (real *)out_data + local_x_start*ny*nzr;
        
        /* If we are working in-place it doesn't matter that we destroy
         * input data. Otherwise we use an extra temporary workspace array.
         */
        if(in_data == out_data)
        {
            ctmp = cdata;
        }
        else
        {
            ctmp = pfft_setup->work2;
        }
                
        /* Transpose from YXZ to YZX. */
        for(j=0;j<local_ny;j++)
        {
            gmx_fft_transpose_2d(cdata + j*nzc*nx,
                                 work  + j*nzc*nx,
                                 nx,
                                 nzc);
        }
        
        /* Perform local_ny*nzc complex FFTs along the x dimension */
        /* Each transform reads nx values from work and writes them to ctmp. */
        for(i=0;i<local_ny*nzc;i++)
        {
            gmx_fft_1d(pfft_setup->fft_x,
                       GMX_FFT_BACKWARD,
                       work + i*nx,
                       ctmp + i*nx);
        }    
        
        /* Transpose from YZX to YXZ. */
        for(j=0;j<local_ny;j++)
        {
            gmx_fft_transpose_2d(ctmp + j*nzc*nx,
                                 work + j*nzc*nx,
                                 nzc,
                                 nx);
        }
        
        /* In-place: re-point ctmp at the local x-slab of the caller's buffer
         * so the parallel transpose writes there. Out-of-place: ctmp stays
         * pfft_setup->work2.
         */
        if(in_data == out_data)
        {
            /* output cdata changes when nx or ny not divisible by nnodes */
           ctmp = (t_complex *)in_data + local_x_start*ny*nzc;
        }
        /* Same call as in the forward direction, but with the nx/ny and
         * slab2grid_x/slab2grid_y arguments swapped.
         */
        gmx_parallel_transpose_xy(work,
                                  ctmp,
                                  ny,
                                  nx,
                                  pfft_setup->local_slab,
                                  pfft_setup->slab2grid_y,
                                  pfft_setup->slab2grid_x,
                                  nzc,
                                  pfft_setup->nnodes,
                                  pfft_setup->node2slab,
                                  pfft_setup->aav,
                                  pfft_setup->comm);
        
        
        /* Perform local_nx local 2D complex-to-real FFTs in the yz slices,
         * reading from ctmp and writing to rdata.
         * When the 3D FFT is done in-place, ctmp points into the caller's
         * buffer, so this step is in-place too in order to get the data
         * organization right. When it is out-of-place, ctmp is work2 and
         * the result goes to the separate output buffer.
         */
        for(i=0;i<local_nx;i++)
        {
            gmx_fft_2d_real(pfft_setup->fft_yz,
                            GMX_FFT_COMPLEX_TO_REAL,
                            ctmp  + i*ny*nzc,
                            rdata + i*ny*nzr);
        }
    }
    else
    {
        /* NOTE(review): gmx_fatal presumably does not return - confirm. */
        gmx_fatal(FARGS,"Incorrect FFT direction.");
    }
    
    /* Skip the YX backtranspose to save communication! Grid is now YXZ */
    /* NOTE(review): the remark above appears to describe the
     * real-to-complex output, which is left in YXZ order - confirm.
     */
    return 0;
}
示例#3
0
/* Allocate and set up an FFT grid for an nx*ny*nz grid.
 *
 * The grid is treated as parallel when GMX_MPI is defined, cr is non-NULL
 * with cr->nnodes > 1, and MPI_Comm_size() on cr->mpi_comm_mygroup reports
 * more than one process. In that case the parallel 3D FFT setup is
 * initialised and its local slab limits are stored in grid->pfft; otherwise
 * a single-process real 3D FFT setup is created.
 *
 * nx, ny, nz     Grid dimensions.
 * node2slab      Passed through to gmx_parallel_3dfft_init() (parallel only).
 * slab2grid_x    Passed through to gmx_parallel_3dfft_init() (parallel only).
 * cr             Communication record; may be NULL.
 * bReproducible  Forwarded to the parallel setup, or mapped to
 *                GMX_FFT_FLAG_CONSERVATIVE / GMX_FFT_FLAG_NONE for the
 *                single-process setup.
 *
 * Returns the new grid, with ptr and workspace obtained from
 * gmx_alloc_aligned().
 */
t_fftgrid *mk_fftgrid(int          nx,
                      int          ny,
                      int          nz,
                      int          *node2slab,
                      int          *slab2grid_x,
                      t_commrec *  cr,
                      bool         bReproducible)
{
/* parallel runs with non-parallel ffts haven't been tested yet */
    t_fftgrid *   grid;
    int           nranks = 1;
    int           pad_x,pad_y;

#ifdef GMX_MPI
    if (cr && cr->nnodes > 1) {
        MPI_Comm_size(cr->mpi_comm_mygroup,&nranks);
    }
#endif

    snew(grid,1);
    grid->nx        = nx;
    grid->ny        = ny;
    grid->nz        = nz;
    grid->nxyz      = nx*ny*nz;
    grid->bParallel = (nranks > 1);

    /* Row lengths along z: la2c for complex data, la2r for real data.
     * The real rows are padded to twice the complex length when parallel.
     */
    grid->la2c = (nz/2+1);
    if (grid->bParallel)
    {
        grid->la2r  = (nz/2+1)*2;
        grid->la12c = nx*grid->la2c;
    }
    else
    {
        grid->la2r  = nz;
        grid->la12c = ny*grid->la2c;
    }
    grid->la12r = ny*grid->la2r;

    /* This code assumes that when the grid is not divisible by the number
     * of processes, the maximum difference in local grid sizes is 1.
     */
    pad_x = (nx % nranks != 0) ? 1 : 0;
    pad_y = (ny % nranks != 0) ? 1 : 0;

    grid->nptr = (nx + pad_x)*(ny + pad_y)*grid->la2c*2;

    if (!grid->bParallel)
    {
        gmx_fft_init_3d_real(&grid->fft_setup,nx,ny,nz,
                             bReproducible ? GMX_FFT_FLAG_CONSERVATIVE
                                           : GMX_FFT_FLAG_NONE);
    }
    else
    {
#ifdef GMX_MPI
        gmx_parallel_3dfft_init(&grid->mpi_fft_setup,nx,ny,nz,
                                node2slab,slab2grid_x,cr->mpi_comm_mygroup,
                                bReproducible);

        gmx_parallel_3dfft_limits(grid->mpi_fft_setup,
                                  &(grid->pfft.local_x_start),
                                  &(grid->pfft.local_nx),
                                  &(grid->pfft.local_y_start_after_transpose),
                                  &(grid->pfft.local_ny_after_transpose));
#else
        gmx_fatal(FARGS,"Parallel FFT supported with MPI only!");
#endif
    }

    grid->ptr = (real *)gmx_alloc_aligned(grid->nptr*sizeof(*(grid->ptr)));

#ifdef GMX_MPI
    if (grid->bParallel)
    {
        int  maxlocalsize;

        if (debug)
        {
            print_parfft(debug,"Plan", &grid->pfft);
        }

        /* Workspace is sized for the larger of the x-slab and y-slab. */
        maxlocalsize = max((nx/nranks + pad_x)*ny*grid->la2c*2,
                           (ny/nranks + pad_y)*nx*grid->la2c*2);
        grid->workspace = (real *)
            gmx_alloc_aligned(maxlocalsize*sizeof(*(grid->workspace)));
    }
    else
#endif
    {
        /* Not parallel (with or without MPI): workspace matches ptr in size. */
        grid->workspace = (real *)
            gmx_alloc_aligned(grid->nptr*sizeof(*(grid->workspace)));
    }

    return grid;
}