//check eriks mail for one thing left to do int main(int argc,char** argv) { int prank,i,j,k; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD,&prank); debug=stderr; if (argc!=6) { if (prank==0) printf("Usage: gmx_test_3dfft N M K P1 P2\n"); return 1; } gmx_parallel_3dfft_t pfft_setup; ivec ndata = {atoi(argv[1]),atoi(argv[2]),atoi(argv[3])}; real * real_data; t_complex * complex_data; MPI_Comm comm[2]; ivec local_ndata; ivec local_offset; ivec local_size; int P[]= {atoi(argv[4]),atoi(argv[5])}; real* compare; split_communicator(MPI_COMM_WORLD,comm,P); gmx_parallel_3dfft_init (&pfft_setup, ndata, &real_data, &complex_data, comm, 0, 0); //last two: slab2index, bReprodusible gmx_parallel_3dfft_real_limits(pfft_setup,local_ndata,local_offset,local_size); snew(compare,ndata[0]*ndata[1]*ndata[2]); srand(time(0)+prank); if (debug) { fprintf(debug,"local_ndata: %d %d %d\n",local_ndata[0],local_ndata[1],local_ndata[2]); fprintf(debug,"local_size: %d %d %d\n",local_size[0],local_size[1],local_size[2]); fprintf(debug,"local_offset: %d %d %d\n",local_offset[0],local_offset[1],local_offset[2]); } for (i=0; i<local_ndata[0]; i++) { for (j=0; j<local_ndata[1]; j++) { for (k=0; k<local_ndata[2]; k++) { compare[i*local_ndata[1]*local_ndata[2]+j*local_ndata[2]+k]=real_data[i*local_size[1]*local_size[2]+j*local_size[2]+k]=((real)rand())/RAND_MAX; } } } gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_REAL_TO_COMPLEX,0,0); gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_COMPLEX_TO_REAL,0,0); for (i=0; i<local_ndata[0]; i++) { for (j=0; j<local_ndata[1]; j++) { for (k=0; k<local_ndata[2]; k++) { if (fabs(compare[i*local_ndata[1]*local_ndata[2]+j*local_ndata[2]+k] - real_data[i*local_size[1]*local_size[2]+j*local_size[2]+k]/(ndata[0]*ndata[1]*ndata[2]))>2*ndata[0]*ndata[1]*ndata[2]*GMX_REAL_EPS) { printf("error: %d %d %d: %f %f\n",i,j,k,compare[i*local_ndata[1]*local_ndata[2]+j*local_ndata[2]+k], real_data[i*local_size[1]*local_size[2]+j*local_size[2]+k]/(ndata[0]*ndata[1]*ndata[2])); } 
} } } gmx_parallel_3dfft_destroy(pfft_setup); MPI_Finalize(); return 0; }
/*
 * Allocate and initialise a t_fftgrid for an nx x ny x nz grid.
 *
 * nx, ny, nz     grid dimensions
 * node2slab,
 * slab2grid_x    forwarded unchanged to gmx_parallel_3dfft_init()
 *                (only used in the parallel case)
 * cr             communication record; may be NULL. With GMX_MPI defined and
 *                cr->nnodes > 1, the size of cr->mpi_comm_mygroup decides
 *                whether the grid is set up for a parallel FFT.
 * bReproducible  forwarded to the parallel FFT setup, or mapped to
 *                GMX_FFT_FLAG_CONSERVATIVE / GMX_FFT_FLAG_NONE for the
 *                serial FFT setup
 *
 * Returns the newly allocated grid, with its data buffer (ptr) and
 * workspace buffer allocated through gmx_alloc_aligned().
 */
t_fftgrid *mk_fftgrid(int nx, int ny, int nz, int *node2slab, int *slab2grid_x,
                      t_commrec * cr, bool bReproducible)
{
    /* parallel runs with non-parallel ffts haven't been tested yet */
    t_fftgrid *fftgrid;
    int        nranks = 1;
    int        pad_x, pad_y;
    int        is_parallel;
#ifdef GMX_MPI
    int        split_x_size, split_y_size, max_local;
#endif

#ifdef GMX_MPI
    if (cr && cr->nnodes > 1)
    {
        MPI_Comm_size(cr->mpi_comm_mygroup,&nranks);
    }
#endif
    is_parallel = (nranks > 1);

    snew(fftgrid,1);
    fftgrid->nx        = nx;
    fftgrid->ny        = ny;
    fftgrid->nz        = nz;
    fftgrid->nxyz      = nx*ny*nz;
    fftgrid->bParallel = is_parallel;

    /* Leading dimensions: the real z-extent is 2*(nz/2+1) in the parallel
     * case and nz otherwise; the complex z-extent is always nz/2+1. */
    fftgrid->la2r  = is_parallel ? (nz/2+1)*2 : nz;
    fftgrid->la2c  = (nz/2+1);
    fftgrid->la12r = ny*fftgrid->la2r;
    fftgrid->la12c = (is_parallel ? nx : ny)*fftgrid->la2c;

    /* This code assumes that when the grid is not divisible by the number
     * of nodes, the maximum difference in local grid sizes is 1.
     */
    pad_x = (nx % nranks == 0 ? 0 : 1);
    pad_y = (ny % nranks == 0 ? 0 : 1);

    fftgrid->nptr = (nx + pad_x)*(ny + pad_y)*fftgrid->la2c*2;

    if (!is_parallel)
    {
        gmx_fft_init_3d_real(&fftgrid->fft_setup,nx,ny,nz,
                             bReproducible ? GMX_FFT_FLAG_CONSERVATIVE : GMX_FFT_FLAG_NONE);
    }
    else
    {
#ifdef GMX_MPI
        gmx_parallel_3dfft_init(&fftgrid->mpi_fft_setup,nx,ny,nz,
                                node2slab,slab2grid_x,cr->mpi_comm_mygroup,
                                bReproducible);

        gmx_parallel_3dfft_limits(fftgrid->mpi_fft_setup,
                                  &(fftgrid->pfft.local_x_start),
                                  &(fftgrid->pfft.local_nx),
                                  &(fftgrid->pfft.local_y_start_after_transpose),
                                  &(fftgrid->pfft.local_ny_after_transpose));
#else
        gmx_fatal(FARGS,"Parallel FFT supported with MPI only!");
#endif
    }

    fftgrid->ptr = (real *)gmx_alloc_aligned(fftgrid->nptr*sizeof(*(fftgrid->ptr)));

#ifdef GMX_MPI
    if (is_parallel)
    {
        if (debug)
        {
            print_parfft(debug,"Plan", &fftgrid->pfft);
        }

        /* The workspace has to hold the larger of the two local sizes:
         * the grid split along x, or the grid split along y. */
        split_x_size = (nx/nranks + pad_x)*ny*fftgrid->la2c*2;
        split_y_size = (ny/nranks + pad_y)*nx*fftgrid->la2c*2;
        max_local    = max(split_x_size, split_y_size);

        fftgrid->workspace =
            (real *)gmx_alloc_aligned(max_local*sizeof(*(fftgrid->workspace)));
    }
    else
    {
        fftgrid->workspace =
            gmx_alloc_aligned(fftgrid->nptr*sizeof(*(fftgrid->workspace)));
    }
#else /* no MPI */
    fftgrid->workspace =
        (real *)gmx_alloc_aligned(fftgrid->nptr*sizeof(*(fftgrid->workspace)));
#endif

    return fftgrid;
}