/* Copy one thread's share of the local FFT grid into the PME grid.
 *
 * The local FFT data limits are queried from pme->pfft_setup[grid_index].
 * The (x,y) columns of that data are divided over nthread threads; the
 * caller with index `thread` copies columns [first_col, last_col) and,
 * for each column, the first fft_ndata[ZZ] elements along z.
 *
 * Both grids are addressed from index 0, i.e. the FFT grid is treated as
 * aligned with the lower-left corner of the PME grid; only the strides
 * differ (fft_size for the FFT grid, pme->pmegrid_n{x,y,z} for the PME grid).
 *
 * When PME_TIME_THREADS is defined, the elapsed cycle count is accumulated
 * in function-static counters and printed on every 20th call.
 *
 * Always returns 0.
 */
int copy_fftgrid_to_pmegrid(struct gmx_pme_t *pme, const real *fftgrid, real *pmegrid,
                            int grid_index, int nthread, int thread)
{
    ivec fft_ndata, fft_offset, fft_size;
    ivec pme_size;
    int  first_col, last_col, col;
    int  x, y, z;
    int  pme_base, fft_base;
#ifdef PME_TIME_THREADS
    gmx_cycles_t  c1;
    static double cs1 = 0;
    static int    cnt = 0;

    c1 = omp_cyc_start();
#endif

    gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index],
                                   fft_ndata,
                                   fft_offset,
                                   fft_size);

    pme_size[XX] = pme->pmegrid_nx;
    pme_size[YY] = pme->pmegrid_ny;
    pme_size[ZZ] = pme->pmegrid_nz;

    /* Range of flattened (x,y) column indices handled by this thread */
    first_col = ((thread    )*fft_ndata[XX]*fft_ndata[YY])/nthread;
    last_col  = ((thread + 1)*fft_ndata[XX]*fft_ndata[YY])/nthread;

    for (col = first_col; col < last_col; col++)
    {
        /* Unflatten the column index into its x and y components */
        x = col / fft_ndata[YY];
        y = col % fft_ndata[YY];

        /* Start of this z-column in each grid (the strides differ) */
        pme_base = (x*pme_size[YY] + y)*pme_size[ZZ];
        fft_base = (x*fft_size[YY] + y)*fft_size[ZZ];

        for (z = 0; z < fft_ndata[ZZ]; z++)
        {
            pmegrid[pme_base + z] = fftgrid[fft_base + z];
        }
    }

#ifdef PME_TIME_THREADS
    c1   = omp_cyc_end(c1);
    cs1 += (double)c1;
    cnt++;
    if (cnt % 20 == 0)
    {
        printf("copy %.2f\n", cs1*1e-9);
    }
#endif

    return 0;
}
//check eriks mail for one thing left to do int main(int argc,char** argv) { int prank,i,j,k; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD,&prank); debug=stderr; if (argc!=6) { if (prank==0) printf("Usage: gmx_test_3dfft N M K P1 P2\n"); return 1; } gmx_parallel_3dfft_t pfft_setup; ivec ndata = {atoi(argv[1]),atoi(argv[2]),atoi(argv[3])}; real * real_data; t_complex * complex_data; MPI_Comm comm[2]; ivec local_ndata; ivec local_offset; ivec local_size; int P[]= {atoi(argv[4]),atoi(argv[5])}; real* compare; split_communicator(MPI_COMM_WORLD,comm,P); gmx_parallel_3dfft_init (&pfft_setup, ndata, &real_data, &complex_data, comm, 0, 0); //last two: slab2index, bReprodusible gmx_parallel_3dfft_real_limits(pfft_setup,local_ndata,local_offset,local_size); snew(compare,ndata[0]*ndata[1]*ndata[2]); srand(time(0)+prank); if (debug) { fprintf(debug,"local_ndata: %d %d %d\n",local_ndata[0],local_ndata[1],local_ndata[2]); fprintf(debug,"local_size: %d %d %d\n",local_size[0],local_size[1],local_size[2]); fprintf(debug,"local_offset: %d %d %d\n",local_offset[0],local_offset[1],local_offset[2]); } for (i=0; i<local_ndata[0]; i++) { for (j=0; j<local_ndata[1]; j++) { for (k=0; k<local_ndata[2]; k++) { compare[i*local_ndata[1]*local_ndata[2]+j*local_ndata[2]+k]=real_data[i*local_size[1]*local_size[2]+j*local_size[2]+k]=((real)rand())/RAND_MAX; } } } gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_REAL_TO_COMPLEX,0,0); gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_COMPLEX_TO_REAL,0,0); for (i=0; i<local_ndata[0]; i++) { for (j=0; j<local_ndata[1]; j++) { for (k=0; k<local_ndata[2]; k++) { if (fabs(compare[i*local_ndata[1]*local_ndata[2]+j*local_ndata[2]+k] - real_data[i*local_size[1]*local_size[2]+j*local_size[2]+k]/(ndata[0]*ndata[1]*ndata[2]))>2*ndata[0]*ndata[1]*ndata[2]*GMX_REAL_EPS) { printf("error: %d %d %d: %f %f\n",i,j,k,compare[i*local_ndata[1]*local_ndata[2]+j*local_ndata[2]+k], real_data[i*local_size[1]*local_size[2]+j*local_size[2]+k]/(ndata[0]*ndata[1]*ndata[2])); } 
} } } gmx_parallel_3dfft_destroy(pfft_setup); MPI_Finalize(); return 0; }
/* This function is called from gmx_pme_do() only from debugging code that is commented out. */ void dump_local_fftgrid(struct gmx_pme_t *pme, const real *fftgrid) { ivec local_fft_ndata, local_fft_offset, local_fft_size; gmx_parallel_3dfft_real_limits(pme->pfft_setup[PME_GRID_QA], local_fft_ndata, local_fft_offset, local_fft_size); dump_grid(stderr, pme->pmegrid_start_ix, pme->pmegrid_start_iy, pme->pmegrid_start_iz, pme->pmegrid_nx-pme->pme_order+1, pme->pmegrid_ny-pme->pme_order+1, pme->pmegrid_nz-pme->pme_order+1, local_fft_size[YY], local_fft_size[ZZ], fftgrid); }
/* Copy the part of the PME grid covered by the local FFT data into the
 * FFT grid.
 *
 * The local FFT data limits are queried from pme->pfft_setup[grid_index].
 * Both grids are addressed from index 0, i.e. the FFT grid is treated as
 * aligned with the lower-left corner of the PME grid; only the strides
 * differ (fft_size for the FFT grid, pme->pmegrid_n{x,y,z} for the PME grid).
 *
 * When DEBUG_PME is defined, every non-zero grid value that is copied is
 * also written to pmegrid<nodeid>.pdb (as a PDB atom line) and to
 * pmegrid<nodeid>.txt (as a "qgrid" text line).
 *
 * Always returns 0.
 */
int copy_pmegrid_to_fftgrid(struct gmx_pme_t *pme, real *pmegrid, real *fftgrid, int grid_index)
{
    ivec fft_ndata, fft_offset, fft_size;
    ivec pme_size;
    int  x, y, z;
    int  pme_i, fft_i;
#ifdef DEBUG_PME
    FILE *fp, *fp2;
    char  fn[STRLEN];
    real  val;
#endif

    gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index],
                                   fft_ndata,
                                   fft_offset,
                                   fft_size);

    pme_size[XX] = pme->pmegrid_nx;
    pme_size[YY] = pme->pmegrid_ny;
    pme_size[ZZ] = pme->pmegrid_nz;

#ifdef DEBUG_PME
    sprintf(fn, "pmegrid%d.pdb", pme->nodeid);
    fp = gmx_ffopen(fn, "w");
    sprintf(fn, "pmegrid%d.txt", pme->nodeid);
    fp2 = gmx_ffopen(fn, "w");
#endif

    for (x = 0; x < fft_ndata[XX]; x++)
    {
        for (y = 0; y < fft_ndata[YY]; y++)
        {
            for (z = 0; z < fft_ndata[ZZ]; z++)
            {
                /* Same (x,y,z) element, addressed with each grid's own strides */
                pme_i = (x*pme_size[YY] + y)*pme_size[ZZ] + z;
                fft_i = (x*fft_size[YY] + y)*fft_size[ZZ] + z;

                fftgrid[fft_i] = pmegrid[pme_i];

#ifdef DEBUG_PME
                val = 100*pmegrid[pme_i];
                if (pmegrid[pme_i] != 0)
                {
                    gmx_fprintf_pdb_atomline(fp, epdbATOM, pme_i, "CA", ' ', "GLY", ' ', pme_i, ' ',
                                             5.0*x, 5.0*y, 5.0*z, 1.0, val, "");
                    fprintf(fp2, "%-12s  %5d  %5d  %5d  %12.5e\n",
                            "qgrid",
                            pme->pmegrid_start_ix + x,
                            pme->pmegrid_start_iy + y,
                            pme->pmegrid_start_iz + z,
                            pmegrid[pme_i]);
                }
#endif
            }
        }
    }

#ifdef DEBUG_PME
    gmx_ffclose(fp);
    gmx_ffclose(fp2);
#endif

    return 0;
}