/*! Calculates the long-range non-periodic potential using the PM method. The * potential is Gaussian filtered with Asmth, given in mesh-cell units. We * carry out a CIC charge assignment, and compute the potenial by Fourier * transform methods. The CIC kernel is deconvolved. */ int pmpotential_nonperiodic(int grnr) { double dx, dy, dz; double fac, to_slab_fac; double re, im, pot; int i, j, slab, level, sendTask, recvTask, flag, flagsum; int x, y, z, ip; int slab_x, slab_y, slab_z; int slab_xx, slab_yy, slab_zz; int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax; int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz; MPI_Status status; if(ThisTask == 0) printf("Starting non-periodic PM-potential calculation.\n"); fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3); /* to get potential */ to_slab_fac = GRID / All.TotalMeshSize[grnr]; /* first, establish the extension of the local patch in GRID (for binning) */ for(j = 0; j < 3; j++) { meshmin[j] = GRID; meshmax[j] = 0; } for(i = 0, flag = 0; i < NumPart; i++) { if (P[i].Type == 0 && P[i].ID < 0) /*SINK*/ continue; #ifdef PLACEHIGHRESREGION if(grnr == 0 || (grnr == 1 && P[i].Type == 0 && P[i].Mass*1.0e10/All.HubbleParam < All.RefinementMass || grnr == 1 && P[i].Type == 1 && P[i].Mass*1.0e10/All.HubbleParam < (All.Omega0/All.OmegaBaryon-1.0)*All.RefinementMass)) #endif { for(j = 0; j < 3; j++) { if(P[i].Pos[j] < All.Xmintot[grnr][j] || P[i].Pos[j] > All.Xmaxtot[grnr][j]) { if(flag == 0) { printf ("Particle Id=%d on task=%d with coordinates (%g|%g|%g) lies outside PM mesh.\nStopping\n", (int)P[i].ID, ThisTask, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]); fflush(stdout); } flag++; break; } } } if(flag > 0) continue; if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0]) if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1]) if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2]) { for(j = 0; j < 3; j++) { slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]); if(slab < meshmin[j]) meshmin[j] = slab; if(slab > meshmax[j]) meshmax[j] = slab; } } } MPI_Allreduce(&flag, &flagsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); if(flagsum > 0) { if(ThisTask == 0) { printf("In total %d particles were outside allowed range.\n", flagsum); fflush(stdout); } return 1; /* error - need to return because particle were outside allowed range */ } MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD); MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD); dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; force_treefree(); pm_init_nonperiodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4)); for(i = 0; i < dimx * dimy * dimz; i++) workspace[i] = 0; for(i = 0; i < NumPart; i++) { if (P[i].Type == 0 && P[i].ID < 0) /*SINK*/ continue; if(P[i].Pos[0] < All.Corner[grnr][0] || P[i].Pos[0] >= All.UpperCorner[grnr][0]) continue; if(P[i].Pos[1] < All.Corner[grnr][1] || P[i].Pos[1] >= All.UpperCorner[grnr][1]) continue; if(P[i].Pos[2] < All.Corner[grnr][2] || P[i].Pos[2] >= All.UpperCorner[grnr][2]) continue; slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]); dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]); dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]); dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz); workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz; workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz; workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz); workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz; workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz; } for(i = 0; i < fftsize; i++) /* clear local density field */ rhogrid[i] = 0; for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * GRID; sendmax = -1; for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++) if(slab_to_task[slab_x] == recvTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -1) sendmin = 0; /* check how much we have to receive */ recvmin = 2 * GRID; recvmax = -1; for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++) if(slab_to_task[slab_x] == sendTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -1) recvmin = 0; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2; if(level > 0) { MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_C, forcegrid, (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_C, MPI_COMM_WORLD, &status); } else { memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real)); } for(slab_x = recvmin; slab_x <= recvmax; slab_x++) { slab_xx = slab_x - first_slab_of_task[ThisTask]; if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask]) { for(slab_y = meshmin_list[3 * recvTask + 1]; slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++) { slab_yy = slab_y; for(slab_z = meshmin_list[3 * recvTask + 2]; slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++) { slab_zz = slab_z; rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz] += forcegrid[((slab_x - recvmin) * recv_dimy + (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz + (slab_z - meshmin_list[3 * recvTask + 2])]; } } } } } } } /* Do the FFT of the density field */ rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER); /* multiply with the Fourier transform of the Green's function (kernel) */ for(y = 0; y < nslab_y; y++) for(x = 0; x < GRID; x++) for(z = 0; z < GRID / 2 + 1; z++) { ip = GRID * (GRID / 2 + 1) * y + (GRID / 2 + 1) * x + z; re = fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].re - fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].im; im = fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].im + fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].re; fft_of_rhogrid[ip].re = fac * re; fft_of_rhogrid[ip].im = fac * im; } /* get the potential by inverse FFT */ rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER); /* Now rhogrid holds the potential */ /* construct the potential for the local patch */ /* if we have a high-res mesh, establish the extension of the local patch in GRID (for reading out the * forces) */ #ifdef PLACEHIGHRESREGION if(grnr == 1) { for(j = 0; j < 3; j++) { meshmin[j] = GRID; meshmax[j] = 0; } for(i = 0; i < NumPart; i++) { if (P[i].Type == 0 && P[i].ID < 0) /*SINK*/ continue; if(!(P[i].Type == 0 && P[i].Mass*1.0e10/All.HubbleParam < All.RefinementMass || P[i].Type == 1 && P[i].Mass*1.0e10/All.HubbleParam < (All.Omega0/All.OmegaBaryon-1.0)*All.RefinementMass)) continue; if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0]) if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1]) if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2]) { for(j = 0; j < 3; j++) { slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]); if(slab < meshmin[j]) meshmin[j] = slab; if(slab > meshmax[j]) meshmax[j] = slab; } } } MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD); MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD); } #endif dimx = meshmax[0] - meshmin[0] + 6; dimy = meshmax[1] - meshmin[1] + 6; dimz = meshmax[2] - meshmin[2] + 6; for(j = 0; j < 3; j++) { if(meshmin[j] < 2) endrun(131231); if(meshmax[j] > GRID / 2 - 3) endrun(131288); } for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * GRID; sendmax = -GRID; for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++) if(slab_to_task[slab_x] == sendTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -GRID) sendmin = sendmax + 1; /* check how much we have to receive */ recvmin = 2 * GRID; recvmax = -GRID; for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++) if(slab_to_task[slab_x] == recvTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -GRID) recvmin = recvmax + 1; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6; /* prepare what we want to send */ if(sendmax - sendmin >= 0) { for(slab_x = sendmin; slab_x <= sendmax; slab_x++) { slab_xx = slab_x - first_slab_of_task[ThisTask]; for(slab_y = meshmin_list[3 * recvTask + 1] - 2; slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++) { slab_yy = slab_y; for(slab_z = meshmin_list[3 * recvTask + 2] - 2; slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++) { slab_zz = slab_z; forcegrid[((slab_x - sendmin) * recv_dimy + (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz + slab_z - (meshmin_list[3 * recvTask + 2] - 2)] = rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz]; } } } } if(level > 0) { MPI_Sendrecv(forcegrid, (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_D, workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_NONPERIOD_D, MPI_COMM_WORLD, &status); } else { memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz, forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real)); } } } } dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; recv_dimx = meshmax[0] - meshmin[0] + 6; recv_dimy = meshmax[1] - meshmin[1] + 6; recv_dimz = meshmax[2] - meshmin[2] + 6; for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++) for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++) for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++) { forcegrid[(x * dimy + y) * dimz + z] = workspace[((x + 2) * recv_dimy + (y + 2)) * recv_dimz + (z + 2)]; } /* read out the potential */ for(i = 0; i < NumPart; i++) { if (P[i].Type == 0 && P[i].ID < 0) /*SINK*/ continue; #ifdef PLACEHIGHRESREGION if(grnr == 1) if(!(P[i].Type == 0 && P[i].Mass*1.0e10/All.HubbleParam < All.RefinementMass || P[i].Type == 1 && P[i].Mass*1.0e10/All.HubbleParam < (All.Omega0/All.OmegaBaryon-1.0)*All.RefinementMass)) continue; #endif slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]); dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]); dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]); dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; pot = forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); pot += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz); pot += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz; pot += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz; pot += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz); pot += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz); pot += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz; pot += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz; P[i].Potential += pot; } pm_init_nonperiodic_free(); force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart); All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency; if(ThisTask == 0) printf("done PM-potential.\n"); return 0; }
/*! Calculates the long-range potential using the PM method. The potential is * Gaussian filtered with Asmth, given in mesh-cell units. We carry out a CIC * charge assignment, and compute the potenial by Fourier transform * methods. The CIC kernel is deconvolved. */ void pmpotential_periodic(void) { double k2, kx, ky, kz, smth; double dx, dy, dz; double fx, fy, fz, ff; double asmth2, fac; int i, j, slab, level, sendTask, recvTask; int x, y, z, ip; int slab_x, slab_y, slab_z; int slab_xx, slab_yy, slab_zz; int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax; int rep, ncont, cont_sendmin[2], cont_sendmax[2], cont_recvmin[2], cont_recvmax[2]; int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz; #ifndef NOMPI MPI_Status status; #endif if(ThisTask == 0) { printf("Starting periodic PM calculation.\n"); fflush(stdout); } asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize; asmth2 *= asmth2; fac = All.G / (M_PI * All.BoxSize); /* to get potential */ force_treefree(); /* first, establish the extension of the local patch in the PMGRID */ for(j = 0; j < 3; j++) { meshmin[j] = PMGRID; meshmax[j] = 0; } for(i = 0; i < NumPart; i++) { for(j = 0; j < 3; j++) { slab = to_slab_fac * P[i].Pos[j]; if(slab >= PMGRID) slab = PMGRID - 1; if(slab < meshmin[j]) meshmin[j] = slab; if(slab > meshmax[j]) meshmax[j] = slab; } } #ifndef NOMPI MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD); MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD); #else for( i = 0; i < 3; i++){ mesmmin_list[0+i] = meshmin[i]; meshmax_list[0+i] = meshmax[i]; } #endif dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; pm_init_periodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4)); for(i = 0; i < dimx * dimy * dimz; i++) workspace[i] = 0; for(i = 0; i < NumPart; i++) { slab_x = to_slab_fac * P[i].Pos[0]; if(slab_x >= PMGRID) slab_x = PMGRID - 1; dx = to_slab_fac * P[i].Pos[0] - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * P[i].Pos[1]; if(slab_y >= PMGRID) slab_y = PMGRID - 1; dy = to_slab_fac * P[i].Pos[1] - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * P[i].Pos[2]; if(slab_z >= PMGRID) slab_z = PMGRID - 1; dz = to_slab_fac * P[i].Pos[2] - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz); workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz; workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz; workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz); workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz; workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz; } for(i = 0; i < fftsize; i++) /* clear local density field */ rhogrid[i] = 0; for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * PMGRID; sendmax = -1; for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++) if(slab_to_task[slab_x % PMGRID] == recvTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -1) sendmin = 0; /* check how much we have to receive */ recvmin = 2 * PMGRID; recvmax = -1; for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++) if(slab_to_task[slab_x % PMGRID] == sendTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -1) recvmin = 0; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2; if(level > 0) { #ifndef NOMPI MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_C, forcegrid, (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_C, MPI_COMM_WORLD, &status); #else memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real)) #endif } else {
/*! Calculates the long-range potential using the PM method. The potential is * Gaussian filtered with Asmth, given in mesh-cell units. We carry out a CIC * charge assignment, and compute the potenial by Fourier transform * methods. The CIC kernel is deconvolved. */ void pmpotential_periodic(void) { double k2, kx, ky, kz, smth; double dx, dy, dz; double fx, fy, fz, ff; double asmth2, fac; int i, j, slab, level, sendTask, recvTask; int x, y, z, ip; int slab_x, slab_y, slab_z; int slab_xx, slab_yy, slab_zz; int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax; int rep, ncont, cont_sendmin[2], cont_sendmax[2], cont_recvmin[2], cont_recvmax[2]; int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz; MPI_Status status; if(ThisTask == 0) { printf("Starting periodic PM calculation.\n"); fflush(stdout); } asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize; asmth2 *= asmth2; fac = All.G / (M_PI * All.BoxSize); /* to get potential */ force_treefree(); /* first, establish the extension of the local patch in the PMGRID */ for(j = 0; j < 3; j++) { meshmin[j] = PMGRID; meshmax[j] = 0; } for(i = 0; i < NumPart; i++) { for(j = 0; j < 3; j++) { slab = to_slab_fac * P[i].Pos[j]; if(slab >= PMGRID) slab = PMGRID - 1; if(slab < meshmin[j]) meshmin[j] = slab; if(slab > meshmax[j]) meshmax[j] = slab; } } MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD); MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD); dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; pm_init_periodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4)); #ifdef FFTW3 /* Create plan for in-place r2c DFT */ fft_forward_plan = fftw_mpi_plan_dft_r2c_3d(PMGRID, PMGRID, PMGRID, rhogrid, fft_of_rhogrid, MPI_COMM_WORLD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_OUT); fft_inverse_plan = fftw_mpi_plan_dft_c2r_3d(PMGRID, PMGRID, PMGRID, fft_of_rhogrid, rhogrid, MPI_COMM_WORLD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_IN); #endif for(i = 0; i < dimx * dimy * dimz; i++) workspace[i] = 0; for(i = 0; i < NumPart; i++) { slab_x = to_slab_fac * P[i].Pos[0]; if(slab_x >= PMGRID) slab_x = PMGRID - 1; dx = to_slab_fac * P[i].Pos[0] - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * P[i].Pos[1]; if(slab_y >= PMGRID) slab_y = PMGRID - 1; dy = to_slab_fac * P[i].Pos[1] - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * P[i].Pos[2]; if(slab_z >= PMGRID) slab_z = PMGRID - 1; dz = to_slab_fac * P[i].Pos[2] - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz); workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz; workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz; workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz); workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz; workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz; } for(i = 0; i < fftsize; i++) /* clear local density field */ rhogrid[i] = 0; for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * PMGRID; sendmax = -1; for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++) if(slab_to_task[slab_x % PMGRID] == recvTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -1) sendmin = 0; /* check how much we have to receive */ recvmin = 2 * PMGRID; recvmax = -1; for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++) if(slab_to_task[slab_x % PMGRID] == sendTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -1) recvmin = 0; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2; if(level > 0) { MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_C, forcegrid, (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_C, MPI_COMM_WORLD, &status); } else { memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real)); } for(slab_x = recvmin; slab_x <= recvmax; slab_x++) { slab_xx = (slab_x % PMGRID) - first_slab_of_task[ThisTask]; if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask]) { for(slab_y = meshmin_list[3 * recvTask + 1]; slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++) { slab_yy = slab_y; if(slab_yy >= PMGRID) slab_yy -= PMGRID; for(slab_z = meshmin_list[3 * recvTask + 2]; slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++) { slab_zz = slab_z; if(slab_zz >= PMGRID) slab_zz -= PMGRID; rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] += forcegrid[((slab_x - recvmin) * recv_dimy + (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz + (slab_z - meshmin_list[3 * recvTask + 2])]; } } } } } } } /* Do the FFT of the density field */ #ifdef FFTW3 fftw_execute(fft_forward_plan); #else rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER); #endif /* multiply with Green's function for the potential */ for(y = slabstart_y; y < slabstart_y + nslab_y; y++) for(x = 0; x < PMGRID; x++) for(z = 0; z < PMGRID / 2 + 1; z++) { if(x > PMGRID / 2) kx = x - PMGRID; else kx = x; if(y > PMGRID / 2) ky = y - PMGRID; else ky = y; if(z > PMGRID / 2) kz = z - PMGRID; else kz = z; k2 = kx * kx + ky * ky + kz * kz; if(k2 > 0) { smth = -exp(-k2 * asmth2) / k2 * fac; /* do deconvolution */ fx = fy = fz = 1; if(kx != 0) { fx = (M_PI * kx) / PMGRID; fx = sin(fx) / fx; } if(ky != 0) { fy = (M_PI * ky) / PMGRID; fy = sin(fy) / fy; } if(kz != 0) { fz = (M_PI * kz) / PMGRID; fz = sin(fz) / fz; } ff = 1 / (fx * fy * fz); smth *= ff * ff * ff * ff; /* end deconvolution */ ip = PMGRID * (PMGRID / 2 + 1) * (y - slabstart_y) + (PMGRID / 2 + 1) * x + z; c_re(fft_of_rhogrid[ip]) *= smth; c_im(fft_of_rhogrid[ip]) *= smth; } } if(slabstart_y == 0) c_re(fft_of_rhogrid[0]) = c_im(fft_of_rhogrid[0]) = 0.0; /* Do the FFT to get the potential */ #ifdef FFTW3 fftw_execute(fft_inverse_plan); /* Now normalize the output */ for(i = 0; i < fftsize; i++) rhogrid[i] = rhogrid[i] / (PMGRID*PMGRID*PMGRID); #else rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER); #endif /* note: "rhogrid" now contains the potential */ dimx = meshmax[0] - meshmin[0] + 6; dimy = meshmax[1] - meshmin[1] + 6; dimz = meshmax[2] - meshmin[2] + 6; for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * PMGRID; sendmax = -PMGRID; for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++) if(slab_to_task[(slab_x + PMGRID) % PMGRID] == sendTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -PMGRID) sendmin = sendmax + 1; /* check how much we have to receive */ recvmin = 2 * PMGRID; recvmax = -PMGRID; for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++) if(slab_to_task[(slab_x + PMGRID) % PMGRID] == recvTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -PMGRID) recvmin = recvmax + 1; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6; ncont = 1; cont_sendmin[0] = sendmin; cont_sendmax[0] = sendmax; cont_sendmin[1] = sendmax + 1; cont_sendmax[1] = sendmax; cont_recvmin[0] = recvmin; cont_recvmax[0] = recvmax; cont_recvmin[1] = recvmax + 1; cont_recvmax[1] = recvmax; for(slab_x = sendmin; slab_x <= sendmax; slab_x++) { if(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask) { /* non-contiguous */ cont_sendmax[0] = slab_x - 1; while(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask) slab_x++; cont_sendmin[1] = slab_x; ncont++; } } for(slab_x = recvmin; slab_x <= recvmax; slab_x++) { if(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask) { /* non-contiguous */ cont_recvmax[0] = slab_x - 1; while(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask) slab_x++; cont_recvmin[1] = slab_x; if(ncont == 1) ncont++; } } for(rep = 0; rep < ncont; rep++) { sendmin = cont_sendmin[rep]; sendmax = cont_sendmax[rep]; recvmin = cont_recvmin[rep]; recvmax = cont_recvmax[rep]; /* prepare what we want to send */ if(sendmax - sendmin >= 0) { for(slab_x = sendmin; slab_x <= sendmax; slab_x++) { slab_xx = ((slab_x + PMGRID) % PMGRID) - first_slab_of_task[ThisTask]; for(slab_y = meshmin_list[3 * recvTask + 1] - 2; slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++) { slab_yy = (slab_y + PMGRID) % PMGRID; for(slab_z = meshmin_list[3 * recvTask + 2] - 2; slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++) { slab_zz = (slab_z + PMGRID) % PMGRID; forcegrid[((slab_x - sendmin) * recv_dimy + (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz + slab_z - (meshmin_list[3 * recvTask + 2] - 2)] = rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz]; } } } } if(level > 0) { MPI_Sendrecv(forcegrid, (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_D, workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_D, MPI_COMM_WORLD, &status); } else { memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz, forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real)); } } } } } dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; recv_dimx = meshmax[0] - meshmin[0] + 6; recv_dimy = meshmax[1] - meshmin[1] + 6; recv_dimz = meshmax[2] - meshmin[2] + 6; for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++) for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++) for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++) { forcegrid[(x * dimy + y) * dimz + z] = workspace[((x + 2) * recv_dimy + (y + 2)) * recv_dimz + (z + 2)]; } /* read out the potential */ for(i = 0; i < NumPart; i++) { slab_x = to_slab_fac * P[i].Pos[0]; if(slab_x >= PMGRID) slab_x = PMGRID - 1; dx = to_slab_fac * P[i].Pos[0] - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * P[i].Pos[1]; if(slab_y >= PMGRID) slab_y = PMGRID - 1; dy = to_slab_fac * P[i].Pos[1] - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * P[i].Pos[2]; if(slab_z >= PMGRID) slab_z = PMGRID - 1; dz = to_slab_fac * P[i].Pos[2] - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; P[i].Potential += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); P[i].Potential += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz); P[i].Potential += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz; P[i].Potential += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz; P[i].Potential += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz); P[i].Potential += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz); P[i].Potential += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz; P[i].Potential += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz; } pm_init_periodic_free(); force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart); All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency; if(ThisTask == 0) { printf("done PM-Potential.\n"); fflush(stdout); } }
/*! Calculates the long-range periodic force given the particle positions * using the PM method. The force is Gaussian filtered with Asmth, given in * mesh-cell units. We carry out a CIC charge assignment, and compute the * potenial by Fourier transform methods. The potential is finite differenced * using a 4-point finite differencing formula, and the forces are * interpolated tri-linearly to the particle positions. The CIC kernel is * deconvolved. Note that the particle distribution is not in the slab * decomposition that is used for the FFT. Instead, overlapping patches * between local domains and FFT slabs are communicated as needed. */ void pmforce_periodic(void) { double k2, kx, ky, kz, smth; double dx, dy, dz; double fx, fy, fz, ff; double asmth2, fac, acc_dim; int i, j, slab, level, sendTask, recvTask; int x, y, z, xl, yl, zl, xr, yr, zr, xll, yll, zll, xrr, yrr, zrr, ip, dim; int slab_x, slab_y, slab_z; int slab_xx, slab_yy, slab_zz; int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax; int rep, ncont, cont_sendmin[2], cont_sendmax[2], cont_recvmin[2], cont_recvmax[2]; int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz; #ifndef NOMPI MPI_Status status; #endif if(ThisTask == 0) { printf("Starting periodic PM calculation.\n"); fflush(stdout); } force_treefree(); asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize; asmth2 *= asmth2; fac = All.G / (M_PI * All.BoxSize); /* to get potential */ fac *= 1 / (2 * All.BoxSize / PMGRID); /* for finite differencing */ /* first, establish the extension of the local patch in the PMGRID */ for(j = 0; j < 3; j++) { meshmin[j] = PMGRID; meshmax[j] = 0; } for(i = 0; i < NumPart; i++) { for(j = 0; j < 3; j++) { slab = to_slab_fac * P[i].Pos[j]; if(slab >= PMGRID) slab = PMGRID - 1; if(slab < meshmin[j]) meshmin[j] = slab; if(slab > meshmax[j]) meshmax[j] = slab; } } #ifndef NOMPI MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD); MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD); #else for( i = 0; i < 3; i++){ mesmmin_list[0+i] = meshmin[i]; meshmax_list[0+i] = meshmax[i]; } #endif dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; pm_init_periodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4)); for(i = 0; i < dimx * dimy * dimz; i++) workspace[i] = 0; for(i = 0; i < NumPart; i++) { slab_x = to_slab_fac * P[i].Pos[0]; if(slab_x >= PMGRID) slab_x = PMGRID - 1; dx = to_slab_fac * P[i].Pos[0] - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * P[i].Pos[1]; if(slab_y >= PMGRID) slab_y = PMGRID - 1; dy = to_slab_fac * P[i].Pos[1] - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * P[i].Pos[2]; if(slab_z >= PMGRID) slab_z = PMGRID - 1; dz = to_slab_fac * P[i].Pos[2] - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz); workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz; workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz; workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz); workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz; workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz; } for(i = 0; i < fftsize; i++) /* clear local density field */ rhogrid[i] = 0; for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * PMGRID; sendmax = -1; for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++) if(slab_to_task[slab_x % PMGRID] == recvTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -1) sendmin = 0; /* check how much we have to receive */ recvmin = 2 * PMGRID; recvmax = -1; for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++) if(slab_to_task[slab_x % PMGRID] == sendTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -1) recvmin = 0; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2; if(level > 0) { #ifndef NOMPI MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_A, forcegrid, (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_A, MPI_COMM_WORLD, &status); #else memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real)); #endif } else { memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real)); } for(slab_x = recvmin; slab_x <= recvmax; slab_x++) { slab_xx = (slab_x % PMGRID) - first_slab_of_task[ThisTask]; if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask]) { for(slab_y = meshmin_list[3 * recvTask + 1]; slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++) { slab_yy = slab_y; if(slab_yy >= PMGRID) slab_yy -= PMGRID; for(slab_z = meshmin_list[3 * recvTask + 2]; slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++) { slab_zz = slab_z; if(slab_zz >= PMGRID) slab_zz -= PMGRID; rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] += forcegrid[((slab_x - recvmin) * recv_dimy + (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz + (slab_z - meshmin_list[3 * recvTask + 2])]; } } } } } } } /* Do the FFT of the density field */ rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER); /* multiply with Green's function for the potential */ for(y = slabstart_y; y < slabstart_y + nslab_y; y++) for(x = 0; x < PMGRID; x++) for(z = 0; z < PMGRID / 2 + 1; z++) { if(x > PMGRID / 2) kx = x - PMGRID; else kx = x; if(y > PMGRID / 2) ky = y - PMGRID; else ky = y; if(z > PMGRID / 2) kz = z - PMGRID; else kz = z; k2 = kx * kx + ky * ky + kz * kz; if(k2 > 0) { smth = -exp(-k2 * asmth2) / k2; /* do deconvolution */ fx = fy = fz = 1; if(kx != 0) { fx = (M_PI * kx) / PMGRID; fx = sin(fx) / fx; } if(ky != 0) { fy = (M_PI * ky) / PMGRID; fy = sin(fy) / fy; } if(kz != 0) { fz = (M_PI * kz) / PMGRID; fz = sin(fz) / fz; } ff = 1 / (fx * fy * fz); smth *= ff * ff * ff * ff; /* end deconvolution */ ip = PMGRID * (PMGRID / 2 + 1) * (y - slabstart_y) + (PMGRID / 2 + 1) * x + z; fft_of_rhogrid[ip].re *= smth; fft_of_rhogrid[ip].im *= smth; } } if(slabstart_y == 0) fft_of_rhogrid[0].re = fft_of_rhogrid[0].im = 0.0; /* Do the FFT to get the potential */ rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER); /* Now rhogrid holds the potential */ /* construct the potential for the local patch */ dimx = meshmax[0] - meshmin[0] + 6; dimy = meshmax[1] - meshmin[1] + 6; dimz = meshmax[2] - meshmin[2] + 6; for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * PMGRID; sendmax = -PMGRID; for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++) if(slab_to_task[(slab_x + PMGRID) % PMGRID] == sendTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -PMGRID) sendmin = sendmax + 1; /* check how much we have to receive */ recvmin = 2 * PMGRID; recvmax = -PMGRID; for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++) if(slab_to_task[(slab_x + PMGRID) % PMGRID] == recvTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -PMGRID) recvmin = recvmax + 1; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6; ncont = 1; cont_sendmin[0] = sendmin; cont_sendmax[0] = sendmax; cont_sendmin[1] = sendmax + 1; cont_sendmax[1] = sendmax; cont_recvmin[0] = recvmin; cont_recvmax[0] = recvmax; cont_recvmin[1] = recvmax + 1; cont_recvmax[1] = recvmax; for(slab_x = sendmin; slab_x <= sendmax; slab_x++) { if(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask) { /* non-contiguous */ cont_sendmax[0] = slab_x - 1; while(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask) slab_x++; cont_sendmin[1] = slab_x; ncont++; } } for(slab_x = recvmin; slab_x <= recvmax; slab_x++) { if(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask) { /* non-contiguous */ cont_recvmax[0] = slab_x - 1; while(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask) slab_x++; cont_recvmin[1] = slab_x; if(ncont == 1) ncont++; } } for(rep = 0; rep < ncont; rep++) { sendmin = cont_sendmin[rep]; sendmax = cont_sendmax[rep]; recvmin = cont_recvmin[rep]; recvmax = cont_recvmax[rep]; /* prepare what we want to send */ if(sendmax - sendmin >= 0) { for(slab_x = sendmin; slab_x <= sendmax; slab_x++) { slab_xx = ((slab_x + PMGRID) % PMGRID) - first_slab_of_task[ThisTask]; for(slab_y = meshmin_list[3 * recvTask + 1] - 2; slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++) { slab_yy = (slab_y + PMGRID) % PMGRID; for(slab_z = meshmin_list[3 * recvTask + 2] - 2; slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++) { slab_zz = (slab_z + PMGRID) % PMGRID; forcegrid[((slab_x - sendmin) * recv_dimy + (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz + slab_z - (meshmin_list[3 * recvTask + 2] - 2)] = rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz]; } } } } if(level > 0) { #ifndef NOMPI MPI_Sendrecv(forcegrid, (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_B, workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_B, MPI_COMM_WORLD, &status); #else memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz, forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real)); #endif } else { memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz, forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real)); } } } } } dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; recv_dimx = meshmax[0] - meshmin[0] + 6; recv_dimy = meshmax[1] - meshmin[1] + 6; recv_dimz = meshmax[2] - meshmin[2] + 6; for(dim = 0; dim < 3; dim++) /* Calculate each component of the force. */ { /* get the force component by finite differencing the potential */ /* note: "workspace" now contains the potential for the local patch, plus a suffiently large buffer region */ for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++) for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++) for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++) { xrr = xll = xr = xl = x; yrr = yll = yr = yl = y; zrr = zll = zr = zl = z; switch (dim) { case 0: xr = x + 1; xrr = x + 2; xl = x - 1; xll = x - 2; break; case 1: yr = y + 1; yl = y - 1; yrr = y + 2; yll = y - 2; break; case 2: zr = z + 1; zl = z - 1; zrr = z + 2; zll = z - 2; break; } forcegrid[(x * dimy + y) * dimz + z] = fac * ((4.0 / 3) * (workspace[((xl + 2) * recv_dimy + (yl + 2)) * recv_dimz + (zl + 2)] - workspace[((xr + 2) * recv_dimy + (yr + 2)) * recv_dimz + (zr + 2)]) - (1.0 / 6) * (workspace[((xll + 2) * recv_dimy + (yll + 2)) * recv_dimz + (zll + 2)] - workspace[((xrr + 2) * recv_dimy + (yrr + 2)) * recv_dimz + (zrr + 2)])); } /* read out the forces */ for(i = 0; i < NumPart; i++) { slab_x = to_slab_fac * P[i].Pos[0]; if(slab_x >= PMGRID) slab_x = PMGRID - 1; dx = to_slab_fac * P[i].Pos[0] - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * P[i].Pos[1]; if(slab_y >= PMGRID) slab_y = PMGRID - 1; dy = to_slab_fac * P[i].Pos[1] - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * P[i].Pos[2]; if(slab_z >= PMGRID) slab_z = PMGRID - 1; dz = to_slab_fac * P[i].Pos[2] - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; acc_dim = forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz); acc_dim += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz; acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz; acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz); acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz); acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz; acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz; P[i].GravPM[dim] = acc_dim; } } pm_init_periodic_free(); force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart); All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency; if(ThisTask == 0) { printf("done PM.\n"); fflush(stdout); } }
/*! Calculates the long-range periodic force given the particle positions * using the PM method. The force is Gaussian filtered with Asmth, given in * mesh-cell units. We carry out a CIC charge assignment, and compute the * potenial by Fourier transform methods. The potential is finite differenced * using a 4-point finite differencing formula, and the forces are * interpolated tri-linearly to the particle positions. The CIC kernel is * deconvolved. Note that the particle distribution is not in the slab * decomposition that is used for the FFT. Instead, overlapping patches * between local domains and FFT slabs are communicated as needed. */ void pmforce_periodic(void) { double k2, kx, ky, kz, smth; double dx, dy, dz; double fx, fy, fz, ff; double asmth2, fac, acc_dim; int i, j, slab, level, sendTask, recvTask; int x, y, z, xl, yl, zl, xr, yr, zr, xll, yll, zll, xrr, yrr, zrr, ip, dim; int slab_x, slab_y, slab_z; int slab_xx, slab_yy, slab_zz; int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax; int rep, ncont, cont_sendmin[2], cont_sendmax[2], cont_recvmin[2], cont_recvmax[2]; int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz; MPI_Status status; if(ThisTask == 0) { printf("Starting periodic PM calculation.\n"); fflush(stdout); } #ifdef FFTW3 if(fftw_plan_exists) { /* macro defined in callgrind.h */ // CALLGRIND_START_INSTRUMENTATION; } #else // CALLGRIND_START_INSTRUMENTATION; #endif force_treefree(); asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize; asmth2 *= asmth2; fac = All.G / (M_PI * All.BoxSize); /* to get potential */ fac *= 1 / (2 * All.BoxSize / PMGRID); /* for finite differencing */ /* first, establish the extension of the local patch in the PMGRID */ for(j = 0; j < 3; j++) { meshmin[j] = PMGRID; meshmax[j] = 0; } for(i = 0; i < NumPart; i++) { for(j = 0; j < 3; j++) { slab = to_slab_fac * P[i].Pos[j]; if(slab >= PMGRID) slab = PMGRID - 1; if(slab < meshmin[j]) meshmin[j] = slab; if(slab > meshmax[j]) meshmax[j] = slab; } } MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD); MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD); dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; pm_init_periodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4)); #ifdef FFTW3 if(!fftw_plan_exists) { /* Create plan for in-place r2c DFT */ fft_forward_plan = fftw_mpi_plan_dft_r2c_3d(PMGRID, PMGRID, PMGRID, rhogrid, fft_of_rhogrid, MPI_COMM_WORLD, FFTW_PATIENT | FFTW_MPI_TRANSPOSED_OUT); fft_inverse_plan = fftw_mpi_plan_dft_c2r_3d(PMGRID, PMGRID, PMGRID, fft_of_rhogrid, rhogrid, MPI_COMM_WORLD, FFTW_PATIENT | FFTW_MPI_TRANSPOSED_IN); fftw_plan_exists = true; // use C99 bool type if(ThisTask == 0) printf("Created new FFTW3 plan.\n"); } else { /* do nothing, the plan has already been created by previous call to this function */ } #endif /* For FFTW3, there is a different convention for fftsize for real-to-complex transforms, i.e. fftsize is the size of the complex data (number of complex values), NOT the size of the real data! We attempt to take care of this by defining fftsize to be fftsize_real when using FFTW3. */ for(i = 0; i < dimx * dimy * dimz; i++) workspace[i] = 0; for(i = 0; i < NumPart; i++) { slab_x = to_slab_fac * P[i].Pos[0]; if(slab_x >= PMGRID) slab_x = PMGRID - 1; dx = to_slab_fac * P[i].Pos[0] - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * P[i].Pos[1]; if(slab_y >= PMGRID) slab_y = PMGRID - 1; dy = to_slab_fac * P[i].Pos[1] - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * P[i].Pos[2]; if(slab_z >= PMGRID) slab_z = PMGRID - 1; dz = to_slab_fac * P[i].Pos[2] - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz); workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz; workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz; workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz); workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz; workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz; } for(i = 0; i < fftsize; i++) /* clear local density field */ rhogrid[i] = 0; for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * PMGRID; sendmax = -1; for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++) if(slab_to_task[slab_x % PMGRID] == recvTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -1) sendmin = 0; /* check how much we have to receive */ recvmin = 2 * PMGRID; recvmax = -1; for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++) if(slab_to_task[slab_x % PMGRID] == sendTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -1) recvmin = 0; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2; if(level > 0) { MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_A, forcegrid, (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_A, MPI_COMM_WORLD, &status); } else { memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real)); } for(slab_x = recvmin; slab_x <= recvmax; slab_x++) { slab_xx = (slab_x % PMGRID) - first_slab_of_task[ThisTask]; if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask]) { for(slab_y = meshmin_list[3 * recvTask + 1]; slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++) { slab_yy = slab_y; if(slab_yy >= PMGRID) slab_yy -= PMGRID; for(slab_z = meshmin_list[3 * recvTask + 2]; slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++) { slab_zz = slab_z; if(slab_zz >= PMGRID) slab_zz -= PMGRID; rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] += forcegrid[((slab_x - recvmin) * recv_dimy + (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz + (slab_z - meshmin_list[3 * recvTask + 2])]; } } } } } } } #ifdef DEBUG_FFT double norm_density = 0.; for(i = 0; i < fftsize; i++) { norm_density += rhogrid[i]*rhogrid[i]; } /* Write out rhogrid to a 'fft-snapshot' file */ if (ThisTask == 0) { FILE *fp; /* Print the norm of the fft */ printf("L2-norm of density: %f\n", norm_density); printf("First five values of density: %f, %f, %f, %f, %f\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3], rhogrid[4]); } #endif /* Do the FFT of the density field */ #ifdef FFTW3 fftw_execute_dft_r2c(fft_forward_plan, rhogrid, fft_of_rhogrid); #else rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER); #endif #ifdef DEBUG_FFT double norm_complex = 0.; for(i = 0; i < fftsize; i++) { norm_complex += rhogrid[i]*rhogrid[i]; } /* Write out rhogrid to a 'fft-snapshot' file */ if (ThisTask == 0) { FILE *fp; /* Print the norm of the fft */ printf("L2-norm of complex rhogrid: %f\n", norm_complex); printf("First two values of complex fft: %f + i*%f, %f + i*%f\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3]); } #endif /* multiply with Green's function for the potential */ for(y = slabstart_y; y < slabstart_y + nslab_y; y++) for(x = 0; x < PMGRID; x++) for(z = 0; z < PMGRID / 2 + 1; z++) { if(x > PMGRID / 2) kx = x - PMGRID; else kx = x; if(y > PMGRID / 2) ky = y - PMGRID; else ky = y; if(z > PMGRID / 2) kz = z - PMGRID; else kz = z; k2 = kx * kx + ky * ky + kz * kz; if(k2 > 0) { smth = -exp(-k2 * asmth2) / k2; /* do deconvolution */ fx = fy = fz = 1; if(kx != 0) { fx = (M_PI * kx) / PMGRID; fx = sin(fx) / fx; } if(ky != 0) { fy = (M_PI * ky) / PMGRID; fy = sin(fy) / fy; } if(kz != 0) { fz = (M_PI * kz) / PMGRID; fz = sin(fz) / fz; } ff = 1 / (fx * fy * fz); smth *= ff * ff * ff * ff; /* end deconvolution */ ip = PMGRID * (PMGRID / 2 + 1) * (y - slabstart_y) + (PMGRID / 2 + 1) * x + z; c_re(fft_of_rhogrid[ip]) *= smth; c_im(fft_of_rhogrid[ip]) *= smth; } } if(slabstart_y == 0) c_re(fft_of_rhogrid[0]) = c_im(fft_of_rhogrid[0]) = 0.0; /* Do the FFT to get the potential */ #ifdef FFTW3 fftw_execute_dft_c2r(fft_inverse_plan, fft_of_rhogrid, rhogrid); /* Now normalize the output for(i = 0; i < fftsize; i++) rhogrid[i] = rhogrid[i] / (PMGRID*PMGRID*PMGRID); */ #else rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER); #endif #ifdef DEBUG_FFT double norm = 0.; for(i = 0; i < fftsize; i++) { norm += rhogrid[i]*rhogrid[i]; } /* Write out rhogrid to a 'fft-snapshot' file */ if (ThisTask == 0) { FILE *fp; /* Print the norm of the fft */ printf("L2-norm of rhogrid: %f\n", norm); printf("First five values of fft: %f, %f, %f, %f, %f\n\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3],rhogrid[4]); /* fp = fopen("rhogrid.0", "wb"); /* add suffix to indicate which node this is */ /* fwrite(rhogrid, sizeof(rhogrid[0]), fftsize, fp);*/ /* fclose(fp); */ } #endif /* Now rhogrid holds the potential */ /* construct the potential for the local patch */ dimx = meshmax[0] - meshmin[0] + 6; dimy = meshmax[1] - meshmin[1] + 6; dimz = meshmax[2] - meshmin[2] + 6; for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * PMGRID; sendmax = -PMGRID; for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++) if(slab_to_task[(slab_x + PMGRID) % PMGRID] == sendTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -PMGRID) sendmin = sendmax + 1; /* check how much we have to receive */ recvmin = 2 * PMGRID; recvmax = -PMGRID; for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++) if(slab_to_task[(slab_x + PMGRID) % PMGRID] == recvTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -PMGRID) recvmin = recvmax + 1; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6; ncont = 1; cont_sendmin[0] = sendmin; cont_sendmax[0] = sendmax; cont_sendmin[1] = sendmax + 1; cont_sendmax[1] = sendmax; cont_recvmin[0] = recvmin; cont_recvmax[0] = recvmax; cont_recvmin[1] = recvmax + 1; cont_recvmax[1] = recvmax; for(slab_x = sendmin; slab_x <= sendmax; slab_x++) { if(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask) { /* non-contiguous */ cont_sendmax[0] = slab_x - 1; while(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask) slab_x++; cont_sendmin[1] = slab_x; ncont++; } } for(slab_x = recvmin; slab_x <= recvmax; slab_x++) { if(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask) { /* non-contiguous */ cont_recvmax[0] = slab_x - 1; while(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask) slab_x++; cont_recvmin[1] = slab_x; if(ncont == 1) ncont++; } } for(rep = 0; rep < ncont; rep++) { sendmin = cont_sendmin[rep]; sendmax = cont_sendmax[rep]; recvmin = cont_recvmin[rep]; recvmax = cont_recvmax[rep]; /* prepare what we want to send */ if(sendmax - sendmin >= 0) { for(slab_x = sendmin; slab_x <= sendmax; slab_x++) { slab_xx = ((slab_x + PMGRID) % PMGRID) - first_slab_of_task[ThisTask]; for(slab_y = meshmin_list[3 * recvTask + 1] - 2; slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++) { slab_yy = (slab_y + PMGRID) % PMGRID; for(slab_z = meshmin_list[3 * recvTask + 2] - 2; slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++) { slab_zz = (slab_z + PMGRID) % PMGRID; forcegrid[((slab_x - sendmin) * recv_dimy + (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz + slab_z - (meshmin_list[3 * recvTask + 2] - 2)] = rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz]; } } } } if(level > 0) { MPI_Sendrecv(forcegrid, (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_B, workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_B, MPI_COMM_WORLD, &status); } else { memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz, forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real)); } } } } } dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; recv_dimx = meshmax[0] - meshmin[0] + 6; recv_dimy = meshmax[1] - meshmin[1] + 6; recv_dimz = meshmax[2] - meshmin[2] + 6; for(dim = 0; dim < 3; dim++) /* Calculate each component of the force. */ { /* get the force component by finite differencing the potential */ /* note: "workspace" now contains the potential for the local patch, plus a suffiently large buffer region */ for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++) for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++) for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++) { xrr = xll = xr = xl = x; yrr = yll = yr = yl = y; zrr = zll = zr = zl = z; switch (dim) { case 0: xr = x + 1; xrr = x + 2; xl = x - 1; xll = x - 2; break; case 1: yr = y + 1; yl = y - 1; yrr = y + 2; yll = y - 2; break; case 2: zr = z + 1; zl = z - 1; zrr = z + 2; zll = z - 2; break; } forcegrid[(x * dimy + y) * dimz + z] = fac * ((4.0 / 3) * (workspace[((xl + 2) * recv_dimy + (yl + 2)) * recv_dimz + (zl + 2)] - workspace[((xr + 2) * recv_dimy + (yr + 2)) * recv_dimz + (zr + 2)]) - (1.0 / 6) * (workspace[((xll + 2) * recv_dimy + (yll + 2)) * recv_dimz + (zll + 2)] - workspace[((xrr + 2) * recv_dimy + (yrr + 2)) * recv_dimz + (zrr + 2)])); } /* read out the forces */ for(i = 0; i < NumPart; i++) { slab_x = to_slab_fac * P[i].Pos[0]; if(slab_x >= PMGRID) slab_x = PMGRID - 1; dx = to_slab_fac * P[i].Pos[0] - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * P[i].Pos[1]; if(slab_y >= PMGRID) slab_y = PMGRID - 1; dy = to_slab_fac * P[i].Pos[1] - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * P[i].Pos[2]; if(slab_z >= PMGRID) slab_z = PMGRID - 1; dz = to_slab_fac * P[i].Pos[2] - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; acc_dim = forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz); acc_dim += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz; acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz; acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz); acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz); acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz; acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz; P[i].GravPM[dim] = acc_dim; } } pm_init_periodic_free(); force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart); All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency; if(ThisTask == 0) { printf("done PM.\n"); fflush(stdout); } #ifdef FFTW3 if(fftw_plan_exists) { /* macro defined in callgrind.h */ // CALLGRIND_STOP_INSTRUMENTATION; } #else // CALLGRIND_STOP_INSTRUMENTATION; #endif }