void PPPMTIP4POMP::fieldforce() { // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt // ek = 3 components of E-field on particle const double * const q = atom->q; const double * const * const x = atom->x; const int * const type = atom->type; const int nthreads = comm->nthreads; const int nlocal = atom->nlocal; const double qqrd2e = force->qqrd2e; #if defined(_OPENMP) #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; #else const int ifrom = 0; const int ito = nlocal; const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); int l,m,n,nx,ny,nz,mx,my,mz; FFT_SCALAR dx,dy,dz,x0,y0,z0; FFT_SCALAR ekx,eky,ekz; int iH1,iH2; double xM[3], fx,fy,fz; double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z; // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { if (type[i] == typeO) { find_M(i,iH1,iH2,xM); } else { xM[0] = x[i][0]; xM[1] = x[i][1]; xM[2] = x[i][2]; } nx = part2grid[i][0]; ny = part2grid[i][1]; nz = part2grid[i][2]; dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv; dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv; dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv; compute_rho1d_thr(r1d,dx,dy,dz); ekx = eky = ekz = ZEROF; for (n = nlower; n <= nupper; n++) { mz = n+nz; z0 = r1d[2][n]; for (m = nlower; m <= nupper; m++) { my = m+ny; y0 = z0*r1d[1][m]; for (l = nlower; l <= nupper; l++) { mx = l+nx; x0 = y0*r1d[0][l]; ekx -= x0*vdx_brick[mz][my][mx]; eky -= x0*vdy_brick[mz][my][mx]; ekz -= x0*vdz_brick[mz][my][mx]; } } } // convert E-field to force const double qfactor = qqrd2e*scale*q[i]; if (type[i] != typeO) { f[i][0] += qfactor*ekx; f[i][1] += qfactor*eky; f[i][2] += qfactor*ekz; } else { fx = qfactor * ekx; fy = qfactor * eky; fz = qfactor * ekz; find_M(i,iH1,iH2,xM); rOMx = xM[0] - x[i][0]; rOMy = xM[1] - x[i][1]; rOMz = xM[2] - x[i][2]; ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist); f1x = ddotf * rOMx; f1y = ddotf * rOMy; f1z = ddotf * rOMz; f[i][0] += fx - alpha * (fx - f1x); f[i][1] += fy - alpha * (fy - f1y); f[i][2] += fz - alpha * (fz - f1z); f[iH1][0] += 0.5*alpha*(fx - f1x); f[iH1][1] += 0.5*alpha*(fy - f1y); f[iH1][2] += 0.5*alpha*(fz - f1z); f[iH2][0] += 0.5*alpha*(fx - f1x); f[iH2][1] += 0.5*alpha*(fy - f1y); f[iH2][2] += 0.5*alpha*(fz - f1z); } } } } }
void PPPMTIP4POMP::make_rho() { const double * const q = atom->q; const double * const * const x = atom->x; const int * const type = atom->type; const int nthreads = comm->nthreads; const int nlocal = atom->nlocal; #if defined(_OPENMP) #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; #else const int tid = 0; const int ifrom = 0; const int ito = nlocal; #endif int l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2; FFT_SCALAR dx,dy,dz,x0,y0,z0; double xM[3]; // set up clear 3d density array const int nzoffs = (nzhi_out-nzlo_out+1)*tid; FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]); memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR)); ThrData *thr = fix->get_thr(tid); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); // loop over my charges, add their contribution to nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { if (type[i] == typeO) { find_M(i,iH1,iH2,xM); } else { xM[0] = x[i][0]; xM[1] = x[i][1]; xM[2] = x[i][2]; } nx = part2grid[i][0]; ny = part2grid[i][1]; nz = part2grid[i][2]; dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv; dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv; dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv; compute_rho1d_thr(r1d,dx,dy,dz); z0 = delvolinv * q[i]; for (n = nlower; n <= nupper; n++) { mz = n+nz; y0 = z0*r1d[2][n]; for (m = nlower; m <= nupper; m++) { my = m+ny; x0 = y0*r1d[1][m]; for (l = nlower; l <= nupper; l++) { mx = l+nx; db[mz][my][mx] += x0*r1d[0][l]; } } } } } #if defined(_OPENMP) // reduce 3d density array if (nthreads > 1) { data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid); } #endif } }