/*
 * All-vs-all nonbonded kernel: plain Coulomb plus Lennard-Jones,
 * accumulating both energies and forces.
 *
 * fr        - force record; epsfac, ntype and nbfp are read from it.
 * mdatoms   - atom data; chargeA, typeA, nr, start and homenr are read.
 * excl      - exclusions; only forwarded to setup_aadata() on first use.
 * x         - coordinates, read as three consecutive reals per atom.
 * f         - forces, same layout as x; updated in place.
 * Vc, Vvdw  - energy accumulators; only element 0 is updated.
 * outeriter - receives the number of i atoms processed (homenr).
 * inneriter - receives floor(homenr*natoms/2).
 * work      - address of a gmx_allvsall_data_t pointer. When that pointer
 *             is NULL the all-vs-all data is built with setup_aadata()
 *             and stored back through work so later calls reuse it.
 */
void nb_kernel_allvsall(t_forcerec * fr,
                        t_mdatoms *  mdatoms,
                        t_blocka *   excl,
                        real *       x,
                        real *       f,
                        real *       Vc,
                        real *       Vvdw,
                        int *        outeriter,
                        int *        inneriter,
                        void *       work)
{
    gmx_allvsall_data_t *aadata;
    int                  natoms;
    int                  ni0, ni1, nouter;
    int                  nj0, nj1, nj2;
    int                  i, j, k;
    real *               charge;
    int *                type;
    real                 facel;
    real *               pvdw;
    int                  ggid;
    int *                mask;
    real                 ix, iy, iz, iq;
    real                 fix, fiy, fiz;
    real                 jx, jy, jz, qq;
    real                 dx, dy, dz;
    real                 tx, ty, tz;
    real                 rsq, rinv, rinvsq, rinvsix;
    real                 vcoul, vctot;
    real                 c6, c12, Vvdw6, Vvdw12, Vvdwtot;
    real                 fscal;

    charge = mdatoms->chargeA;
    type   = mdatoms->typeA;
    facel  = fr->epsfac;
    natoms = mdatoms->nr;
    ni0    = mdatoms->start;
    ni1    = mdatoms->start+mdatoms->homenr;

    /* Fetch the cached all-vs-all data; build and cache it on the first call */
    aadata = *((gmx_allvsall_data_t **)work);
    if (aadata == NULL)
    {
        setup_aadata(&aadata, excl, natoms, type, fr->ntype, fr->nbfp);
        *((gmx_allvsall_data_t **)work) = aadata;
    }

    for (i = ni0; i < ni1; i++)
    {
        /* We assume shifts are NOT used for all-vs-all interactions */

        /* Load i atom data */
        ix   = x[3*i];
        iy   = x[3*i+1];
        iz   = x[3*i+2];
        iq   = facel*charge[i];
        /* Parameter table selected by the i atom type, indexed below by j atom */
        pvdw = aadata->pvdwparam[type[i]];

        /* Zero the potential energy for this list */
        Vvdwtot = 0.0;
        vctot   = 0.0;

        /* Clear i atom forces */
        fix = 0.0;
        fiy = 0.0;
        fiz = 0.0;

        /* Load limits for loop over neighbors */
        nj0  = aadata->jindex[3*i];
        nj1  = aadata->jindex[3*i+1];
        nj2  = aadata->jindex[3*i+2];
        mask = aadata->exclusion_mask[i];

        /* Prologue part [nj0,nj1): one mask entry per j, zero means skip the pair */
        for (j = nj0; j < nj1; j++, mask++)
        {
            if (*mask != 0)
            {
                /* j may run past natoms; wrap it back to an atom index */
                k = j%natoms;

                /* load j atom coordinates */
                jx = x[3*k];
                jy = x[3*k+1];
                jz = x[3*k+2];

                /* Calculate distance */
                dx  = ix - jx;
                dy  = iy - jy;
                dz  = iz - jz;
                rsq = dx*dx+dy*dy+dz*dz;

                /* Calculate 1/r and 1/r2 */
                rinv   = gmx_invsqrt(rsq);
                rinvsq = rinv*rinv;

                /* Load parameters for j atom */
                qq  = iq*charge[k];
                c6  = pvdw[2*k];
                c12 = pvdw[2*k+1];

                /* Coulomb interaction */
                vcoul = qq*rinv;
                vctot = vctot+vcoul;

                /* Lennard-Jones interaction */
                rinvsix = rinvsq*rinvsq*rinvsq;
                Vvdw6   = c6*rinvsix;
                Vvdw12  = c12*rinvsix*rinvsix;
                Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
                fscal   = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx;
                ty = fscal*dy;
                tz = fscal*dz;

                /* Increment i atom force */
                fix = fix + tx;
                fiy = fiy + ty;
                fiz = fiz + tz;

                /* Decrement j atom force */
                f[3*k]   = f[3*k]   - tx;
                f[3*k+1] = f[3*k+1] - ty;
                f[3*k+2] = f[3*k+2] - tz;
            }
            /* Inner loop uses 38 flops/iteration */
        }

        /* Main part [nj1,nj2): no exclusions, so no mask lookup */
        for (j = nj1; j < nj2; j++)
        {
            k = j%natoms;

            /* load j atom coordinates */
            jx = x[3*k];
            jy = x[3*k+1];
            jz = x[3*k+2];

            /* Calculate distance */
            dx  = ix - jx;
            dy  = iy - jy;
            dz  = iz - jz;
            rsq = dx*dx+dy*dy+dz*dz;

            /* Calculate 1/r and 1/r2 */
            rinv   = gmx_invsqrt(rsq);
            rinvsq = rinv*rinv;

            /* Load parameters for j atom */
            qq  = iq*charge[k];
            c6  = pvdw[2*k];
            c12 = pvdw[2*k+1];

            /* Coulomb interaction */
            vcoul = qq*rinv;
            vctot = vctot+vcoul;

            /* Lennard-Jones interaction */
            rinvsix = rinvsq*rinvsq*rinvsq;
            Vvdw6   = c6*rinvsix;
            Vvdw12  = c12*rinvsix*rinvsix;
            Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
            fscal   = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;

            /* Calculate temporary vectorial force */
            tx = fscal*dx;
            ty = fscal*dy;
            tz = fscal*dz;

            /* Increment i atom force */
            fix = fix + tx;
            fiy = fiy + ty;
            fiz = fiz + tz;

            /* Decrement j atom force */
            f[3*k]   = f[3*k]   - tx;
            f[3*k+1] = f[3*k+1] - ty;
            f[3*k+2] = f[3*k+2] - tz;

            /* Inner loop uses 38 flops/iteration */
        }

        /* Flush the accumulated i atom force */
        f[3*i]   += fix;
        f[3*i+1] += fiy;
        f[3*i+2] += fiz;

        /* Add potential energies to the group for this list */
        ggid       = 0;
        Vc[ggid]   = Vc[ggid] + vctot;
        Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;

        /* Outer loop uses 6 flops/iteration */
    }

    /* Write outer/inner iteration count to pointers.
     * The inner count is floor(nouter*natoms/2). Splitting nouter into
     * 2*(nouter/2) + nouter%2 gives the same value without ever forming
     * the full product nouter*natoms, which can overflow int even when
     * the halved result still fits.
     */
    nouter     = ni1-ni0;
    *outeriter = nouter;
    *inneriter = (nouter/2)*natoms + ((nouter%2)*natoms)/2;
}
/*
 * All-vs-all nonbonded kernel: Coulomb, Lennard-Jones and a tabulated
 * Generalized-Born term, accumulating energies, forces and dvda.
 *
 * fr        - force record; epsilon_r, gb_epsilon_solvent, epsfac,
 *             gbtab.tab, gbtab.scale, invsqrta, dvda, ntype and nbfp
 *             are read from it. fr->dvda is updated in place.
 * mdatoms   - atom data; chargeA, typeA, nr, start and homenr are read.
 * excl      - exclusions; only forwarded to setup_aadata() on first use.
 * x         - coordinates, read as three consecutive reals per atom.
 * f         - forces, same layout as x; updated in place.
 * Vc, Vvdw, vpol - energy accumulators; only element 0 is updated.
 * outeriter - receives the number of i atoms processed (homenr).
 * inneriter - receives floor(homenr*natoms/2).
 * work      - address of a gmx_allvsall_data_t pointer. When that pointer
 *             is NULL the all-vs-all data is built with setup_aadata()
 *             and stored back through work so later calls reuse it.
 */
void nb_kernel_allvsallgb(t_forcerec * fr,
                          t_mdatoms *  mdatoms,
                          t_blocka *   excl,
                          real *       x,
                          real *       f,
                          real *       Vc,
                          real *       Vvdw,
                          real *       vpol,
                          int *        outeriter,
                          int *        inneriter,
                          void *       work)
{
    gmx_allvsall_data_t *aadata;
    int                  natoms;
    int                  ni0, ni1, nouter;
    int                  nj0, nj1, nj2;
    int                  i, j, k;
    real *               charge;
    int *                type;
    real                 facel;
    real *               pvdw;
    int                  ggid;
    int *                mask;
    real *               GBtab;
    real                 gbfactor;
    real *               invsqrta;
    real *               dvda;
    real                 vgbtot, dvdasum;
    int                  nnn, n0;
    real                 ix, iy, iz, iq;
    real                 fix, fiy, fiz;
    real                 jx, jy, jz, qq;
    real                 dx, dy, dz;
    real                 tx, ty, tz;
    real                 rsq, rinv, rinvsq, rinvsix;
    real                 vcoul, vctot;
    real                 c6, c12, Vvdw6, Vvdw12, Vvdwtot;
    real                 fscal, dvdatmp, fijC, vgb;
    real                 Y, F, Fp, Geps, Heps2, VV, FF, eps, eps2, r, rt;
    real                 dvdaj, gbscale, isaprod, isai, isaj, gbtabscale;

    charge     = mdatoms->chargeA;
    type       = mdatoms->typeA;
    gbfactor   = ((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
    facel      = fr->epsfac;
    GBtab      = fr->gbtab.tab;
    gbtabscale = fr->gbtab.scale;
    invsqrta   = fr->invsqrta;
    dvda       = fr->dvda;
    natoms     = mdatoms->nr;
    ni0        = mdatoms->start;
    ni1        = mdatoms->start+mdatoms->homenr;

    /* Fetch the cached all-vs-all data; build and cache it on the first call */
    aadata = *((gmx_allvsall_data_t **)work);
    if (aadata == NULL)
    {
        setup_aadata(&aadata, excl, natoms, type, fr->ntype, fr->nbfp);
        *((gmx_allvsall_data_t **)work) = aadata;
    }

    for (i = ni0; i < ni1; i++)
    {
        /* We assume shifts are NOT used for all-vs-all interactions */

        /* Load i atom data */
        ix   = x[3*i];
        iy   = x[3*i+1];
        iz   = x[3*i+2];
        iq   = facel*charge[i];
        isai = invsqrta[i];
        /* Parameter table selected by the i atom type, indexed below by j atom */
        pvdw = aadata->pvdwparam[type[i]];

        /* Zero the potential energy for this list */
        Vvdwtot = 0.0;
        vctot   = 0.0;
        vgbtot  = 0.0;
        dvdasum = 0.0;

        /* Clear i atom forces */
        fix = 0.0;
        fiy = 0.0;
        fiz = 0.0;

        /* Load limits for loop over neighbors */
        nj0  = aadata->jindex[3*i];
        nj1  = aadata->jindex[3*i+1];
        nj2  = aadata->jindex[3*i+2];
        mask = aadata->exclusion_mask[i];

        /* Prologue part [nj0,nj1): one mask entry per j, zero means skip the pair */
        for (j = nj0; j < nj1; j++, mask++)
        {
            if (*mask != 0)
            {
                /* j may run past natoms; wrap it back to an atom index */
                k = j%natoms;

                /* load j atom coordinates */
                jx = x[3*k];
                jy = x[3*k+1];
                jz = x[3*k+2];

                /* Calculate distance */
                dx  = ix - jx;
                dy  = iy - jy;
                dz  = iz - jz;
                rsq = dx*dx+dy*dy+dz*dz;

                /* Calculate 1/r */
                rinv = gmx_invsqrt(rsq);

                /* Load parameters for j atom */
                isaj    = invsqrta[k];
                isaprod = isai*isaj;
                qq      = iq*charge[k];
                vcoul   = qq*rinv;
                /* fscal temporarily holds the Coulomb part, qq/r^2 */
                fscal   = vcoul*rinv;
                /* From here on qq is the GB prefactor, not the charge product */
                qq      = isaprod*(-qq)*gbfactor;
                gbscale = isaprod*gbtabscale;
                c6      = pvdw[2*k];
                c12     = pvdw[2*k+1];
                rinvsq  = rinv*rinv;

                /* Tabulated Generalized-Born interaction */
                dvdaj = dvda[k];
                r     = rsq*rinv;

                /* Calculate table index; four coefficients per table point.
                 * NOTE(review): nnn is not range-checked against the table
                 * length here - presumably the table covers all distances
                 * that can occur; confirm against the table setup.
                 */
                rt      = r*gbscale;
                n0      = rt;
                eps     = rt-n0;
                eps2    = eps*eps;
                nnn     = 4*n0;
                Y       = GBtab[nnn];
                F       = GBtab[nnn+1];
                Geps    = eps*GBtab[nnn+2];
                Heps2   = eps2*GBtab[nnn+3];
                Fp      = F+Geps+Heps2;
                /* VV is the interpolated value, FF its derivative w.r.t. eps */
                VV      = Y+eps*Fp;
                FF      = Fp+Geps+2.0*Heps2;
                vgb     = qq*VV;
                fijC    = qq*FF*gbscale;
                dvdatmp = -0.5*(vgb+fijC*r);
                dvdasum = dvdasum + dvdatmp;
                dvda[k] = dvdaj+dvdatmp*isaj*isaj;
                vctot   = vctot + vcoul;
                vgbtot  = vgbtot + vgb;

                /* Lennard-Jones interaction */
                rinvsix = rinvsq*rinvsq*rinvsq;
                Vvdw6   = c6*rinvsix;
                Vvdw12  = c12*rinvsix*rinvsix;
                Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
                /* Combine LJ, GB and the Coulomb part stored in fscal above */
                fscal   = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv;

                /* Calculate temporary vectorial force */
                tx = fscal*dx;
                ty = fscal*dy;
                tz = fscal*dz;

                /* Increment i atom force */
                fix = fix + tx;
                fiy = fiy + ty;
                fiz = fiz + tz;

                /* Decrement j atom force */
                f[3*k]   = f[3*k]   - tx;
                f[3*k+1] = f[3*k+1] - ty;
                f[3*k+2] = f[3*k+2] - tz;
            }
            /* NOTE(review): the original comment here said "38 flops/iteration",
             * identical to the non-GB kernel; that figure looks stale for this
             * loop - confirm before relying on it.
             */
        }

        /* Main part [nj1,nj2): no exclusions, so no mask lookup */
        for (j = nj1; j < nj2; j++)
        {
            k = j%natoms;

            /* load j atom coordinates */
            jx = x[3*k];
            jy = x[3*k+1];
            jz = x[3*k+2];

            /* Calculate distance */
            dx  = ix - jx;
            dy  = iy - jy;
            dz  = iz - jz;
            rsq = dx*dx+dy*dy+dz*dz;

            /* Calculate 1/r */
            rinv = gmx_invsqrt(rsq);

            /* Load parameters for j atom */
            isaj    = invsqrta[k];
            isaprod = isai*isaj;
            qq      = iq*charge[k];
            vcoul   = qq*rinv;
            /* fscal temporarily holds the Coulomb part, qq/r^2 */
            fscal   = vcoul*rinv;
            /* From here on qq is the GB prefactor, not the charge product */
            qq      = isaprod*(-qq)*gbfactor;
            gbscale = isaprod*gbtabscale;
            c6      = pvdw[2*k];
            c12     = pvdw[2*k+1];
            rinvsq  = rinv*rinv;

            /* Tabulated Generalized-Born interaction */
            dvdaj = dvda[k];
            r     = rsq*rinv;

            /* Calculate table index; four coefficients per table point */
            rt      = r*gbscale;
            n0      = rt;
            eps     = rt-n0;
            eps2    = eps*eps;
            nnn     = 4*n0;
            Y       = GBtab[nnn];
            F       = GBtab[nnn+1];
            Geps    = eps*GBtab[nnn+2];
            Heps2   = eps2*GBtab[nnn+3];
            Fp      = F+Geps+Heps2;
            VV      = Y+eps*Fp;
            FF      = Fp+Geps+2.0*Heps2;
            vgb     = qq*VV;
            fijC    = qq*FF*gbscale;
            dvdatmp = -0.5*(vgb+fijC*r);
            dvdasum = dvdasum + dvdatmp;
            dvda[k] = dvdaj+dvdatmp*isaj*isaj;
            vctot   = vctot + vcoul;
            vgbtot  = vgbtot + vgb;

            /* Lennard-Jones interaction */
            rinvsix = rinvsq*rinvsq*rinvsq;
            Vvdw6   = c6*rinvsix;
            Vvdw12  = c12*rinvsix*rinvsix;
            Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
            fscal   = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv;

            /* Calculate temporary vectorial force */
            tx = fscal*dx;
            ty = fscal*dy;
            tz = fscal*dz;

            /* Increment i atom force */
            fix = fix + tx;
            fiy = fiy + ty;
            fiz = fiz + tz;

            /* Decrement j atom force */
            f[3*k]   = f[3*k]   - tx;
            f[3*k+1] = f[3*k+1] - ty;
            f[3*k+2] = f[3*k+2] - tz;

            /* NOTE(review): original comment claimed 38 flops/iteration here
             * as well; looks copied from the non-GB kernel - confirm.
             */
        }

        /* Flush the accumulated i atom force */
        f[3*i]   += fix;
        f[3*i+1] += fiy;
        f[3*i+2] += fiz;

        /* Add potential energies to the group for this list */
        ggid       = 0;
        Vc[ggid]   = Vc[ggid] + vctot;
        Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
        vpol[ggid] = vpol[ggid] + vgbtot;
        /* i atom share of dvda, scaled by isai^2 like the j atom updates above */
        dvda[i]    = dvda[i] + dvdasum*isai*isai;

        /* Outer loop uses 6 flops/iteration */
    }

    /* Write outer/inner iteration count to pointers.
     * The inner count is floor(nouter*natoms/2). Splitting nouter into
     * 2*(nouter/2) + nouter%2 gives the same value without ever forming
     * the full product nouter*natoms, which can overflow int even when
     * the halved result still fits.
     */
    nouter     = ni1-ni0;
    *outeriter = nouter;
    *inneriter = (nouter/2)*natoms + ((nouter%2)*natoms)/2;
}
void nb_kernel_allvsall(t_nblist gmx_unused * nlist, rvec * xx, rvec * ff, struct t_forcerec * fr, t_mdatoms * mdatoms, nb_kernel_data_t * kernel_data, t_nrnb * nrnb) { gmx_allvsall_data_t *aadata; int natoms; int ni0, ni1; int nj0, nj1, nj2; int i, j, k; real * charge; int * type; real facel; real * pvdw; int ggid; int * mask; real ix, iy, iz, iq; real fix, fiy, fiz; real jx, jy, jz, qq; real dx, dy, dz; real tx, ty, tz; real rsq, rinv, rinvsq, rinvsix; real vcoul, vctot; real c6, c12, Vvdw6, Vvdw12, Vvdwtot; real fscal; const t_blocka *excl; real * f; real * x; real * Vvdw; real * Vc; x = xx[0]; f = ff[0]; charge = mdatoms->chargeA; type = mdatoms->typeA; facel = fr->ic->epsfac; natoms = mdatoms->nr; ni0 = 0; ni1 = mdatoms->homenr; aadata = reinterpret_cast<gmx_allvsall_data_t *>(fr->AllvsAll_work); excl = kernel_data->exclusions; Vc = kernel_data->energygrp_elec; Vvdw = kernel_data->energygrp_vdw; if (aadata == nullptr) { setup_aadata(&aadata, excl, natoms, type, fr->ntype, fr->nbfp); fr->AllvsAll_work = aadata; } for (i = ni0; i < ni1; i++) { /* We assume shifts are NOT used for all-vs-all interactions */ /* Load i atom data */ ix = x[3*i]; iy = x[3*i+1]; iz = x[3*i+2]; iq = facel*charge[i]; pvdw = aadata->pvdwparam[type[i]]; /* Zero the potential energy for this list */ Vvdwtot = 0.0; vctot = 0.0; /* Clear i atom forces */ fix = 0.0; fiy = 0.0; fiz = 0.0; /* Load limits for loop over neighbors */ nj0 = aadata->jindex[3*i]; nj1 = aadata->jindex[3*i+1]; nj2 = aadata->jindex[3*i+2]; mask = aadata->exclusion_mask[i]; /* Prologue part, including exclusion mask */ for (j = nj0; j < nj1; j++, mask++) { if (*mask != 0) { k = j%natoms; /* load j atom coordinates */ jx = x[3*k]; jy = x[3*k+1]; jz = x[3*k+2]; /* Calculate distance */ dx = ix - jx; dy = iy - jy; dz = iz - jz; rsq = dx*dx+dy*dy+dz*dz; /* Calculate 1/r and 1/r2 */ rinv = 1.0/sqrt(rsq); rinvsq = rinv*rinv; /* Load parameters for j atom */ qq = iq*charge[k]; c6 = pvdw[2*k]; c12 = pvdw[2*k+1]; /* Coulomb 
interaction */ vcoul = qq*rinv; vctot = vctot+vcoul; /* Lennard-Jones interaction */ rinvsix = rinvsq*rinvsq*rinvsq; Vvdw6 = c6*rinvsix; Vvdw12 = c12*rinvsix*rinvsix; Vvdwtot = Vvdwtot+Vvdw12-Vvdw6; fscal = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq; /* Calculate temporary vectorial force */ tx = fscal*dx; ty = fscal*dy; tz = fscal*dz; /* Increment i atom force */ fix = fix + tx; fiy = fiy + ty; fiz = fiz + tz; /* Decrement j atom force */ f[3*k] = f[3*k] - tx; f[3*k+1] = f[3*k+1] - ty; f[3*k+2] = f[3*k+2] - tz; } /* Inner loop uses 38 flops/iteration */ } /* Main part, no exclusions */ for (j = nj1; j < nj2; j++) { k = j%natoms; /* load j atom coordinates */ jx = x[3*k]; jy = x[3*k+1]; jz = x[3*k+2]; /* Calculate distance */ dx = ix - jx; dy = iy - jy; dz = iz - jz; rsq = dx*dx+dy*dy+dz*dz; /* Calculate 1/r and 1/r2 */ rinv = 1.0/sqrt(rsq); rinvsq = rinv*rinv; /* Load parameters for j atom */ qq = iq*charge[k]; c6 = pvdw[2*k]; c12 = pvdw[2*k+1]; /* Coulomb interaction */ vcoul = qq*rinv; vctot = vctot+vcoul; /* Lennard-Jones interaction */ rinvsix = rinvsq*rinvsq*rinvsq; Vvdw6 = c6*rinvsix; Vvdw12 = c12*rinvsix*rinvsix; Vvdwtot = Vvdwtot+Vvdw12-Vvdw6; fscal = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq; /* Calculate temporary vectorial force */ tx = fscal*dx; ty = fscal*dy; tz = fscal*dz; /* Increment i atom force */ fix = fix + tx; fiy = fiy + ty; fiz = fiz + tz; /* Decrement j atom force */ f[3*k] = f[3*k] - tx; f[3*k+1] = f[3*k+1] - ty; f[3*k+2] = f[3*k+2] - tz; /* Inner loop uses 38 flops/iteration */ } f[3*i] += fix; f[3*i+1] += fiy; f[3*i+2] += fiz; /* Add potential energies to the group for this list */ ggid = 0; Vc[ggid] = Vc[ggid] + vctot; Vvdw[ggid] = Vvdw[ggid] + Vvdwtot; /* Outer loop uses 6 flops/iteration */ } /* 12 flops per outer iteration * 19 flops per inner iteration */ inc_nrnb(nrnb, eNR_NBKERNEL_ELEC_VDW_VF, (ni1-ni0)*12 + gmx::exactDiv(natoms*(natoms-1), 2)*19); }