/*
 * Diagonalize a dense Hessian for normal-mode analysis.
 *
 * hess          flat array addressed as hess[row*ndim+col]; scaled in place
 *               when bM is set and then handed to eigensolver().
 * ndim          row stride of hess and stride between stored eigenvectors.
 * bM            when true, element (i,j) is multiplied by 1/sqrt(m_i*m_j)
 *               before the solve, and every eigenvector component is
 *               multiplied by 1/sqrt(m_atom) afterwards.
 * top           topology supplying the atom count and per-atom masses.
 * begin, end    eigenvector range as reported to the user; eigensolver()
 *               is called with begin-1 and end-1.
 * eigenvalues   passed through to eigensolver().
 * eigenvectors  passed through to eigensolver(); may be NULL, in which case
 *               the final rescaling step is skipped.
 */
static void nma_full_hessian(real * hess, int ndim, gmx_bool bM, t_topology * top, int begin, int end, real * eigenvalues, real * eigenvectors)
{
    int  i, j, k, l;
    int  natoms;
    real mass_fac;

    natoms = top->atoms.nr;

    /* divide elements hess[i][j] by sqrt(mas[i])*sqrt(mas[j]) when required */
    if (bM)
    {
        for (i = 0; i < natoms; i++)
        {
            for (k = 0; k < natoms; k++)
            {
                /* The factor depends only on the atom pair (i,k), so it is
                 * evaluated once for the whole DIM x DIM sub-block. */
                mass_fac = gmx_invsqrt(top->atoms.atom[i].m*top->atoms.atom[k].m);
                for (j = 0; j < DIM; j++)
                {
                    for (l = 0; l < DIM; l++)
                    {
                        hess[(i*DIM+j)*ndim+k*DIM+l] *= mass_fac;
                    }
                }
            }
        }
    }

    /* call diagonalization routine. */
    fprintf(stderr, "\nDiagonalizing to find vectors %d through %d...\n", begin, end);
    fflush(stderr);

    eigensolver(hess, ndim, begin-1, end-1, eigenvalues, eigenvectors);

    /* And scale the output eigenvectors */
    if (bM && eigenvectors != NULL)
    {
        for (i = 0; i < (end-begin+1); i++)
        {
            for (j = 0; j < natoms; j++)
            {
                mass_fac = gmx_invsqrt(top->atoms.atom[j].m);
                for (k = 0; k < DIM; k++)
                {
                    eigenvectors[i*ndim+j*DIM+k] *= mass_fac;
                }
            }
        }
    }
}
/*
 * Diagonalize a sparse Hessian for normal-mode analysis.
 *
 * sparse_hessian  matrix whose row r holds ndata[r] entries data[r][k], each
 *                 with a column index (.col) and a value (.value); values are
 *                 scaled in place when bM is set.
 * bM              when true, every stored value is multiplied by
 *                 1/sqrt(m_row_atom*m_col_atom) before the solve and every
 *                 eigenvector component by 1/sqrt(m_atom) afterwards.
 * top             topology supplying the atom count and per-atom masses.
 * neig            number of eigenvectors requested from sparse_eigensolver().
 * eigenvalues     passed through to sparse_eigensolver().
 * eigenvectors    passed through to sparse_eigensolver(); may be NULL, in
 *                 which case the final rescaling step is skipped.
 */
static void nma_sparse_hessian(gmx_sparsematrix_t * sparse_hessian, gmx_bool bM, t_topology * top, int neig, real * eigenvalues, real * eigenvectors)
{
    int  i, j, k;
    int  row, col;
    real mass_fac;
    int  iatom, katom;
    int  natoms;
    int  ndim;

    natoms = top->atoms.nr;
    ndim   = DIM*natoms;

    /* Cannot check symmetry since we only store half matrix */
    /* divide elements hess[i][j] by sqrt(mas[i])*sqrt(mas[j]) when required */
    if (bM)
    {
        for (iatom = 0; iatom < natoms; iatom++)
        {
            for (j = 0; j < DIM; j++)
            {
                row = DIM*iatom+j;
                for (k = 0; k < sparse_hessian->ndata[row]; k++)
                {
                    col = sparse_hessian->data[row][k].col;
                    /* Map the column back to its atom with the same DIM
                     * that was used to build the row index above. */
                    katom    = col/DIM;
                    mass_fac = gmx_invsqrt(top->atoms.atom[iatom].m*top->atoms.atom[katom].m);
                    sparse_hessian->data[row][k].value *= mass_fac;
                }
            }
        }
    }

    fprintf(stderr, "\nDiagonalizing to find eigenvectors 1 through %d...\n", neig);
    fflush(stderr);

    /* NOTE(review): the last argument is presumably an iteration limit -
     * confirm against the sparse_eigensolver() declaration. */
    sparse_eigensolver(sparse_hessian, neig, eigenvalues, eigenvectors, 10000000);

    /* Scale output eigenvectors */
    if (bM && eigenvectors != NULL)
    {
        for (i = 0; i < neig; i++)
        {
            for (j = 0; j < natoms; j++)
            {
                mass_fac = gmx_invsqrt(top->atoms.atom[j].m);
                for (k = 0; k < DIM; k++)
                {
                    eigenvectors[i*ndim+j*DIM+k] *= mass_fac;
                }
            }
        }
    }
}
/*
 * Compute the scalar factors and the energy for one restricted-bending angle.
 *
 * type         index into forceparams[]; harmonic.krA is used as the force
 *              constant and harmonic.rA (converted with DEG2RAD) as the
 *              reference angle.
 * delta_ante   first bond vector of the angle.
 * delta_post   second bond vector of the angle.
 * prefactor    out: common multiplier for the force expressions.
 * ratio_ante   out: (delta_ante . delta_post) / |delta_ante|^2.
 * ratio_post   out: (delta_ante . delta_post) / |delta_post|^2.
 * v            out: potential energy of the angle.
 *
 * No guard is applied against a vanishing 1 - cos^2(theta).
 */
void compute_factors_restangles(int type, const t_iparams forceparams[], rvec delta_ante, rvec delta_post, real *prefactor, real *ratio_ante, real *ratio_post, real *v)
{
    real kbend, theta0, cos_theta0;
    real len2_ante, dot_cross, len2_post;
    real inv_len, cos_theta, sin2;
    real dcos, cross_term;

    /* Force-field parameters; the reference angle is replaced by its supplement. */
    kbend      = forceparams[type].harmonic.krA;
    theta0     = forceparams[type].harmonic.rA*DEG2RAD;
    theta0     = M_PI - theta0;
    cos_theta0 = cos(theta0);

    /* Scalar products of the two bond vectors. */
    len2_ante = iprod(delta_ante, delta_ante);
    dot_cross = iprod(delta_ante, delta_post);
    len2_post = iprod(delta_post, delta_post);

    /* Cosine of the current angle and the matching squared sine. */
    inv_len   = gmx_invsqrt(len2_ante * len2_post);
    cos_theta = dot_cross * inv_len;
    sin2      = 1 - cos_theta * cos_theta;

    *ratio_ante = dot_cross / len2_ante;
    *ratio_post = dot_cross / len2_post;

    dcos       = cos_theta - cos_theta0;
    cross_term = 1 - cos_theta * cos_theta0;

    *prefactor = -(kbend) * dcos * inv_len * cross_term / (sin2 * sin2);
    *v         = kbend * 0.5 * dcos * dcos / sin2;
}
/*
 * Gather constraint-deviation statistics.
 *
 * For every constraint b between atoms bla[2*b] and bla[2*b+1] the relative
 * deviation d = |len/bllen[b] - 1| is evaluated (using pbc_dx_aiuc() when a
 * pbc is supplied, a plain difference otherwise).
 *
 * When dd has constraints, dd_constraints_nlocalatoms() provides a per
 * constraint weight; a zero weight keeps the constraint out of the maximum,
 * and the accumulated count and sum of squares are halved at the end.
 * Without it every constraint has weight one and nothing is halved.
 *
 * ncons_loc  out: (weighted) number of constraints.
 * ssd        out: (weighted) sum of squared relative deviations.
 * max        out: largest relative deviation found.
 * imax       out: index of the constraint with that deviation (0 if none).
 */
static void cconerr(gmx_domdec_t *dd, int ncons,int *bla,real *bllen,rvec *x,t_pbc *pbc, real *ncons_loc,real *ssd,real *max,int *imax)
{
    int   *nlocat = NULL;
    int    ntot   = 0;
    int    worst  = 0;
    real   dmax   = 0;
    real   sumsq  = 0;
    double scale;
    int    c;

    if (dd && dd->constraints)
    {
        nlocat = dd_constraints_nlocalatoms(dd);
    }

    for (c = 0; c < ncons; c++)
    {
        const int ai     = bla[2*c];
        const int aj     = bla[2*c+1];
        const int weight = (nlocat == NULL) ? 1 : nlocat[c];
        rvec      dx;
        real      r2, len, dev;

        if (pbc)
        {
            pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
        }
        else
        {
            rvec_sub(x[ai],x[aj],dx);
        }

        r2  = norm2(dx);
        len = r2*gmx_invsqrt(r2);
        dev = fabs(len/bllen[c]-1);

        if (dev > dmax && weight)
        {
            dmax  = dev;
            worst = c;
        }

        sumsq += weight*dev*dev;
        ntot  += weight;
    }

    scale = nlocat ? 0.5 : 1;

    *ncons_loc = scale*ntot;
    *ssd       = scale*sumsq;
    *max       = dmax;
    *imax      = worst;
}
/*
 * Accuracy check for gmx_invsqrt(): for X = 1..999 print the fast inverse
 * square root, the reference 1.0/sqrt(X), their absolute difference and the
 * difference relative to the reference value.
 *
 * The command-line arguments are not used.
 */
int main(int argc,char *argv[])
{
    real x,y,z,diff;
    int  i;

    printf("%12s %12s %12s %12s %12s\n","X","invsqrt(X)","1/sqrt(X)","Abs. Diff.","Rel. Diff.");
    for (i = 1; i < 1000; i++)
    {
        x    = i*1.0;
        y    = gmx_invsqrt(x);
        z    = 1.0/sqrt(x);
        diff = y-z;
        /* The relative column is normalized by the reference value z. */
        printf("%12.5e %12.5e %12.5e %12.5e %12.5e\n",x,y,z,diff,diff/z);
    }

    return 0;
}
/* This might logically belong better in the nb_generic.c module, but it is only
 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
 * extra function call for every single pair listed in the topology.
 *
 * Evaluates one pair interaction from a table that stores 12 values per
 * point: four each for electrostatics, dispersion and repulsion.
 *
 * r2        squared pair distance.
 * tabscale  table points per unit length.
 * vftab     the interaction table.
 * qq        charge product scaling the electrostatic part.
 * c6, c12   scaling of the dispersion and repulsion parts.
 * velec     out: electrostatic energy.
 * vvdw      out: dispersion plus repulsion energy.
 *
 * Returns the scalar force factor -(qq*FFe+c6*FFd+c12*FFr)*tabscale/r.
 */
static real nb_evaluate_single(real r2, real tabscale, real *vftab, real qq, real c6, real c12, real *velec, real *vvdw)
{
    real        rinv, r, rtab, eps, eps2;
    real        Y, F, Geps, Heps2, Fp;
    real        VVe, FFe, VVd, FFd, VVr, FFr;
    real        fscal;
    int         ntab;
    const real *tab;

    /* Do the tabulated interactions - first table lookup */
    rinv = gmx_invsqrt(r2);
    r    = r2*rinv;
    rtab = r*tabscale;
    ntab = (int)rtab;       /* truncate to the table point at or below r */
    eps  = rtab-ntab;
    eps2 = eps*eps;
    tab  = vftab + 12*ntab;

    /* Electrostatics */
    Y     = tab[0];
    F     = tab[1];
    Geps  = eps*tab[2];
    Heps2 = eps2*tab[3];
    Fp    = F+Geps+Heps2;
    VVe   = Y+eps*Fp;
    FFe   = Fp+Geps+2.0*Heps2;

    /* Dispersion */
    Y     = tab[4];
    F     = tab[5];
    Geps  = eps*tab[6];
    Heps2 = eps2*tab[7];
    Fp    = F+Geps+Heps2;
    VVd   = Y+eps*Fp;
    FFd   = Fp+Geps+2.0*Heps2;

    /* Repulsion */
    Y     = tab[8];
    F     = tab[9];
    Geps  = eps*tab[10];
    Heps2 = eps2*tab[11];
    Fp    = F+Geps+Heps2;
    VVr   = Y+eps*Fp;
    FFr   = Fp+Geps+2.0*Heps2;

    *velec = qq*VVe;
    *vvdw  = c6*VVd+c12*VVr;

    fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;

    return fscal;
}
/* Estimate the direct space part error of the SPME Ewald sum.
 *
 * Reads the first entries of info->ewald_beta and info->rcoulomb together
 * with info->q2all, info->q2allnr and info->volume, and returns
 *
 *   ONE_4PI_EPS0 * 2*q2all / sqrt(q2allnr*rc*volume) * exp(-beta^2 * rc^2)
 */
static real estimate_direct( t_inputinfo *info )
{
    const real beta = info->ewald_beta[0];   /* Splitting parameter (1/nm) */
    const real rc   = info->rcoulomb[0];     /* Cut-off in direct space    */
    real       err;                          /* Error estimate             */

    err  = 2.0 * info->q2all * gmx_invsqrt( info->q2allnr * rc * info->volume );
    err *= exp (-beta*beta*rc*rc);

    return ONE_4PI_EPS0*err;
}
void compute_factors_cbtdihs(int type, const t_iparams forceparams[], rvec delta_ante, rvec delta_crnt, rvec delta_post, rvec f_phi_ai, rvec f_phi_aj, rvec f_phi_ak, rvec f_phi_al, rvec f_theta_ante_ai, rvec f_theta_ante_aj, rvec f_theta_ante_ak, rvec f_theta_post_aj, rvec f_theta_post_ak, rvec f_theta_post_al, real * v) { int j, d; real torsion_coef[NR_CBTDIHS]; real c_self_ante, c_self_crnt, c_self_post; real c_cros_ante, c_cros_acrs, c_cros_post; real c_prod, d_ante, d_post; real norm_phi, norm_theta_ante, norm_theta_post; real cosine_phi, cosine_theta_ante, cosine_theta_post; real sine_theta_ante_sq, sine_theta_post_sq; real sine_theta_ante, sine_theta_post; real prefactor_phi; real ratio_phi_ante, ratio_phi_post; real r1, r2; real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post; real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post; real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post; real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post; real prefactor_theta_ante, ratio_theta_ante_ante, ratio_theta_ante_crnt; real prefactor_theta_post, ratio_theta_post_crnt, ratio_theta_post_post; /* The formula for combined bending-torsion potential (see file "restcbt.h") contains * in its expression not only the dihedral angle \f[\phi\f] but also \f[\theta_{i-1}\f] * (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)--- the adjacent bending * angles. The forces for the particles ai, aj, ak, al have components coming from the * derivatives of the potential with respect to all three angles. 
* This function is organised in 4 parts * PART 1 - Computes force factors common to all the derivatives for the four particles * PART 2 - Computes the force components due to the derivatives of dihedral angle Phi * PART 3 - Computes the force components due to the derivatives of bending angle Theta_Ante * PART 4 - Computes the force components due to the derivatives of bending angle Theta_Post * Bellow we will respct thuis structure */ /* PART 1 - COMPUTES FORCE FACTORS COMMON TO ALL DERIVATIVES FOR THE FOUR PARTICLES */ for (j = 0; (j < NR_CBTDIHS); j++) { torsion_coef[j] = forceparams[type].cbtdihs.cbtcA[j]; } /* Computation of the cosine of the dihedral angle. The scalar ("dot") product method * is used. c_*_* cummulate the scalar products of the differences of particles * positions while c_prod, d_ante and d_post are differences of products of scalar * terms that are parts of the derivatives of forces */ c_self_ante = iprod(delta_ante, delta_ante); c_self_crnt = iprod(delta_crnt, delta_crnt); c_self_post = iprod(delta_post, delta_post); c_cros_ante = iprod(delta_ante, delta_crnt); c_cros_acrs = iprod(delta_ante, delta_post); c_cros_post = iprod(delta_crnt, delta_post); c_prod = c_cros_ante * c_cros_post - c_self_crnt * c_cros_acrs; d_ante = c_self_ante * c_self_crnt - c_cros_ante * c_cros_ante; d_post = c_self_post * c_self_crnt - c_cros_post * c_cros_post; /* When three consecutive beads align, we obtain values close to zero. Here we avoid small values to prevent round-off errors. 
*/ if (d_ante < GMX_REAL_EPS) { d_ante = GMX_REAL_EPS; } if (d_post < GMX_REAL_EPS) { d_post = GMX_REAL_EPS; } /* Computations of cosines */ norm_phi = gmx_invsqrt(d_ante * d_post); norm_theta_ante = gmx_invsqrt(c_self_ante * c_self_crnt); norm_theta_post = gmx_invsqrt(c_self_crnt * c_self_post); cosine_phi = c_prod * norm_phi; cosine_theta_ante = c_cros_ante * norm_theta_ante; cosine_theta_post = c_cros_post * norm_theta_post; sine_theta_ante_sq = 1 - cosine_theta_ante * cosine_theta_ante; sine_theta_post_sq = 1 - cosine_theta_post * cosine_theta_post; /* It is possible that cosine_theta is slightly bigger than 1.0 due to round-off errors. */ if (sine_theta_ante_sq < 0.0) { sine_theta_ante_sq = 0.0; } if (sine_theta_post_sq < 0.0) { sine_theta_post_sq = 0.0; } sine_theta_ante = sqrt(sine_theta_ante_sq); sine_theta_post = sqrt(sine_theta_post_sq); /* PART 2 - COMPUTES FORCE COMPONENTS DUE TO DERIVATIVES TO DIHEDRAL ANGLE PHI */ /* Computation of ratios */ ratio_phi_ante = c_prod / d_ante; ratio_phi_post = c_prod / d_post; /* Computation of the prefactor */ /* Computing 2nd power */ r1 = cosine_phi; prefactor_phi = -torsion_coef[0] * norm_phi * (torsion_coef[2] + torsion_coef[3] * 2.0 * cosine_phi + torsion_coef[4] * 3.0 * (r1 * r1) + 4*torsion_coef[5]*r1*r1*r1) * sine_theta_ante_sq * sine_theta_ante * sine_theta_post_sq * sine_theta_post; /* Computation of factors (important for gaining speed). Factors factor_phi_* are coming from the * derivatives of the torsion angle (phi) with respect to the beads ai, aj, al, ak, * (four) coordinates and they are multiplied in the force computations with the * differences of the particles positions stored in parameters delta_ante, * delta_crnt, delta_post. 
For formulas see file "restcbt.h" */ factor_phi_ai_ante = ratio_phi_ante * c_self_crnt; factor_phi_ai_crnt = -c_cros_post - ratio_phi_ante * c_cros_ante; factor_phi_ai_post = c_self_crnt; factor_phi_aj_ante = -c_cros_post - ratio_phi_ante * (c_self_crnt + c_cros_ante); factor_phi_aj_crnt = c_cros_post + c_cros_acrs * 2.0 + ratio_phi_ante * (c_self_ante + c_cros_ante) + ratio_phi_post * c_self_post; factor_phi_aj_post = -(c_cros_ante + c_self_crnt) - ratio_phi_post * c_cros_post; factor_phi_ak_ante = c_cros_post + c_self_crnt + ratio_phi_ante * c_cros_ante; factor_phi_ak_crnt = -(c_cros_ante + c_cros_acrs * 2.0) - ratio_phi_ante * c_self_ante - ratio_phi_post * (c_self_post + c_cros_post); factor_phi_ak_post = c_cros_ante + ratio_phi_post * (c_self_crnt + c_cros_post); factor_phi_al_ante = -c_self_crnt; factor_phi_al_crnt = c_cros_ante + ratio_phi_post * c_cros_post; factor_phi_al_post = -ratio_phi_post * c_self_crnt; /* Computation of forces due to the derivatives of dihedral angle phi*/ for (d = 0; d < DIM; d++) { f_phi_ai[d] = prefactor_phi * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d] + factor_phi_ai_post * delta_post[d]); f_phi_aj[d] = prefactor_phi * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d] + factor_phi_aj_post * delta_post[d]); f_phi_ak[d] = prefactor_phi * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d] + factor_phi_ak_post * delta_post[d]); f_phi_al[d] = prefactor_phi * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d] + factor_phi_al_post * delta_post[d]); } /* PART 3 - COMPUTES THE FORCE COMPONENTS DUE TO THE DERIVATIVES OF BENDING ANGLE THETHA_ANTHE */ /* Computation of ratios */ ratio_theta_ante_ante = c_cros_ante / c_self_ante; ratio_theta_ante_crnt = c_cros_ante / c_self_crnt; /* Computation of the prefactor */ /* Computing 2nd power */ r1 = cosine_phi; /* Computing 3rd power */ r2 = cosine_phi; prefactor_theta_ante = -torsion_coef[0] * 
norm_theta_ante * ( torsion_coef[1] + torsion_coef[2] * cosine_phi + torsion_coef[3] * (r1 * r1) + torsion_coef[4] * (r2 * (r2 * r2))+ torsion_coef[5] * (r2 * (r2 * (r2 * r2)))) * (-3.0) * cosine_theta_ante * sine_theta_ante * sine_theta_post_sq * sine_theta_post; /* Computation of forces due to the derivatives of bending angle theta_ante */ for (d = 0; d < DIM; d++) { f_theta_ante_ai[d] = prefactor_theta_ante * (ratio_theta_ante_ante * delta_ante[d] - delta_crnt[d]); f_theta_ante_aj[d] = prefactor_theta_ante * ((ratio_theta_ante_crnt + 1.0) * delta_crnt[d] - (ratio_theta_ante_ante + 1.0) * delta_ante[d]); f_theta_ante_ak[d] = prefactor_theta_ante * (delta_ante[d] - ratio_theta_ante_crnt * delta_crnt[d]); } /* PART 4 - COMPUTES THE FORCE COMPONENTS DUE TO THE DERIVATIVES OF THE BENDING ANGLE THETA_POST */ /* Computation of ratios */ ratio_theta_post_crnt = c_cros_post / c_self_crnt; ratio_theta_post_post = c_cros_post / c_self_post; /* Computation of the prefactor */ /* Computing 2nd power */ r1 = cosine_phi; /* Computing 3rd power */ r2 = cosine_phi; prefactor_theta_post = -torsion_coef[0] * norm_theta_post * (torsion_coef[1] + torsion_coef[2] * cosine_phi + torsion_coef[3] * (r1 * r1) + torsion_coef[4] * (r2 * (r2 * r2)) + torsion_coef[5] * (r2 * (r2 * (r2 * r2)))) * sine_theta_ante_sq * sine_theta_ante * (-3.0) * cosine_theta_post * sine_theta_post; /* Computation of forces due to the derivatives of bending angle Theta_Post */ for (d = 0; d < DIM; d++) { f_theta_post_aj[d] = prefactor_theta_post * (ratio_theta_post_crnt * delta_crnt[d] - delta_post[d]); f_theta_post_ak[d] = prefactor_theta_post * ((ratio_theta_post_post + 1.0) * delta_post[d] - (ratio_theta_post_crnt + 1.0) * delta_crnt[d]); f_theta_post_al[d] = prefactor_theta_post * (delta_crnt[d] - ratio_theta_post_post * delta_post[d]); } r1 = cosine_phi; r2 = cosine_phi; /* Contribution to energy - for formula see file "restcbt.h" */ *v = torsion_coef[0] * (torsion_coef[1] + torsion_coef[2] * cosine_phi + 
torsion_coef[3] * (r1 * r1) + torsion_coef[4] * (r2 * (r2 * r2)) + torsion_coef[5] * (r2 * (r2 * (r2 * r2)))) * sine_theta_ante_sq * sine_theta_ante * sine_theta_post_sq * sine_theta_post; }
/*
 * All-vs-all nonbonded kernel: plain Coulomb, Lennard-Jones and a tabulated
 * Generalized-Born term.
 *
 * fr         force record: epsilon_r, gb_epsilon_solvent, epsfac, the GB
 *            table (gbtab.tab / gbtab.scale), invsqrta and dvda are read.
 * mdatoms    charges, atom types, atom count and the start/homenr range of
 *            i atoms handled here.
 * excl       exclusions, only used when the all-vs-all data is first set up.
 * x          coordinates, three consecutive values per atom.
 * f          forces, three consecutive values per atom; accumulated.
 * Vc, Vvdw, vpol  energy accumulators; only element 0 is updated.
 * outeriter, inneriter  out: iteration counts.
 * work       points to a gmx_allvsall_data_t pointer that caches the
 *            all-vs-all lists; it is created on the first call when NULL.
 */
void nb_kernel_allvsallgb(t_forcerec * fr, t_mdatoms * mdatoms, t_blocka * excl, real * x, real * f, real * Vc, real * Vvdw, real * vpol, int * outeriter, int * inneriter, void * work)
{
    gmx_allvsall_data_t *aadata;
    int                  natoms;
    int                  ni0, ni1;
    int                  nj0, nj1, nj2;
    int                  i, j, k;
    real                *charge;
    int                 *type;
    real                 facel;
    real                *pvdw;
    int                  ggid;
    int                 *mask;
    real                *GBtab;
    real                 gbfactor;
    real                *invsqrta;
    real                *dvda;
    real                 vgbtot, dvdasum;
    int                  nnn, n0;
    real                 ix, iy, iz, iq;
    real                 fix, fiy, fiz;
    real                 jx, jy, jz, qq;
    real                 dx, dy, dz;
    real                 tx, ty, tz;
    real                 rsq, rinv, rinvsq, rinvsix;
    real                 vcoul, vctot;
    real                 c6, c12, Vvdw6, Vvdw12, Vvdwtot;
    real                 fscal, dvdatmp, fijC, vgb;
    real                 Y, F, Fp, Geps, Heps2, VV, FF, eps, eps2, r, rt;
    real                 dvdaj, gbscale, isaprod, isai, isaj, gbtabscale;

    charge     = mdatoms->chargeA;
    type       = mdatoms->typeA;
    gbfactor   = ((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
    facel      = fr->epsfac;
    GBtab      = fr->gbtab.tab;
    gbtabscale = fr->gbtab.scale;
    invsqrta   = fr->invsqrta;
    dvda       = fr->dvda;

    natoms = mdatoms->nr;
    ni0    = mdatoms->start;
    ni1    = mdatoms->start+mdatoms->homenr;

    /* Build the cached all-vs-all data on first use */
    aadata = *((gmx_allvsall_data_t **)work);
    if (aadata == NULL)
    {
        setup_aadata(&aadata,excl,natoms,type,fr->ntype,fr->nbfp);
        *((gmx_allvsall_data_t **)work) = aadata;
    }

    for (i = ni0; i < ni1; i++)
    {
        /* We assume shifts are NOT used for all-vs-all interactions */

        /* Load i atom data */
        ix   = x[3*i];
        iy   = x[3*i+1];
        iz   = x[3*i+2];
        iq   = facel*charge[i];
        isai = invsqrta[i];
        pvdw = aadata->pvdwparam[type[i]];

        /* Zero the potential energy for this list */
        Vvdwtot = 0.0;
        vctot   = 0.0;
        vgbtot  = 0.0;
        dvdasum = 0.0;

        /* Clear i atom forces */
        fix = 0.0;
        fiy = 0.0;
        fiz = 0.0;

        /* Load limits for loop over neighbors */
        nj0 = aadata->jindex[3*i];
        nj1 = aadata->jindex[3*i+1];
        nj2 = aadata->jindex[3*i+2];

        mask = aadata->exclusion_mask[i];

        /* Prologue part, including exclusion mask */
        for (j = nj0; j < nj1; j++, mask++)
        {
            if (*mask != 0)
            {
                k = j%natoms;

                /* load j atom coordinates */
                jx = x[3*k];
                jy = x[3*k+1];
                jz = x[3*k+2];

                /* Calculate distance */
                dx  = ix - jx;
                dy  = iy - jy;
                dz  = iz - jz;
                rsq = dx*dx+dy*dy+dz*dz;

                /* Calculate 1/r and 1/r2 */
                rinv = gmx_invsqrt(rsq);

                /* Load parameters for j atom */
                isaj    = invsqrta[k];
                isaprod = isai*isaj;
                qq      = iq*charge[k];
                vcoul   = qq*rinv;
                fscal   = vcoul*rinv;
                qq      = isaprod*(-qq)*gbfactor;
                gbscale = isaprod*gbtabscale;
                c6      = pvdw[2*k];
                c12     = pvdw[2*k+1];
                rinvsq  = rinv*rinv;

                /* Tabulated Generalized-Born interaction */
                dvdaj = dvda[k];
                r     = rsq*rinv;

                /* Calculate table index */
                rt    = r*gbscale;
                n0    = rt;
                eps   = rt-n0;
                eps2  = eps*eps;
                nnn   = 4*n0;
                Y     = GBtab[nnn];
                F     = GBtab[nnn+1];
                Geps  = eps*GBtab[nnn+2];
                Heps2 = eps2*GBtab[nnn+3];
                Fp    = F+Geps+Heps2;
                VV    = Y+eps*Fp;
                FF    = Fp+Geps+2.0*Heps2;
                vgb   = qq*VV;
                fijC  = qq*FF*gbscale;

                dvdatmp  = -0.5*(vgb+fijC*r);
                dvdasum += dvdatmp;
                dvda[k]  = dvdaj+dvdatmp*isaj*isaj;
                vctot   += vcoul;
                vgbtot  += vgb;

                /* Lennard-Jones interaction */
                rinvsix = rinvsq*rinvsq*rinvsq;
                Vvdw6   = c6*rinvsix;
                Vvdw12  = c12*rinvsix*rinvsix;
                Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
                fscal   = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv;

                /* Calculate temporary vectorial force */
                tx = fscal*dx;
                ty = fscal*dy;
                tz = fscal*dz;

                /* Increment i atom force */
                fix += tx;
                fiy += ty;
                fiz += tz;

                /* Decrement j atom force */
                f[3*k]   -= tx;
                f[3*k+1] -= ty;
                f[3*k+2] -= tz;
            }
            /* Inner loop uses 38 flops/iteration */
        }

        /* Main part, no exclusions */
        for (j = nj1; j < nj2; j++)
        {
            k = j%natoms;

            /* load j atom coordinates */
            jx = x[3*k];
            jy = x[3*k+1];
            jz = x[3*k+2];

            /* Calculate distance */
            dx  = ix - jx;
            dy  = iy - jy;
            dz  = iz - jz;
            rsq = dx*dx+dy*dy+dz*dz;

            /* Calculate 1/r and 1/r2 */
            rinv = gmx_invsqrt(rsq);

            /* Load parameters for j atom */
            isaj    = invsqrta[k];
            isaprod = isai*isaj;
            qq      = iq*charge[k];
            vcoul   = qq*rinv;
            fscal   = vcoul*rinv;
            qq      = isaprod*(-qq)*gbfactor;
            gbscale = isaprod*gbtabscale;
            c6      = pvdw[2*k];
            c12     = pvdw[2*k+1];
            rinvsq  = rinv*rinv;

            /* Tabulated Generalized-Born interaction */
            dvdaj = dvda[k];
            r     = rsq*rinv;

            /* Calculate table index */
            rt    = r*gbscale;
            n0    = rt;
            eps   = rt-n0;
            eps2  = eps*eps;
            nnn   = 4*n0;
            Y     = GBtab[nnn];
            F     = GBtab[nnn+1];
            Geps  = eps*GBtab[nnn+2];
            Heps2 = eps2*GBtab[nnn+3];
            Fp    = F+Geps+Heps2;
            VV    = Y+eps*Fp;
            FF    = Fp+Geps+2.0*Heps2;
            vgb   = qq*VV;
            fijC  = qq*FF*gbscale;

            dvdatmp  = -0.5*(vgb+fijC*r);
            dvdasum += dvdatmp;
            dvda[k]  = dvdaj+dvdatmp*isaj*isaj;
            vctot   += vcoul;
            vgbtot  += vgb;

            /* Lennard-Jones interaction */
            rinvsix = rinvsq*rinvsq*rinvsq;
            Vvdw6   = c6*rinvsix;
            Vvdw12  = c12*rinvsix*rinvsix;
            Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
            fscal   = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv;

            /* Calculate temporary vectorial force */
            tx = fscal*dx;
            ty = fscal*dy;
            tz = fscal*dz;

            /* Increment i atom force */
            fix += tx;
            fiy += ty;
            fiz += tz;

            /* Decrement j atom force */
            f[3*k]   -= tx;
            f[3*k+1] -= ty;
            f[3*k+2] -= tz;

            /* Inner loop uses 38 flops/iteration */
        }

        f[3*i]   += fix;
        f[3*i+1] += fiy;
        f[3*i+2] += fiz;

        /* Add potential energies to the group for this list */
        ggid = 0;

        Vc[ggid]   += vctot;
        Vvdw[ggid] += Vvdwtot;
        vpol[ggid] += vgbtot;
        dvda[i]    += dvdasum*isai*isai;

        /* Outer loop uses 6 flops/iteration */
    }

    /* Write outer/inner iteration count to pointers */
    *outeriter = ni1-ni0;
    *inneriter = (ni1-ni0)*natoms/2;
}
int gmx_nmens(int argc, char *argv[]) { const char *desc[] = { "[THISMODULE] generates an ensemble around an average structure", "in a subspace that is defined by a set of normal modes (eigenvectors).", "The eigenvectors are assumed to be mass-weighted.", "The position along each eigenvector is randomly taken from a Gaussian", "distribution with variance kT/eigenvalue.[PAR]", "By default the starting eigenvector is set to 7, since the first six", "normal modes are the translational and rotational degrees of freedom." }; static int nstruct = 100, first = 7, last = -1, seed = -1; static real temp = 300.0; t_pargs pa[] = { { "-temp", FALSE, etREAL, {&temp}, "Temperature in Kelvin" }, { "-seed", FALSE, etINT, {&seed}, "Random seed, -1 generates a seed from time and pid" }, { "-num", FALSE, etINT, {&nstruct}, "Number of structures to generate" }, { "-first", FALSE, etINT, {&first}, "First eigenvector to use (-1 is select)" }, { "-last", FALSE, etINT, {&last}, "Last eigenvector to use (-1 is till the last)" } }; #define NPA asize(pa) t_trxstatus *out; int status, trjout; t_topology top; int ePBC; t_atoms *atoms; rvec *xtop, *xref, *xav, *xout1, *xout2; gmx_bool bDMR, bDMA, bFit; int nvec, *eignr = NULL; rvec **eigvec = NULL; matrix box; real *eigval, totmass, *invsqrtm, t, disp; int natoms, neigval; char *grpname, title[STRLEN]; const char *indexfile; int i, j, d, s, v; int nout, *iout, noutvec, *outvec; atom_id *index; real rfac, invfr, rhalf, jr; int * eigvalnr; output_env_t oenv; gmx_rng_t rng; unsigned long jran; const unsigned long im = 0xffff; const unsigned long ia = 1093; const unsigned long ic = 18257; t_filenm fnm[] = { { efTRN, "-v", "eigenvec", ffREAD }, { efXVG, "-e", "eigenval", ffREAD }, { efTPS, NULL, NULL, ffREAD }, { efNDX, NULL, NULL, ffOPTRD }, { efTRO, "-o", "ensemble", ffWRITE } }; #define NFILE asize(fnm) if (!parse_common_args(&argc, argv, PCA_BE_NICE, NFILE, fnm, NPA, pa, asize(desc), desc, 0, NULL, &oenv)) { return 0; } indexfile = 
ftp2fn_null(efNDX, NFILE, fnm); read_eigenvectors(opt2fn("-v", NFILE, fnm), &natoms, &bFit, &xref, &bDMR, &xav, &bDMA, &nvec, &eignr, &eigvec, &eigval); read_tps_conf(ftp2fn(efTPS, NFILE, fnm), title, &top, &ePBC, &xtop, NULL, box, bDMA); atoms = &top.atoms; printf("\nSelect an index group of %d elements that corresponds to the eigenvectors\n", natoms); get_index(atoms, indexfile, 1, &i, &index, &grpname); if (i != natoms) { gmx_fatal(FARGS, "you selected a group with %d elements instead of %d", i, natoms); } printf("\n"); snew(invsqrtm, natoms); if (bDMA) { for (i = 0; (i < natoms); i++) { invsqrtm[i] = gmx_invsqrt(atoms->atom[index[i]].m); } } else { for (i = 0; (i < natoms); i++) { invsqrtm[i] = 1.0; } } if (last == -1) { last = natoms*DIM; } if (first > -1) { /* make an index from first to last */ nout = last-first+1; snew(iout, nout); for (i = 0; i < nout; i++) { iout[i] = first-1+i; } } else { printf("Select eigenvectors for output, end your selection with 0\n"); nout = -1; iout = NULL; do { nout++; srenew(iout, nout+1); if (1 != scanf("%d", &iout[nout])) { gmx_fatal(FARGS, "Error reading user input"); } iout[nout]--; } while (iout[nout] >= 0); printf("\n"); } /* make an index of the eigenvectors which are present */ snew(outvec, nout); noutvec = 0; for (i = 0; i < nout; i++) { j = 0; while ((j < nvec) && (eignr[j] != iout[i])) { j++; } if ((j < nvec) && (eignr[j] == iout[i])) { outvec[noutvec] = j; iout[noutvec] = iout[i]; noutvec++; } } fprintf(stderr, "%d eigenvectors selected for output\n", noutvec); if (seed == -1) { seed = (int)gmx_rng_make_seed(); rng = gmx_rng_init(seed); } else { rng = gmx_rng_init(seed); } fprintf(stderr, "Using seed %d and a temperature of %g K\n", seed, temp); snew(xout1, natoms); snew(xout2, atoms->nr); out = open_trx(ftp2fn(efTRO, NFILE, fnm), "w"); jran = (unsigned long)((real)im*gmx_rng_uniform_real(rng)); gmx_rng_destroy(rng); for (s = 0; s < nstruct; s++) { for (i = 0; i < natoms; i++) { copy_rvec(xav[i], xout1[i]); } for (j 
= 0; j < noutvec; j++) { v = outvec[j]; /* (r-0.5) n times: var_n = n * var_1 = n/12 n=4: var_n = 1/3, so multiply with 3 */ rfac = sqrt(3.0 * BOLTZ*temp/eigval[iout[j]]); rhalf = 2.0*rfac; rfac = rfac/(real)im; jran = (jran*ia+ic) & im; jr = (real)jran; jran = (jran*ia+ic) & im; jr += (real)jran; jran = (jran*ia+ic) & im; jr += (real)jran; jran = (jran*ia+ic) & im; jr += (real)jran; disp = rfac * jr - rhalf; for (i = 0; i < natoms; i++) { for (d = 0; d < DIM; d++) { xout1[i][d] += disp*eigvec[v][i][d]*invsqrtm[i]; } } } for (i = 0; i < natoms; i++) { copy_rvec(xout1[i], xout2[index[i]]); } t = s+1; write_trx(out, natoms, index, atoms, 0, t, box, xout2, NULL, NULL); fprintf(stderr, "\rGenerated %d structures", s+1); } fprintf(stderr, "\n"); close_trx(out); return 0; }
/*
 * Gromacs nonbonded kernel pf_nb_kernel320
 * Coulomb interaction:     Tabulated
 * VdW interaction:         Buckingham
 * water optimization:      No
 * Calculate forces:        yes
 *
 * In addition to the usual force and energy accumulation, the Coulomb and
 * VdW scalar force of every pair is passed to pf_atom_add_nonbonded() when
 * pf_global->bInitialized is set.
 */
void pf_nb_kernel320( int * p_nri, int * iinr, int * jindex, int * jjnr, int * shift, real * shiftvec, real * fshift, int * gid, real * pos, real * faction, real * charge, real * p_facel, real * p_krf, real * p_crf, real * Vc, int * type, int * p_ntype, real * vdwparam, real * Vvdw, real * p_tabscale, real * VFtab, real * invsqrta, real * dvda, real * p_gbtabscale, real * GBtab, int * p_nthreads, int * count, void * mtx, int * outeriter, int * inneriter, real * work, t_pf_global * pf_global)
{
    int  nri, ntype, nthreads;
    real facel, krf, crf, tabscale, gbtabscale;
    int  n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
    int  nn0, nn1, nouter, ninner;
    real shX, shY, shZ;
    real fscal, tx, ty, tz;
    real rinvsq;
    real iq;
    real qq, vcoul, vctot;
    int  nti;
    int  tj;
    real rinvsix;
    real Vvdw6, Vvdwtot;
    real r, rt, eps, eps2;
    int  n0, nnn;
    real Y, F, Geps, Heps2, Fp, VV;
    real FF;
    real fijC;
    real Vvdwexp, br;
    real ix1, iy1, iz1, fix1, fiy1, fiz1;
    real jx1, jy1, jz1;
    real dx11, dy11, dz11, rsq11, rinv11;
    real c6, cexp1, cexp2;
    real pf_coul, pf_lj;

    /* Unpack the scalar arguments that are passed by pointer */
    nri      = *p_nri;
    ntype    = *p_ntype;
    nthreads = *p_nthreads;
    facel    = *p_facel;
    krf      = *p_krf;
    crf      = *p_crf;
    tabscale = *p_tabscale;

    /* Reset outer and inner iteration counters */
    nouter = 0;
    ninner = 0;

    /* Loop over thread workunits */
    do
    {
#ifdef GMX_THREAD_SHM_FDECOMP
        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
        nn0 = *count;

        /* Take successively smaller chunks (at least 10 lists) */
        nn1    = nn0+(nri-nn0)/(2*nthreads)+10;
        *count = nn1;
        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
        if(nn1>nri) nn1=nri;
#else
        nn0 = 0;
        nn1 = nri;
#endif
        /* Start outer loop over neighborlists */
        for (n = nn0; n < nn1; n++)
        {
            /* Load shift vector for this list */
            is3 = 3*shift[n];
            shX = shiftvec[is3];
            shY = shiftvec[is3+1];
            shZ = shiftvec[is3+2];

            /* Load limits for loop over neighbors */
            nj0 = jindex[n];
            nj1 = jindex[n+1];

            /* Get outer coordinate index */
            ii  = iinr[n];
            ii3 = 3*ii;

            /* Load i atom data, add shift vector */
            ix1 = shX + pos[ii3+0];
            iy1 = shY + pos[ii3+1];
            iz1 = shZ + pos[ii3+2];

            /* Load parameters for i atom */
            iq  = facel*charge[ii];
            nti = 3*ntype*type[ii];

            /* Zero the potential energy for this list */
            vctot   = 0;
            Vvdwtot = 0;

            /* Clear i atom forces */
            fix1 = 0;
            fiy1 = 0;
            fiz1 = 0;

            for (k = nj0; k < nj1; k++)
            {
                /* Get j neighbor index, and coordinate index */
                jnr = jjnr[k];
                j3  = 3*jnr;

                /* load j atom coordinates */
                jx1 = pos[j3+0];
                jy1 = pos[j3+1];
                jz1 = pos[j3+2];

                /* Calculate distance */
                dx11  = ix1 - jx1;
                dy11  = iy1 - jy1;
                dz11  = iz1 - jz1;
                rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;

                /* Calculate 1/r and 1/r2 */
                rinv11 = gmx_invsqrt(rsq11);

                /* Load parameters for j atom */
                qq     = iq*charge[jnr];
                tj     = nti+3*type[jnr];
                c6     = vdwparam[tj];
                cexp1  = vdwparam[tj+1];
                cexp2  = vdwparam[tj+2];
                rinvsq = rinv11*rinv11;

                /* Calculate table index */
                r    = rsq11*rinv11;
                rt   = r*tabscale;
                n0   = rt;
                eps  = rt-n0;
                eps2 = eps*eps;
                nnn  = 4*n0;

                /* Tabulated coulomb interaction */
                Y     = VFtab[nnn];
                F     = VFtab[nnn+1];
                Geps  = eps*VFtab[nnn+2];
                Heps2 = eps2*VFtab[nnn+3];
                Fp    = F+Geps+Heps2;
                VV    = Y+eps*Fp;
                FF    = Fp+Geps+2.0*Heps2;
                vcoul = qq*VV;
                fijC  = qq*FF;
                vctot += vcoul;

                /* Buckingham interaction */
                rinvsix = rinvsq*rinvsq*rinvsq;
                Vvdw6   = c6*rinvsix;
                br      = cexp2*rsq11*rinv11;
                Vvdwexp = cexp1*exp(-br);
                Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;

                /* Scalar forces, kept separate for the pairwise bookkeeping */
                pf_coul = -((fijC)*tabscale)*rinv11;
                pf_lj   = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
                fscal   = pf_lj+pf_coul;

                /* Calculate temporary vectorial force */
                tx = fscal*dx11;
                ty = fscal*dy11;
                tz = fscal*dz11;

                /* Increment i atom force */
                fix1 += tx;
                fiy1 += ty;
                fiz1 += tz;

                /* Decrement j atom force */
                faction[j3+0] -= tx;
                faction[j3+1] -= ty;
                faction[j3+2] -= tz;

                /* pairwise forces */
                if (pf_global->bInitialized)
                {
                    pf_atom_add_nonbonded(pf_global, ii, jnr, pf_coul, pf_lj, dx11, dy11, dz11);
                }

                /* Inner loop uses 81 flops/iteration */
            }

            /* Add i forces to mem and shifted force list */
            faction[ii3+0] += fix1;
            faction[ii3+1] += fiy1;
            faction[ii3+2] += fiz1;
            fshift[is3]    += fix1;
            fshift[is3+1]  += fiy1;
            fshift[is3+2]  += fiz1;

            /* Add potential energies to the group for this list */
            ggid        = gid[n];
            Vc[ggid]   += vctot;
            Vvdw[ggid] += Vvdwtot;

            /* Increment number of inner iterations */
            ninner = ninner + nj1 - nj0;

            /* Outer loop uses 12 flops/iteration */
        }

        /* Increment number of outer iterations */
        nouter = nouter + nn1 - nn0;
    }
    while (nn1<nri);

    /* Write outer/inner iteration count to pointers */
    *outeriter = nouter;
    *inneriter = ninner;
}
/*
 * Gromacs nonbonded kernel nb_kernel231
 * Coulomb interaction:     Reaction field
 * VdW interaction:         Tabulated
 * water optimization:      SPC/TIP3P - other atoms
 * Calculate forces:        yes
 *
 * Every i entry covers three consecutive atoms starting at iinr[n]. Only the
 * first of them carries the VdW interaction; all three interact through the
 * reaction-field Coulomb term. The charges of the first two atoms of the
 * very first entry (iinr[0]) are used for all entries.
 */
void nb_kernel231( int * p_nri, int * iinr, int * jindex, int * jjnr, int * shift, real * shiftvec, real * fshift, int * gid, real * pos, real * faction, real * charge, real * p_facel, real * p_krf, real * p_crf, real * Vc, int * type, int * p_ntype, real * vdwparam, real * Vvdw, real * p_tabscale, real * VFtab, real * invsqrta, real * dvda, real * p_gbtabscale, real * GBtab, int * p_nthreads, int * count, void * mtx, int * outeriter, int * inneriter, real * work)
{
    int  nri, ntype, nthreads;
    real facel, krf, crf, tabscale, gbtabscale;
    int  n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
    int  nn0, nn1, nouter, ninner;
    real shX, shY, shZ;
    real fscal, tx, ty, tz;
    real rinvsq;
    real jq;
    real qq, vcoul, vctot;
    int  nti;
    int  tj;
    real Vvdw6, Vvdwtot;
    real Vvdw12;
    real r, rt, eps, eps2;
    int  n0, nnn;
    real Y, F, Geps, Heps2, Fp, VV;
    real FF;
    real fijD, fijR;
    real krsq;
    real ix1, iy1, iz1, fix1, fiy1, fiz1;
    real ix2, iy2, iz2, fix2, fiy2, fiz2;
    real ix3, iy3, iz3, fix3, fiy3, fiz3;
    real jx1, jy1, jz1, fjx1, fjy1, fjz1;
    real dx11, dy11, dz11, rsq11, rinv11;
    real dx21, dy21, dz21, rsq21, rinv21;
    real dx31, dy31, dz31, rsq31, rinv31;
    real qO, qH;
    real c6, c12;

    /* Unpack the scalar arguments that are passed by pointer */
    nri      = *p_nri;
    ntype    = *p_ntype;
    nthreads = *p_nthreads;
    facel    = *p_facel;
    krf      = *p_krf;
    crf      = *p_crf;
    tabscale = *p_tabscale;

    /* Initialize water data */
    ii  = iinr[0];
    qO  = facel*charge[ii];
    qH  = facel*charge[ii+1];
    nti = 2*ntype*type[ii];

    /* Reset outer and inner iteration counters */
    nouter = 0;
    ninner = 0;

    /* Loop over thread workunits */
    do
    {
#ifdef GMX_THREAD_SHM_FDECOMP
        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
        nn0 = *count;

        /* Take successively smaller chunks (at least 10 lists) */
        nn1    = nn0+(nri-nn0)/(2*nthreads)+10;
        *count = nn1;
        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
        if(nn1>nri) nn1=nri;
#else
        nn0 = 0;
        nn1 = nri;
#endif
        /* Start outer loop over neighborlists */
        for (n = nn0; n < nn1; n++)
        {
            /* Load shift vector for this list */
            is3 = 3*shift[n];
            shX = shiftvec[is3];
            shY = shiftvec[is3+1];
            shZ = shiftvec[is3+2];

            /* Load limits for loop over neighbors */
            nj0 = jindex[n];
            nj1 = jindex[n+1];

            /* Get outer coordinate index */
            ii  = iinr[n];
            ii3 = 3*ii;

            /* Load i atom data, add shift vector */
            ix1 = shX + pos[ii3+0];
            iy1 = shY + pos[ii3+1];
            iz1 = shZ + pos[ii3+2];
            ix2 = shX + pos[ii3+3];
            iy2 = shY + pos[ii3+4];
            iz2 = shZ + pos[ii3+5];
            ix3 = shX + pos[ii3+6];
            iy3 = shY + pos[ii3+7];
            iz3 = shZ + pos[ii3+8];

            /* Zero the potential energy for this list */
            vctot   = 0;
            Vvdwtot = 0;

            /* Clear i atom forces */
            fix1 = 0;
            fiy1 = 0;
            fiz1 = 0;
            fix2 = 0;
            fiy2 = 0;
            fiz2 = 0;
            fix3 = 0;
            fiy3 = 0;
            fiz3 = 0;

            for (k = nj0; k < nj1; k++)
            {
                /* Get j neighbor index, and coordinate index */
                jnr = jjnr[k];
                j3  = 3*jnr;

                /* load j atom coordinates */
                jx1 = pos[j3+0];
                jy1 = pos[j3+1];
                jz1 = pos[j3+2];

                /* Calculate distance */
                dx11  = ix1 - jx1;
                dy11  = iy1 - jy1;
                dz11  = iz1 - jz1;
                rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
                dx21  = ix2 - jx1;
                dy21  = iy2 - jy1;
                dz21  = iz2 - jz1;
                rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
                dx31  = ix3 - jx1;
                dy31  = iy3 - jy1;
                dz31  = iz3 - jz1;
                rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;

                /* Calculate 1/r and 1/r2 */
                rinv11 = gmx_invsqrt(rsq11);
                rinv21 = gmx_invsqrt(rsq21);
                rinv31 = gmx_invsqrt(rsq31);

                /* ---- first i atom: reaction field + tabulated VdW ---- */

                /* Load parameters for j atom */
                jq     = charge[jnr+0];
                qq     = qO*jq;
                tj     = nti+2*type[jnr];
                c6     = vdwparam[tj];
                c12    = vdwparam[tj+1];
                rinvsq = rinv11*rinv11;

                /* Coulomb reaction-field interaction */
                krsq   = krf*rsq11;
                vcoul  = qq*(rinv11+krsq-crf);
                vctot += vcoul;

                /* Calculate table index */
                r    = rsq11*rinv11;
                rt   = r*tabscale;
                n0   = rt;
                eps  = rt-n0;
                eps2 = eps*eps;
                nnn  = 8*n0;

                /* Tabulated VdW interaction - dispersion */
                Y     = VFtab[nnn];
                F     = VFtab[nnn+1];
                Geps  = eps*VFtab[nnn+2];
                Heps2 = eps2*VFtab[nnn+3];
                Fp    = F+Geps+Heps2;
                VV    = Y+eps*Fp;
                FF    = Fp+Geps+2.0*Heps2;
                Vvdw6 = c6*VV;
                fijD  = c6*FF;

                /* Tabulated VdW interaction - repulsion */
                nnn   += 4;
                Y      = VFtab[nnn];
                F      = VFtab[nnn+1];
                Geps   = eps*VFtab[nnn+2];
                Heps2  = eps2*VFtab[nnn+3];
                Fp     = F+Geps+Heps2;
                VV     = Y+eps*Fp;
                FF     = Fp+Geps+2.0*Heps2;
                Vvdw12 = c12*VV;
                fijR   = c12*FF;

                Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
                fscal   = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;

                /* Calculate temporary vectorial force */
                tx = fscal*dx11;
                ty = fscal*dy11;
                tz = fscal*dz11;

                /* Increment i atom force */
                fix1 += tx;
                fiy1 += ty;
                fiz1 += tz;

                /* Decrement j atom force (kept in locals until the end) */
                fjx1 = faction[j3+0] - tx;
                fjy1 = faction[j3+1] - ty;
                fjz1 = faction[j3+2] - tz;

                /* ---- second i atom: reaction field only ---- */

                /* Load parameters for j atom */
                qq     = qH*jq;
                rinvsq = rinv21*rinv21;

                /* Coulomb reaction-field interaction */
                krsq   = krf*rsq21;
                vcoul  = qq*(rinv21+krsq-crf);
                vctot += vcoul;
                fscal  = (qq*(rinv21-2.0*krsq))*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx21;
                ty = fscal*dy21;
                tz = fscal*dz21;

                /* Increment i atom force */
                fix2 += tx;
                fiy2 += ty;
                fiz2 += tz;

                /* Decrement j atom force */
                fjx1 -= tx;
                fjy1 -= ty;
                fjz1 -= tz;

                /* ---- third i atom: reaction field only, same qq ---- */

                /* Load parameters for j atom */
                rinvsq = rinv31*rinv31;

                /* Coulomb reaction-field interaction */
                krsq   = krf*rsq31;
                vcoul  = qq*(rinv31+krsq-crf);
                vctot += vcoul;
                fscal  = (qq*(rinv31-2.0*krsq))*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx31;
                ty = fscal*dy31;
                tz = fscal*dz31;

                /* Increment i atom force */
                fix3 += tx;
                fiy3 += ty;
                fiz3 += tz;

                /* Decrement j atom force */
                faction[j3+0] = fjx1 - tx;
                faction[j3+1] = fjy1 - ty;
                faction[j3+2] = fjz1 - tz;

                /* Inner loop uses 130 flops/iteration */
            }

            /* Add i forces to mem and shifted force list */
            faction[ii3+0] += fix1;
            faction[ii3+1] += fiy1;
            faction[ii3+2] += fiz1;
            faction[ii3+3] += fix2;
            faction[ii3+4] += fiy2;
            faction[ii3+5] += fiz2;
            faction[ii3+6] += fix3;
            faction[ii3+7] += fiy3;
            faction[ii3+8] += fiz3;
            fshift[is3]     = fshift[is3]+fix1+fix2+fix3;
            fshift[is3+1]   = fshift[is3+1]+fiy1+fiy2+fiy3;
            fshift[is3+2]   = fshift[is3+2]+fiz1+fiz2+fiz3;

            /* Add potential energies to the group for this list */
            ggid        = gid[n];
            Vc[ggid]   += vctot;
            Vvdw[ggid] += Vvdwtot;

            /* Increment number of inner iterations */
            ninner = ninner + nj1 - nj0;

            /* Outer loop uses 29 flops/iteration */
        }

        /* Increment number of outer iterations */
        nouter = nouter + nn1 - nn0;
    }
    while (nn1<nri);

    /* Write outer/inner iteration count to pointers */
    *outeriter = nouter;
    *inneriter = ninner;
}
static void gmx_print_version_info(FILE *fp) { fprintf(fp, "Gromacs version: %s\n", gmx_version()); const char *const git_hash = gmx_version_git_full_hash(); if (git_hash[0] != '\0') { fprintf(fp, "GIT SHA1 hash: %s\n", git_hash); } const char *const base_hash = gmx_version_git_central_base_hash(); if (base_hash[0] != '\0') { fprintf(fp, "Branched from: %s\n", base_hash); } #ifdef GMX_DOUBLE fprintf(fp, "Precision: double\n"); #else fprintf(fp, "Precision: single\n"); #endif fprintf(fp, "Memory model: %u bit\n", (unsigned)(8*sizeof(void *))); #ifdef GMX_THREAD_MPI fprintf(fp, "MPI library: thread_mpi\n"); #elif defined(GMX_MPI) fprintf(fp, "MPI library: MPI\n"); #else fprintf(fp, "MPI library: none\n"); #endif #ifdef GMX_OPENMP fprintf(fp, "OpenMP support: enabled (GMX_OPENMP_MAX_THREADS = %d)\n", GMX_OPENMP_MAX_THREADS); #else fprintf(fp, "OpenMP support: disabled\n"); #endif #ifdef GMX_GPU fprintf(fp, "GPU support: enabled\n"); #else fprintf(fp, "GPU support: disabled\n"); #endif /* A preprocessor trick to avoid duplicating logic from vec.h */ #define gmx_stringify2(x) #x #define gmx_stringify(x) gmx_stringify2(x) fprintf(fp, "invsqrt routine: %s\n", gmx_stringify(gmx_invsqrt(x))); fprintf(fp, "SIMD instructions: %s\n", GMX_SIMD_STRING); fprintf(fp, "FFT library: %s\n", gmx_fft_get_version_info()); #ifdef HAVE_RDTSCP fprintf(fp, "RDTSCP usage: enabled\n"); #else fprintf(fp, "RDTSCP usage: disabled\n"); #endif #ifdef GMX_CXX11 fprintf(fp, "C++11 compilation: enabled\n"); #else fprintf(fp, "C++11 compilation: disabled\n"); #endif #ifdef GMX_USE_TNG fprintf(fp, "TNG support: enabled\n"); #else fprintf(fp, "TNG support: disabled\n"); #endif #ifdef HAVE_EXTRAE unsigned major, minor, revision; Extrae_get_version(&major, &minor, &revision); fprintf(fp, "Tracing support: enabled. 
Using Extrae-%d.%d.%d\n", major, minor, revision); #else fprintf(fp, "Tracing support: disabled\n"); #endif fprintf(fp, "Built on: %s\n", BUILD_TIME); fprintf(fp, "Built by: %s\n", BUILD_USER); fprintf(fp, "Build OS/arch: %s\n", BUILD_HOST); fprintf(fp, "Build CPU vendor: %s\n", BUILD_CPU_VENDOR); fprintf(fp, "Build CPU brand: %s\n", BUILD_CPU_BRAND); fprintf(fp, "Build CPU family: %d Model: %d Stepping: %d\n", BUILD_CPU_FAMILY, BUILD_CPU_MODEL, BUILD_CPU_STEPPING); /* TODO: The below strings can be quite long, so it would be nice to wrap * them. Can wait for later, as the master branch has ready code to do all * that. */ fprintf(fp, "Build CPU features: %s\n", BUILD_CPU_FEATURES); fprintf(fp, "C compiler: %s\n", BUILD_C_COMPILER); fprintf(fp, "C compiler flags: %s\n", BUILD_CFLAGS); fprintf(fp, "C++ compiler: %s\n", BUILD_CXX_COMPILER); fprintf(fp, "C++ compiler flags: %s\n", BUILD_CXXFLAGS); #ifdef HAVE_LIBMKL /* MKL might be used for LAPACK/BLAS even if FFTs use FFTW, so keep it separate */ fprintf(fp, "Linked with Intel MKL version %d.%d.%d.\n", __INTEL_MKL__, __INTEL_MKL_MINOR__, __INTEL_MKL_UPDATE__); #endif #ifdef GMX_EXTERNAL_BOOST const bool bExternalBoost = true; #else const bool bExternalBoost = false; #endif fprintf(fp, "Boost version: %d.%d.%d%s\n", BOOST_VERSION / 100000, BOOST_VERSION / 100 % 1000, BOOST_VERSION % 100, bExternalBoost ? " (external)" : " (internal)"); #ifdef GMX_GPU gmx_print_version_info_gpu(fp); #endif }
void adress_thermo_force(int start, int homenr, t_block * cgs, rvec x[], rvec f[], t_forcerec * fr, t_mdatoms * mdatoms, t_pbc * pbc) { int iatom, n0, nnn, nrcg, i; int adresstype; real adressw, adressr; atom_id * cgindex; unsigned short * ptype; rvec * ref; real * wf; real tabscale; real * ATFtab; rvec dr; real w, wsq, wmin1, wmin1sq, wp, wt, rinv, sqr_dl, dl; real eps, eps2, F, Geps, Heps2, Fp, dmu_dwp, dwp_dr, fscal; adresstype = fr->adress_type; adressw = fr->adress_hy_width; adressr = fr->adress_ex_width; cgindex = cgs->index; ptype = mdatoms->ptype; ref = &(fr->adress_refs); wf = mdatoms->wf; for (iatom = start; (iatom < start+homenr); iatom++) { if (egp_coarsegrained(fr, mdatoms->cENER[iatom])) { if (ptype[iatom] == eptVSite) { w = wf[iatom]; /* is it hybrid or apply the thermodynamics force everywhere?*/ if (mdatoms->tf_table_index[iatom] != NO_TF_TABLE) { if (fr->n_adress_tf_grps > 0) { /* multi component tf is on, select the right table */ ATFtab = fr->atf_tabs[mdatoms->tf_table_index[iatom]].data; tabscale = fr->atf_tabs[mdatoms->tf_table_index[iatom]].scale; } else { /* just on component*/ ATFtab = fr->atf_tabs[DEFAULT_TF_TABLE].data; tabscale = fr->atf_tabs[DEFAULT_TF_TABLE].scale; } fscal = 0; if (pbc) { pbc_dx(pbc, (*ref), x[iatom], dr); } else { rvec_sub((*ref), x[iatom], dr); } /* calculate distace to adress center again */ sqr_dl = 0.0; switch (adresstype) { case eAdressXSplit: /* plane through center of ref, varies in x direction */ sqr_dl = dr[0]*dr[0]; rinv = gmx_invsqrt(dr[0]*dr[0]); break; case eAdressSphere: /* point at center of ref, assuming cubic geometry */ for (i = 0; i < 3; i++) { sqr_dl += dr[i]*dr[i]; } rinv = gmx_invsqrt(sqr_dl); break; default: /* This case should not happen */ rinv = 0.0; } dl = sqrt(sqr_dl); /* table origin is adress center */ wt = dl*tabscale; n0 = wt; eps = wt-n0; eps2 = eps*eps; nnn = 4*n0; F = ATFtab[nnn+1]; Geps = eps*ATFtab[nnn+2]; Heps2 = eps2*ATFtab[nnn+3]; Fp = F+Geps+Heps2; F = 
(Fp+Geps+2.0*Heps2)*tabscale; fscal = F*rinv; f[iatom][0] += fscal*dr[0]; if (adresstype != eAdressXSplit) { f[iatom][1] += fscal*dr[1]; f[iatom][2] += fscal*dr[2]; } } } } } }
/*
 * Gromacs nonbonded kernel nb_kernel103
 * Coulomb interaction:     Normal Coulomb
 * VdW interaction:         Not calculated
 * water optimization:      TIP4P - other atoms
 * Calculate forces:        yes
 *
 * Each neighbour list n has a four-site i-molecule whose first atom is
 * iinr[n].  Only sites 2, 3 and 4 (coordinates pos[3*ii+3..11]) interact,
 * through plain Coulomb, with the single-atom j-neighbours
 * jjnr[jindex[n] .. jindex[n+1]-1].  Sites 2 and 3 use charge[ii+1],
 * site 4 uses charge[ii+3], both taken from iinr[0] only.
 *
 * Inputs read by this kernel:
 *   p_nri               number of neighbour lists
 *   iinr, jindex, jjnr  neighbour list: i-atom, j-range and j-atoms
 *   shift, shiftvec     per-list shift index and the shift vectors
 *   gid                 per-list energy-group index into Vc
 *   pos, charge         per-atom coordinates (xyz packed) and charges
 *   p_facel             prefactor multiplied into the i-charges
 *   p_nthreads, count, mtx
 *                       only used when GMX_THREAD_SHM_FDECOMP is defined:
 *                       shared work counter protected by a tMPI mutex
 * Outputs:
 *   faction, fshift     forces and shift forces are accumulated
 *   Vc                  Coulomb energy is accumulated per energy group
 *   outeriter, inneriter
 *                       number of outer/inner iterations done by this call
 * p_ntype, p_krf, p_crf and p_tabscale are dereferenced but their values
 * are not used; the remaining parameters are not referenced.
 */
void nb_kernel103(
    int *    p_nri,
    int *    iinr,
    int *    jindex,
    int *    jjnr,
    int *    shift,
    real *   shiftvec,
    real *   fshift,
    int *    gid,
    real *   pos,
    real *   faction,
    real *   charge,
    real *   p_facel,
    real *   p_krf,
    real *   p_crf,
    real *   Vc,
    int *    type,
    int *    p_ntype,
    real *   vdwparam,
    real *   Vvdw,
    real *   p_tabscale,
    real *   VFtab,
    real *   invsqrta,
    real *   dvda,
    real *   p_gbtabscale,
    real *   GBtab,
    int *    p_nthreads,
    int *    count,
    void *   mtx,
    int *    outeriter,
    int *    inneriter,
    real *   work)
{
    int   nri,ntype,nthreads;
    real  facel,krf,crf,tabscale;
    int   n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
    int   nn0,nn1,nouter,ninner;
    real  shX,shY,shZ;
    real  fscal,tx,ty,tz;
    real  rinvsq;
    real  jq;
    real  qq,vcoul,vctot;
    real  ix2,iy2,iz2,fix2,fiy2,fiz2;
    real  ix3,iy3,iz3,fix3,fiy3,fiz3;
    real  ix4,iy4,iz4,fix4,fiy4,fiz4;
    real  jx1,jy1,jz1,fjx1,fjy1,fjz1;
    real  dx21,dy21,dz21,rsq21,rinv21;
    real  dx31,dy31,dz31,rsq31,rinv31;
    real  dx41,dy41,dz41,rsq41,rinv41;
    real  qH,qM;

    nri      = *p_nri;
    ntype    = *p_ntype;
    nthreads = *p_nthreads;
    facel    = *p_facel;
    krf      = *p_krf;
    crf      = *p_crf;
    tabscale = *p_tabscale;

    /* Initialize water data from the first i-molecule in the list */
    ii = iinr[0];
    qH = facel*charge[ii+1];
    qM = facel*charge[ii+3];

    /* Reset outer and inner iteration counters */
    nouter = 0;
    ninner = 0;

    /* Loop over thread workunits */
    do
    {
#ifdef GMX_THREAD_SHM_FDECOMP
        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
        nn0 = *count;
        /* Take successively smaller chunks (at least 10 lists) */
        nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
        *count = nn1;
        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
        if (nn1 > nri)
        {
            nn1 = nri;
        }
#else
        nn0 = 0;
        nn1 = nri;
#endif
        /* Start outer loop over neighborlists */
        for(n=nn0; (n<nn1); n++)
        {
            /* Load shift vector for this list */
            is3 = 3*shift[n];
            shX = shiftvec[is3];
            shY = shiftvec[is3+1];
            shZ = shiftvec[is3+2];

            /* Load limits for loop over neighbors */
            nj0 = jindex[n];
            nj1 = jindex[n+1];

            /* Get outer coordinate index */
            ii  = iinr[n];
            ii3 = 3*ii;

            /* Load i atom data (sites 2-4), add shift vector */
            ix2 = shX + pos[ii3+3];
            iy2 = shY + pos[ii3+4];
            iz2 = shZ + pos[ii3+5];
            ix3 = shX + pos[ii3+6];
            iy3 = shY + pos[ii3+7];
            iz3 = shZ + pos[ii3+8];
            ix4 = shX + pos[ii3+9];
            iy4 = shY + pos[ii3+10];
            iz4 = shZ + pos[ii3+11];

            /* Zero the potential energy for this list */
            vctot = 0;

            /* Clear i atom forces */
            fix2 = 0;
            fiy2 = 0;
            fiz2 = 0;
            fix3 = 0;
            fiy3 = 0;
            fiz3 = 0;
            fix4 = 0;
            fiy4 = 0;
            fiz4 = 0;

            for(k=nj0; (k<nj1); k++)
            {
                /* Get j neighbor index, and coordinate index */
                jnr = jjnr[k];
                j3  = 3*jnr;

                /* load j atom coordinates */
                jx1 = pos[j3+0];
                jy1 = pos[j3+1];
                jz1 = pos[j3+2];

                /* Calculate distance */
                dx21  = ix2 - jx1;
                dy21  = iy2 - jy1;
                dz21  = iz2 - jz1;
                rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
                dx31  = ix3 - jx1;
                dy31  = iy3 - jy1;
                dz31  = iz3 - jz1;
                rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
                dx41  = ix4 - jx1;
                dy41  = iy4 - jy1;
                dz41  = iz4 - jz1;
                rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;

                /* Calculate 1/r and 1/r2 */
                rinv21 = gmx_invsqrt(rsq21);
                rinv31 = gmx_invsqrt(rsq31);
                rinv41 = gmx_invsqrt(rsq41);

                /* ---- i-site 2 with j ---- */

                /* Load parameters for j atom */
                jq     = charge[jnr+0];
                qq     = qH*jq;
                rinvsq = rinv21*rinv21;

                /* Coulomb interaction */
                vcoul = qq*rinv21;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx21;
                ty = fscal*dy21;
                tz = fscal*dz21;

                /* Increment i atom force */
                fix2 = fix2 + tx;
                fiy2 = fiy2 + ty;
                fiz2 = fiz2 + tz;

                /* Decrement j atom force (kept in registers until the
                 * last site has been handled) */
                fjx1 = faction[j3+0] - tx;
                fjy1 = faction[j3+1] - ty;
                fjz1 = faction[j3+2] - tz;

                /* ---- i-site 3 with j (qq still qH*jq) ---- */

                /* Load parameters for j atom */
                rinvsq = rinv31*rinv31;

                /* Coulomb interaction */
                vcoul = qq*rinv31;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx31;
                ty = fscal*dy31;
                tz = fscal*dz31;

                /* Increment i atom force */
                fix3 = fix3 + tx;
                fiy3 = fiy3 + ty;
                fiz3 = fiz3 + tz;

                /* Decrement j atom force */
                fjx1 = fjx1 - tx;
                fjy1 = fjy1 - ty;
                fjz1 = fjz1 - tz;

                /* ---- i-site 4 with j ---- */

                /* Load parameters for j atom */
                qq     = qM*jq;
                rinvsq = rinv41*rinv41;

                /* Coulomb interaction */
                vcoul = qq*rinv41;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx41;
                ty = fscal*dy41;
                tz = fscal*dz41;

                /* Increment i atom force */
                fix4 = fix4 + tx;
                fiy4 = fiy4 + ty;
                fiz4 = fiz4 + tz;

                /* Decrement j atom force and write it back */
                faction[j3+0] = fjx1 - tx;
                faction[j3+1] = fjy1 - ty;
                faction[j3+2] = fjz1 - tz;

                /* Inner loop uses 80 flops/iteration */
            }

            /* Add i forces to mem and shifted force list */
            faction[ii3+3]  = faction[ii3+3] + fix2;
            faction[ii3+4]  = faction[ii3+4] + fiy2;
            faction[ii3+5]  = faction[ii3+5] + fiz2;
            faction[ii3+6]  = faction[ii3+6] + fix3;
            faction[ii3+7]  = faction[ii3+7] + fiy3;
            faction[ii3+8]  = faction[ii3+8] + fiz3;
            faction[ii3+9]  = faction[ii3+9] + fix4;
            faction[ii3+10] = faction[ii3+10] + fiy4;
            faction[ii3+11] = faction[ii3+11] + fiz4;
            fshift[is3]     = fshift[is3]+fix2+fix3+fix4;
            fshift[is3+1]   = fshift[is3+1]+fiy2+fiy3+fiy4;
            fshift[is3+2]   = fshift[is3+2]+fiz2+fiz3+fiz4;

            /* Add potential energies to the group for this list */
            ggid     = gid[n];
            Vc[ggid] = Vc[ggid] + vctot;

            /* Increment number of inner iterations */
            ninner = ninner + nj1 - nj0;

            /* Outer loop uses 28 flops/iteration */
        }

        /* Increment number of outer iterations.  In the threaded build
         * *count is stored uncapped, so a later work request can start at
         * nn0 > nri while nn1 is capped to nri; nothing was processed in
         * that case and the counter must not be decremented.
         */
        if (nn1 > nn0)
        {
            nouter = nouter + nn1 - nn0;
        }
    }
    while (nn1<nri);

    /* Write outer/inner iteration count to pointers */
    *outeriter = nouter;
    *inneriter = ninner;
}
/* Generic (non-optimized) nonbonded kernel for perturbed pairs.
 *
 * For every i-j pair in nlist the Coulomb and Van der Waals interactions
 * are evaluated for two end states (A and B), using soft-cored distances
 * rC and rV, and combined linearly with the lambda factors LFC/LFV.
 *
 * nlist        neighbour list (nri, iinr, jindex, jjnr, shift, gid) plus the
 *              interaction selectors ielec and ivdw
 * xx           coordinates, accessed as a flat real array through xx[0]
 * ff           forces, accessed as a flat real array through ff[0];
 *              only written when GMX_NONBONDED_DO_FORCE is set in
 *              kernel_data->flags
 * fr           force record: cut-offs, modifiers, soft-core settings,
 *              type table nbfp, shift vectors and shift forces
 * mdatoms      per-atom A/B charges and A/B types
 * kernel_data  lambda values, energy-group accumulators, the combined
 *              table and the dvdl accumulators
 * nrnb         flop counter, incremented once at the end
 *
 * Side effects: accumulates into kernel_data->energygrp_elec/_vdw,
 * kernel_data->dvdl[efptCOUL]/[efptVDW] and, when forces are requested,
 * into ff and fr->fshift.
 */
void gmx_nb_free_energy_kernel(t_nblist * nlist, rvec * xx, rvec * ff, t_forcerec * fr, t_mdatoms * mdatoms, nb_kernel_data_t * kernel_data, t_nrnb * nrnb)
{
    /* Indices of the two end states in all [NSTATES] arrays */
#define STATE_A 0
#define STATE_B 1
#define NSTATES 2
    int i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
    real shX, shY, shZ;
    real Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
    real Vcoul[NSTATES], Vvdw[NSTATES];
    real rinv6, r, rt, rtC, rtV;
    real iqA, iqB;
    real qq[NSTATES], vctot, krsq;
    int ntiA, ntiB, tj[NSTATES];
    real Vvdw6, Vvdw12, vvtot;
    real ix, iy, iz, fix, fiy, fiz;
    real dx, dy, dz, rsq, rinv;
    real c6[NSTATES], c12[NSTATES];
    real LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
    double dvdl_coul, dvdl_vdw;
    real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
    real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
    real rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
    real sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
    int do_coultab, do_vdwtab, do_tab, tab_elemsize;
    int n0, n1C, n1V, nnn;
    real Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
    int icoul, ivdw;
    int nri;
    int * iinr;
    int * jindex;
    int * jjnr;
    int * shift;
    int * gid;
    int * typeA;
    int * typeB;
    int ntype;
    real * shiftvec;
    real dvdl_part;
    real * fshift;
    real tabscale;
    real * VFtab;
    real * x;
    real * f;
    real facel, krf, crf;
    real * chargeA;
    real * chargeB;
    real sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
    real alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
    real * nbfp;
    real * dvdl;
    real * Vv;
    real * Vc;
    gmx_bool bDoForces;
    real rcoulomb, rvdw, sh_invrc6;
    gmx_bool bExactElecCutoff, bExactVdwCutoff;
    real rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;

    /* Unpack the input structures into locals.  Vc, Vv, tabscale and
     * VFtab are assigned twice below with the same values. */
    x = xx[0];
    f = ff[0];
    fshift = fr->fshift[0];
    Vc = kernel_data->energygrp_elec;
    Vv = kernel_data->energygrp_vdw;
    tabscale = kernel_data->table_elec_vdw->scale;
    VFtab = kernel_data->table_elec_vdw->data;
    nri = nlist->nri;
    iinr = nlist->iinr;
    jindex = nlist->jindex;
    jjnr = nlist->jjnr;
    icoul = nlist->ielec;
    ivdw = nlist->ivdw;
    shift = nlist->shift;
    gid = nlist->gid;
    shiftvec = fr->shift_vec[0];
    chargeA = mdatoms->chargeA;
    chargeB = mdatoms->chargeB;
    facel = fr->epsfac;
    krf = fr->k_rf;
    crf = fr->c_rf;
    ewc = fr->ewaldcoeff;
    Vc = kernel_data->energygrp_elec;
    typeA = mdatoms->typeA;
    typeB = mdatoms->typeB;
    ntype = fr->ntype;
    nbfp = fr->nbfp;
    Vv = kernel_data->energygrp_vdw;
    tabscale = kernel_data->table_elec_vdw->scale;
    VFtab = kernel_data->table_elec_vdw->data;
    lambda_coul = kernel_data->lambda[efptCOUL];
    lambda_vdw = kernel_data->lambda[efptVDW];
    dvdl = kernel_data->dvdl;
    alpha_coul = fr->sc_alphacoul;
    alpha_vdw = fr->sc_alphavdw;
    lam_power = fr->sc_power;
    sc_r_power = fr->sc_r_power;
    sigma6_def = fr->sc_sigma6_def;
    sigma6_min = fr->sc_sigma6_min;
    bDoForces = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
    rcoulomb = fr->rcoulomb;
    rvdw = fr->rvdw;
    sh_invrc6 = fr->ic->sh_invrc6;

    /* Coefficients of the potential-switch polynomial.  A single set is
     * computed: from the Coulomb cut-off/switch radii when the Coulomb
     * modifier is a potential switch, otherwise from the VdW radii.  The
     * same set is used below for both interactions.
     * NOTE(review): if both modifiers are potential switches with
     * different radii, VdW would use the Coulomb values - confirm that
     * this combination cannot occur. */
    if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
    {
        rcutoff = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
        rcutoff2 = rcutoff*rcutoff;
        rswitch = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
        d = rcutoff-rswitch;
        swV3 = -10.0/(d*d*d);
        swV4 = 15.0/(d*d*d*d);
        swV5 = -6.0/(d*d*d*d*d);
        swF2 = -30.0/(d*d*d);
        swF3 = 60.0/(d*d*d*d);
        swF4 = -30.0/(d*d*d*d*d);
    }
    else
    {
        /* These variables are not used in this case; they are set only
         * to silence compiler warnings about uninitialized use */
        rswitch = 0.0;
        swV3 = 0.0;
        swV4 = 0.0;
        swV5 = 0.0;
        swF2 = 0.0;
        swF3 = 0.0;
        swF4 = 0.0;
    }

    /* Whether interactions beyond the cut-off must be skipped explicitly */
    bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
    bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE);

    /* fix compiler warnings */
    nj1 = 0;
    n1C = n1V = 0;
    epsC = epsV = 0;
    eps2C = eps2V = 0;

    dvdl_coul = 0;
    dvdl_vdw = 0;

    /* Lambda factor for state A, 1-lambda */
    LFC[STATE_A] = 1.0 - lambda_coul;
    LFV[STATE_A] = 1.0 - lambda_vdw;

    /* Lambda factor for state B, lambda */
    LFC[STATE_B] = lambda_coul;
    LFV[STATE_B] = lambda_vdw;

    /* derivative of the lambda factor for state A and B */
    DLF[STATE_A] = -1;
    DLF[STATE_B] = 1;

    /* Soft-core lambda factors: (1-LF) or (1-LF)^2 depending on
     * lam_power, and the matching derivative prefactors */
    for (i = 0; i < NSTATES; i++)
    {
        lfac_coul[i] = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
        dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
        lfac_vdw[i] = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
        dlfac_vdw[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
    }

    /* precalculate the cube roots of the default and minimum sigma^6 */
    sigma2_def = pow(sigma6_def, 1.0/3.0);
    sigma2_min = pow(sigma6_min, 1.0/3.0);

    /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
    do_coultab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
    do_vdwtab = (ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
    do_tab = do_coultab || do_vdwtab;

    /* we always use the combined table here: 12 reals per table point,
     * read below as 4 Coulomb, 4 dispersion and 4 repulsion values */
    tab_elemsize = 12;

    /* Outer loop over neighbour lists (one i-atom each) */
    for (n = 0; (n < nri); n++)
    {
        is3 = 3*shift[n];
        shX = shiftvec[is3];
        shY = shiftvec[is3+1];
        shZ = shiftvec[is3+2];
        nj0 = jindex[n];
        nj1 = jindex[n+1];
        ii = iinr[n];
        ii3 = 3*ii;
        ix = shX + x[ii3+0];
        iy = shY + x[ii3+1];
        iz = shZ + x[ii3+2];
        iqA = facel*chargeA[ii];
        iqB = facel*chargeB[ii];
        ntiA = 2*ntype*typeA[ii];
        ntiB = 2*ntype*typeB[ii];
        vctot = 0;
        vvtot = 0;
        fix = 0;
        fiy = 0;
        fiz = 0;

        /* Inner loop over the j-atoms of this list */
        for (k = nj0; (k < nj1); k++)
        {
            jnr = jjnr[k];
            j3 = 3*jnr;
            dx = ix - x[j3];
            dy = iy - x[j3+1];
            dz = iz - x[j3+2];
            rsq = dx*dx+dy*dy+dz*dz;
            rinv = gmx_invsqrt(rsq);
            r = rsq*rinv;

            /* rp = r^p and rpm2 = r^(p-2), with p = sc_r_power */
            if (sc_r_power == 6.0)
            {
                rpm2 = rsq*rsq; /* r4 */
                rp = rpm2*rsq; /* r6 */
            }
            else if (sc_r_power == 48.0)
            {
                rp = rsq*rsq*rsq; /* r6 */
                rp = rp*rp; /* r12 */
                rp = rp*rp; /* r24 */
                rp = rp*rp; /* r48 */
                rpm2 = rp/rsq; /* r46 */
            }
            else
            {
                rp = pow(r, sc_r_power); /* not currently supported as input, but can handle it */
                rpm2 = rp/rsq;
            }

            tj[STATE_A] = ntiA+2*typeA[jnr];
            tj[STATE_B] = ntiB+2*typeB[jnr];
            qq[STATE_A] = iqA*chargeA[jnr];
            qq[STATE_B] = iqB*chargeB[jnr];

            /* Per-state LJ parameters and the soft-core sigma powers */
            for (i = 0; i < NSTATES; i++)
            {
                c6[i] = nbfp[tj[i]];
                c12[i] = nbfp[tj[i]+1];
                if ((c6[i] > 0) && (c12[i] > 0))
                {
                    /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
                    sigma6[i] = 0.5*c12[i]/c6[i];
                    sigma2[i] = pow(sigma6[i], 1.0/3.0);
                    /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
                       what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */
                    if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
                    {
                        sigma6[i] = sigma6_min;
                        sigma2[i] = sigma2_min;
                    }
                }
                else
                {
                    sigma6[i] = sigma6_def;
                    sigma2[i] = sigma2_def;
                }
                if (sc_r_power == 6.0)
                {
                    sigma_pow[i] = sigma6[i];
                    sigma_powm2[i] = sigma6[i]/sigma2[i];
                }
                else if (sc_r_power == 48.0)
                {
                    sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
                    sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
                    sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
                    sigma_powm2[i] = sigma_pow[i]/sigma2[i];
                }
                else
                {
                    /* not really supported as input, but in here for testing the general case */
                    sigma_pow[i] = pow(sigma2[i], sc_r_power/2);
                    sigma_powm2[i] = sigma_pow[i]/(sigma2[i]);
                }
            }

            /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities! */
            if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
            {
                alpha_vdw_eff = 0;
                alpha_coul_eff = 0;
            }
            else
            {
                alpha_vdw_eff = alpha_vdw;
                alpha_coul_eff = alpha_coul;
            }

            /* Evaluate potential and scalar force for each end state */
            for (i = 0; i < NSTATES; i++)
            {
                FscalC[i] = 0;
                FscalV[i] = 0;
                Vcoul[i] = 0;
                Vvdw[i] = 0;

                /* Only spend time on A or B state if it is non-zero */
                if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
                {
                    /* this section has to be inside the loop because of the dependence on sigma_pow.
                     * Soft-cored distances: rC = (alpha*lfac*sigma^p + r^p)^(1/p)
                     * for Coulomb and likewise rV for VdW. */
                    rpinvC = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
                    rinvC = pow(rpinvC, 1.0/sc_r_power);
                    rC = 1.0/rinvC;

                    rpinvV = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
                    rinvV = pow(rpinvV, 1.0/sc_r_power);
                    rV = 1.0/rinvV;

                    if (do_tab)
                    {
                        /* Table point and fractional offset for rC and rV */
                        rtC = rC*tabscale;
                        n0 = rtC;
                        epsC = rtC-n0;
                        eps2C = epsC*epsC;
                        n1C = tab_elemsize*n0;

                        rtV = rV*tabscale;
                        n0 = rtV;
                        epsV = rtV-n0;
                        eps2V = epsV*epsV;
                        n1V = tab_elemsize*n0;
                    }

                    /* With Ewald and soft-core we should put the cut-off on r,
                     * not on the soft-cored rC, as the real-space and
                     * reciprocal space contributions should (almost) cancel.
                     */
                    if (qq[i] != 0 && !(bExactElecCutoff && ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) || (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
                    {
                        switch (icoul)
                        {
                            case GMX_NBKERNEL_ELEC_COULOMB:
                            case GMX_NBKERNEL_ELEC_EWALD:
                                /* simple cutoff (yes, ewald is done all on direct space for free energy) */
                                Vcoul[i] = qq[i]*rinvC;
                                FscalC[i] = Vcoul[i]*rpinvC;
                                break;

                            case GMX_NBKERNEL_ELEC_REACTIONFIELD:
                                /* reaction-field */
                                Vcoul[i] = qq[i]*(rinvC+krf*rC*rC-crf);
                                FscalC[i] = qq[i]*(rinvC*rpinvC-2.0*krf);
                                break;

                            case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
                                /* non-Ewald tabulated coulomb */
                                nnn = n1C;
                                Y = VFtab[nnn];
                                F = VFtab[nnn+1];
                                Geps = epsC*VFtab[nnn+2];
                                Heps2 = eps2C*VFtab[nnn+3];
                                Fp = F+Geps+Heps2;
                                VV = Y+epsC*Fp;
                                FF = Fp+Geps+2.0*Heps2;
                                Vcoul[i] = qq[i]*VV;
                                FscalC[i] = -qq[i]*tabscale*FF*rC*rpinvC;
                                break;

                            default:
                                FscalC[i] = 0.0;
                                Vcoul[i] = 0.0;
                                break;
                        }

                        if (fr->coulomb_modifier == eintmodPOTSWITCH)
                        {
                            /* Switch function sw and its polynomial dsw,
                             * evaluated at d = max(rC - rswitch, 0).
                             * NOTE(review): Vcoul[i] has already been
                             * multiplied by sw when it enters the dsw term
                             * on the last line - confirm this is intended. */
                            d = rC-rswitch;
                            d = (d > 0.0) ? d : 0.0;
                            d2 = d*d;
                            sw = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
                            dsw = d2*(swF2+d*(swF3+d*swF4));

                            Vcoul[i] *= sw;
                            FscalC[i] = FscalC[i]*sw + Vcoul[i]*dsw;
                        }
                    }

                    if ((c6[i] != 0 || c12[i] != 0) && !(bExactVdwCutoff && rV >= rvdw))
                    {
                        switch (ivdw)
                        {
                            case GMX_NBKERNEL_VDW_LENNARDJONES:
                                /* cutoff LJ */
                                if (sc_r_power == 6.0)
                                {
                                    rinv6 = rpinvV;
                                }
                                else
                                {
                                    rinv6 = pow(rinvV, 6.0);
                                }
                                Vvdw6 = c6[i]*rinv6;
                                Vvdw12 = c12[i]*rinv6*rinv6;
                                if (fr->vdw_modifier == eintmodPOTSHIFT)
                                {
                                    /* Shift both terms by their value at sh_invrc6 */
                                    Vvdw[i] = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0) -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
                                }
                                else
                                {
                                    Vvdw[i] = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
                                }
                                FscalV[i] = (Vvdw12-Vvdw6)*rpinvV;
                                break;

                            case GMX_NBKERNEL_VDW_BUCKINGHAM:
                                gmx_fatal(FARGS, "Buckingham free energy not supported.");
                                break;

                            case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
                                /* Table LJ: skip the 4 Coulomb entries */
                                nnn = n1V+4;
                                /* dispersion */
                                Y = VFtab[nnn];
                                F = VFtab[nnn+1];
                                Geps = epsV*VFtab[nnn+2];
                                Heps2 = eps2V*VFtab[nnn+3];
                                Fp = F+Geps+Heps2;
                                VV = Y+epsV*Fp;
                                FF = Fp+Geps+2.0*Heps2;
                                Vvdw[i] += c6[i]*VV;
                                FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;

                                /* repulsion */
                                Y = VFtab[nnn+4];
                                F = VFtab[nnn+5];
                                Geps = epsV*VFtab[nnn+6];
                                Heps2 = eps2V*VFtab[nnn+7];
                                Fp = F+Geps+Heps2;
                                VV = Y+epsV*Fp;
                                FF = Fp+Geps+2.0*Heps2;
                                Vvdw[i] += c12[i]*VV;
                                FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
                                break;

                            default:
                                Vvdw[i] = 0.0;
                                FscalV[i] = 0.0;
                                break;
                        }

                        if (fr->vdw_modifier == eintmodPOTSWITCH)
                        {
                            /* Same switch polynomial as for Coulomb, at
                             * d = max(rV - rswitch, 0).
                             * NOTE(review): as above, Vvdw[i] is scaled by
                             * sw before it is used in the dsw term - confirm. */
                            d = rV-rswitch;
                            d = (d > 0.0) ? d : 0.0;
                            d2 = d*d;
                            sw = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
                            dsw = d2*(swF2+d*(swF3+d*swF4));

                            Vvdw[i] *= sw;
                            FscalV[i] = FscalV[i]*sw + Vvdw[i]*dsw;

                            /* Zero both beyond the VdW cut-off */
                            FscalV[i] = (rV < rvdw) ? FscalV[i] : 0.0;
                            Vvdw[i] = (rV < rvdw) ? Vvdw[i] : 0.0;
                        }
                    }
                }
            }

            Fscal = 0;

            if (icoul == GMX_NBKERNEL_ELEC_EWALD && !(bExactElecCutoff && r >= rcoulomb))
            {
                /* because we compute the softcore normally,
                 * we have to remove the ewald short range portion. Done outside of
                 * the states loop because this part doesn't depend on the scaled R */

#ifdef GMX_DOUBLE
                /* Relative accuracy at R_ERF_R_INACC of 3e-10 */
#define R_ERF_R_INACC 0.006
#else
                /* Relative accuracy at R_ERF_R_INACC of 2e-5 */
#define R_ERF_R_INACC 0.1
#endif
                if (ewc*r > R_ERF_R_INACC)
                {
                    VV = gmx_erf(ewc*r)*rinv;
                    FF = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
                }
                else
                {
                    /* Series expression used for small ewc*r instead of
                     * the erf/exp form above */
                    VV = ewc*M_2_SQRTPI;
                    FF = ewc*ewc*ewc*M_2_SQRTPI*(2.0/3.0 - 0.4*ewc*ewc*rsq);
                }

                for (i = 0; i < NSTATES; i++)
                {
                    vctot -= LFC[i]*qq[i]*VV;
                    Fscal -= LFC[i]*qq[i]*FF;
                    dvdl_coul -= (DLF[i]*qq[i])*VV;
                }
            }

            /* Assemble A and B states */
            for (i = 0; i < NSTATES; i++)
            {
                vctot += LFC[i]*Vcoul[i];
                vvtot += LFV[i]*Vvdw[i];

                Fscal += LFC[i]*FscalC[i]*rpm2;
                Fscal += LFV[i]*FscalV[i]*rpm2;

                dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
                dvdl_vdw += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
            }

            if (bDoForces)
            {
                tx = Fscal*dx;
                ty = Fscal*dy;
                tz = Fscal*dz;
                fix = fix + tx;
                fiy = fiy + ty;
                fiz = fiz + tz;
                f[j3] = f[j3] - tx;
                f[j3+1] = f[j3+1] - ty;
                f[j3+2] = f[j3+2] - tz;
            }
        }

        if (bDoForces)
        {
            f[ii3] = f[ii3] + fix;
            f[ii3+1] = f[ii3+1] + fiy;
            f[ii3+2] = f[ii3+2] + fiz;
            fshift[is3] = fshift[is3] + fix;
            fshift[is3+1] = fshift[is3+1] + fiy;
            fshift[is3+2] = fshift[is3+2] + fiz;
        }
        ggid = gid[n];
        Vc[ggid] = Vc[ggid] + vctot;
        Vv[ggid] = Vv[ggid] + vvtot;
    }

    dvdl[efptCOUL] += dvdl_coul;
    dvdl[efptVDW] += dvdl_vdw;

    /* Estimate flops, average for free energy stuff:
     * 12 flops per outer iteration
     * 150 flops per inner iteration
     * After the loop n equals nri, so jindex[n] is the end index of the
     * last list.
     */
    inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
}
real ewald_LRcorrection(FILE *fplog, int start, int end, t_commrec *cr, int thread, t_forcerec *fr, real *chargeA, real *chargeB, gmx_bool calc_excl_corr, t_blocka *excl, rvec x[], matrix box, rvec mu_tot[], int ewald_geometry, real epsilon_surface, rvec *f, tensor vir, real lambda, real *dvdlambda) { int i, i1, i2, j, k, m, iv, jv, q; atom_id *AA; double q2sumA, q2sumB, Vexcl, dvdl_excl; /* Necessary for precision */ real one_4pi_eps; real v, vc, qiA, qiB, dr, dr2, rinv, fscal, enercorr; real Vself[2], Vdipole[2], rinv2, ewc = fr->ewaldcoeff, ewcdr; rvec df, dx, mutot[2], dipcorrA, dipcorrB; tensor dxdf; real vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]; real L1, dipole_coeff, qqA, qqB, qqL, vr0; /*#define TABLES*/ #ifdef TABLES real tabscale = fr->tabscale; real eps, eps2, VV, FF, F, Y, Geps, Heps2, Fp, fijC, r1t; real *VFtab = fr->coulvdwtab; int n0, n1, nnn; #endif gmx_bool bFreeEnergy = (chargeB != NULL); gmx_bool bMolPBC = fr->bMolPBC; one_4pi_eps = ONE_4PI_EPS0/fr->epsilon_r; vr0 = ewc*M_2_SQRTPI; AA = excl->a; Vexcl = 0; dvdl_excl = 0; q2sumA = 0; q2sumB = 0; Vdipole[0] = 0; Vdipole[1] = 0; L1 = 1.0-lambda; /* Note that we have to transform back to gromacs units, since * mu_tot contains the dipole in debye units (for output). 
*/ for (i = 0; (i < DIM); i++) { mutot[0][i] = mu_tot[0][i]*DEBYE2ENM; mutot[1][i] = mu_tot[1][i]*DEBYE2ENM; dipcorrA[i] = 0; dipcorrB[i] = 0; } dipole_coeff = 0; switch (ewald_geometry) { case eewg3D: if (epsilon_surface != 0) { dipole_coeff = 2*M_PI*ONE_4PI_EPS0/((2*epsilon_surface + fr->epsilon_r)*vol); for (i = 0; (i < DIM); i++) { dipcorrA[i] = 2*dipole_coeff*mutot[0][i]; dipcorrB[i] = 2*dipole_coeff*mutot[1][i]; } } break; case eewg3DC: dipole_coeff = 2*M_PI*one_4pi_eps/vol; dipcorrA[ZZ] = 2*dipole_coeff*mutot[0][ZZ]; dipcorrB[ZZ] = 2*dipole_coeff*mutot[1][ZZ]; break; default: gmx_incons("Unsupported Ewald geometry"); break; } if (debug) { fprintf(debug, "dipcorr = %8.3f %8.3f %8.3f\n", dipcorrA[XX], dipcorrA[YY], dipcorrA[ZZ]); fprintf(debug, "mutot = %8.3f %8.3f %8.3f\n", mutot[0][XX], mutot[0][YY], mutot[0][ZZ]); } clear_mat(dxdf); if ((calc_excl_corr || dipole_coeff != 0) && !bFreeEnergy) { for (i = start; (i < end); i++) { /* Initiate local variables (for this i-particle) to 0 */ qiA = chargeA[i]*one_4pi_eps; if (calc_excl_corr) { i1 = excl->index[i]; i2 = excl->index[i+1]; /* Loop over excluded neighbours */ for (j = i1; (j < i2); j++) { k = AA[j]; /* * First we must test whether k <> i, and then, because the * exclusions are all listed twice i->k and k->i we must select * just one of the two. * As a minor optimization we only compute forces when the charges * are non-zero. */ if (k > i) { qqA = qiA*chargeA[k]; if (qqA != 0.0) { rvec_sub(x[i], x[k], dx); if (bMolPBC) { /* Cheap pbc_dx, assume excluded pairs are at short distance. 
*/ for (m = DIM-1; (m >= 0); m--) { if (dx[m] > 0.5*box[m][m]) { rvec_dec(dx, box[m]); } else if (dx[m] < -0.5*box[m][m]) { rvec_inc(dx, box[m]); } } } dr2 = norm2(dx); /* Distance between two excluded particles may be zero in the * case of shells */ if (dr2 != 0) { rinv = gmx_invsqrt(dr2); rinv2 = rinv*rinv; dr = 1.0/rinv; #ifdef TABLES r1t = tabscale*dr; n0 = r1t; assert(n0 >= 3); n1 = 12*n0; eps = r1t-n0; eps2 = eps*eps; nnn = n1; Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vc = qqA*(rinv-VV); fijC = qqA*FF; Vexcl += vc; fscal = vc*rinv2+fijC*tabscale*rinv; /* End of tabulated interaction part */ #else /* This is the code you would want instead if not using * tables: */ ewcdr = ewc*dr; vc = qqA*gmx_erf(ewcdr)*rinv; Vexcl += vc; #ifdef GMX_DOUBLE /* Relative accuracy at R_ERF_R_INACC of 3e-10 */ #define R_ERF_R_INACC 0.006 #else /* Relative accuracy at R_ERF_R_INACC of 2e-5 */ #define R_ERF_R_INACC 0.1 #endif if (ewcdr > R_ERF_R_INACC) { fscal = rinv2*(vc - qqA*ewc*M_2_SQRTPI*exp(-ewcdr*ewcdr)); } else { /* Use a fourth order series expansion for small ewcdr */ fscal = ewc*ewc*qqA*vr0*(2.0/3.0 - 0.4*ewcdr*ewcdr); } #endif /* The force vector is obtained by multiplication with the * distance vector */ svmul(fscal, dx, df); rvec_inc(f[k], df); rvec_dec(f[i], df); for (iv = 0; (iv < DIM); iv++) { for (jv = 0; (jv < DIM); jv++) { dxdf[iv][jv] += dx[iv]*df[jv]; } } } else { Vexcl += qqA*vr0; } } } } } /* Dipole correction on force */ if (dipole_coeff != 0) { for (j = 0; (j < DIM); j++) { f[i][j] -= dipcorrA[j]*chargeA[i]; } } } } else if (calc_excl_corr || dipole_coeff != 0) { for (i = start; (i < end); i++) { /* Initiate local variables (for this i-particle) to 0 */ qiA = chargeA[i]*one_4pi_eps; qiB = chargeB[i]*one_4pi_eps; if (calc_excl_corr) { i1 = excl->index[i]; i2 = excl->index[i+1]; /* Loop over excluded neighbours */ for (j = i1; (j < i2); j++) { k = AA[j]; if 
(k > i) { qqA = qiA*chargeA[k]; qqB = qiB*chargeB[k]; if (qqA != 0.0 || qqB != 0.0) { qqL = L1*qqA + lambda*qqB; rvec_sub(x[i], x[k], dx); if (bMolPBC) { /* Cheap pbc_dx, assume excluded pairs are at short distance. */ for (m = DIM-1; (m >= 0); m--) { if (dx[m] > 0.5*box[m][m]) { rvec_dec(dx, box[m]); } else if (dx[m] < -0.5*box[m][m]) { rvec_inc(dx, box[m]); } } } dr2 = norm2(dx); if (dr2 != 0) { rinv = gmx_invsqrt(dr2); rinv2 = rinv*rinv; dr = 1.0/rinv; v = gmx_erf(ewc*dr)*rinv; vc = qqL*v; Vexcl += vc; fscal = rinv2*(vc-qqL*ewc*M_2_SQRTPI*exp(-ewc*ewc*dr2)); svmul(fscal, dx, df); rvec_inc(f[k], df); rvec_dec(f[i], df); for (iv = 0; (iv < DIM); iv++) { for (jv = 0; (jv < DIM); jv++) { dxdf[iv][jv] += dx[iv]*df[jv]; } } dvdl_excl += (qqB - qqA)*v; } else { Vexcl += qqL*vr0; dvdl_excl += (qqB - qqA)*vr0; } } } } } /* Dipole correction on force */ if (dipole_coeff != 0) { for (j = 0; (j < DIM); j++) { f[i][j] -= L1*dipcorrA[j]*chargeA[i] + lambda*dipcorrB[j]*chargeB[i]; } } } } for (iv = 0; (iv < DIM); iv++) { for (jv = 0; (jv < DIM); jv++) { vir[iv][jv] += 0.5*dxdf[iv][jv]; } } Vself[0] = 0; Vself[1] = 0; /* Global corrections only on master process */ if (MASTER(cr) && thread == 0) { for (q = 0; q < (bFreeEnergy ? 
2 : 1); q++) { if (calc_excl_corr) { /* Self-energy correction */ Vself[q] = ewc*one_4pi_eps*fr->q2sum[q]*M_1_SQRTPI; } /* Apply surface dipole correction: * correction = dipole_coeff * (dipole)^2 */ if (dipole_coeff != 0) { if (ewald_geometry == eewg3D) { Vdipole[q] = dipole_coeff*iprod(mutot[q], mutot[q]); } else if (ewald_geometry == eewg3DC) { Vdipole[q] = dipole_coeff*mutot[q][ZZ]*mutot[q][ZZ]; } } } } if (!bFreeEnergy) { enercorr = Vdipole[0] - Vself[0] - Vexcl; } else { enercorr = L1*(Vdipole[0] - Vself[0]) + lambda*(Vdipole[1] - Vself[1]) - Vexcl; *dvdlambda += Vdipole[1] - Vself[1] - (Vdipole[0] - Vself[0]) - dvdl_excl; } if (debug) { fprintf(debug, "Long Range corrections for Ewald interactions:\n"); fprintf(debug, "start=%d,natoms=%d\n", start, end-start); fprintf(debug, "q2sum = %g, Vself=%g\n", L1*q2sumA+lambda*q2sumB, L1*Vself[0]+lambda*Vself[1]); fprintf(debug, "Long Range correction: Vexcl=%g\n", Vexcl); if (MASTER(cr) && thread == 0) { if (epsilon_surface > 0 || ewald_geometry == eewg3DC) { fprintf(debug, "Total dipole correction: Vdipole=%g\n", L1*Vdipole[0]+lambda*Vdipole[1]); } } } /* Return the correction to the energy */ return enercorr; }
/* Set up the mass dependent LINCS coefficients stored in li.
 *
 * li      - LINCS data. Read: nc, ncc, bla, blnr, blbnb, ncg_triangle.
 *           Written: blc, blc1, blmf, blmf1, triangle, tri_bits,
 *           ntriangle, ncc_triangle and matlam.
 * invmass - inverse masses, indexed with the atom numbers found in li->bla.
 * lambda  - stored in li->matlam, so it is known for which lambda value
 *           the coefficients were set.
 *
 * Calls gmx_fatal when a constraint that takes part in a triangle has more
 * couplings than fit in one element of li->tri_bits.
 */
void set_lincs_matrix(struct gmx_lincsdata *li,real *invmass,real lambda)
{
    int i,a1,a2,n,k,sign,center;
    int end,nk,kk;
    int ncoupl;
    /* Largest number of couplings of one constraint that can be flagged
     * in a tri_bits element. Kept as int so that it can be compared with
     * and printed next to the (int) coupling count without mixing signed
     * and unsigned types.
     */
    const int  max_tri_coupl = (int)(sizeof(li->tri_bits[0])*8 - 1);
    const real invsqrt2=0.7071067811865475244;

    /* Per constraint: blc = 1/sqrt(invmass[a1] + invmass[a2]),
     * blc1 = 1/sqrt(2).
     */
    for(i=0; (i<li->nc); i++)
    {
        a1 = li->bla[2*i];
        a2 = li->bla[2*i+1];
        li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
        li->blc1[i] = invsqrt2;
    }

    /* Construct the coupling coefficient matrix blmf */
    li->ntriangle    = 0;
    li->ncc_triangle = 0;
    for(i=0; (i<li->nc); i++)
    {
        a1 = li->bla[2*i];
        a2 = li->bla[2*i+1];
        for(n=li->blnr[i]; (n<li->blnr[i+1]); n++)
        {
            /* k is the constraint coupled to constraint i by entry n */
            k = li->blbnb[n];
            if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
            {
                sign = -1;
            }
            else
            {
                sign = 1;
            }
            /* center is the atom shared by constraints i and k,
             * end is the other atom of constraint i.
             */
            if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
            {
                center = a1;
                end    = a2;
            }
            else
            {
                center = a2;
                end    = a1;
            }
            li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
            li->blmf1[n] = sign*0.5;
            if (li->ncg_triangle > 0)
            {
                /* Look for constraint triangles */
                for(nk=li->blnr[k]; (nk<li->blnr[k+1]); nk++)
                {
                    kk = li->blbnb[nk];
                    if (kk != i && kk != k &&
                        (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
                    {
                        if (li->ntriangle == 0 ||
                            li->triangle[li->ntriangle-1] < i)
                        {
                            /* Add this constraint to the triangle list */
                            li->triangle[li->ntriangle] = i;
                            li->tri_bits[li->ntriangle] = 0;
                            li->ntriangle++;
                            ncoupl = (int)(li->blnr[i+1] - li->blnr[i]);
                            if (ncoupl > max_tri_coupl)
                            {
                                /* Both arguments are int, matching %d */
                                gmx_fatal(FARGS,"A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
                                          ncoupl,max_tri_coupl);
                            }
                        }
                        /* Flag coupling n of constraint i as part of a triangle */
                        li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
                        li->ncc_triangle++;
                    }
                }
            }
        }
    }

    if (debug)
    {
        fprintf(debug,"Of the %d constraints %d participate in triangles\n",
                li->nc,li->ntriangle);
        fprintf(debug,"There are %d couplings of which %d in triangles\n",
                li->ncc,li->ncc_triangle);
    }

    /* Set matlam,
     * so we know with which lambda value the masses have been set.
     */
    li->matlam = lambda;
}
/* Apply the LINCS constraint correction to the coordinates xp.
 *
 * x        - coordinates used to compute the constraint directions r
 * xp       - coordinates that are corrected in place
 * box      - passed on to dd_move_x_constraints
 * pbc      - when non-NULL, distance vectors are computed with pbc_dx_aiuc
 * lincsd   - LINCS data: constraint lists, coefficients and work arrays
 * invmass  - inverse masses, indexed by atom number
 * cr       - communication record, selects the decomposition specific calls
 * wangle   - angle in degrees used for the warning criterion below
 * warn     - output, see the NOTE in the correction loop
 * invdt    - factor applied to the velocity correction
 * v        - when non-NULL, velocities that are corrected in place
 * bCalcVir - when set, the constraint contribution is subtracted from rmdr
 * rmdr     - tensor receiving the constraint virial contribution
 *
 * lincs_matrix_expand is defined elsewhere; from its use here it takes the
 * coupling coefficients blcc and right-hand side rhs1 and leaves its result
 * in sol (rhs2 presumably being scratch space - TODO confirm).
 */
static void do_lincs(rvec *x,rvec *xp,matrix box,t_pbc *pbc,
                     struct gmx_lincsdata *lincsd,real *invmass,
                     t_commrec *cr,
                     real wangle,int *warn,
                     real invdt,rvec *v,
                     gmx_bool bCalcVir,tensor rmdr) {
    int b,i,j,k,n,iter;
    real tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,len2,dlen2,wfac,lam;
    rvec dx;
    int ncons,*bla,*blnr,*blbnb;
    rvec *r;
    real *blc,*blmf,*bllen,*blcc,*rhs1,*rhs2,*sol,*lambda;
    int *nlocat;

    /* Local aliases for the arrays in the LINCS data */
    ncons = lincsd->nc;
    bla = lincsd->bla;
    r = lincsd->tmpv;
    blnr = lincsd->blnr;
    blbnb = lincsd->blbnb;
    blc = lincsd->blc;
    blmf = lincsd->blmf;
    bllen = lincsd->bllen;
    blcc = lincsd->tmpncc;
    rhs1 = lincsd->tmp1;
    rhs2 = lincsd->tmp2;
    sol = lincsd->tmp3;
    lambda = lincsd->lambda;

    /* nlocat is a per constraint count supplied by the decomposition code,
     * or NULL when no decomposition with constraints is active.
     */
    if (DOMAINDECOMP(cr) && cr->dd->constraints) {
        nlocat = dd_constraints_nlocalatoms(cr->dd);
    } else if (PARTDECOMP(cr)) {
        nlocat = pd_constraints_nlocalatoms(cr->pd);
    } else {
        nlocat = NULL;
    }

    *warn = 0;

    if (pbc) {
        /* Compute normalized i-j vectors */
        for(b=0; b<ncons; b++) {
            pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
            unitv(dx,r[b]);
        }
        for(b=0; b<ncons; b++) {
            /* Coupling coefficients: blmf times the dot product of the
             * directions of the two coupled constraints.
             */
            for(n=blnr[b]; n<blnr[b+1]; n++) {
                blcc[n] = blmf[n]*iprod(r[b],r[blbnb[n]]);
            }
            pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
            /* Projection of the xp distance on r minus the reference length */
            mvb = blc[b]*(iprod(r[b],dx) - bllen[b]);
            rhs1[b] = mvb;
            sol[b] = mvb;
        }
    } else {
        /* Compute normalized i-j vectors */
        for(b=0; b<ncons; b++) {
            i = bla[2*b];
            j = bla[2*b+1];
            tmp0 = x[i][0] - x[j][0];
            tmp1 = x[i][1] - x[j][1];
            tmp2 = x[i][2] - x[j][2];
            rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
            r[b][0] = rlen*tmp0;
            r[b][1] = rlen*tmp1;
            r[b][2] = rlen*tmp2;
        } /* 16 ncons flops */
        for(b=0; b<ncons; b++) {
            tmp0 = r[b][0];
            tmp1 = r[b][1];
            tmp2 = r[b][2];
            len = bllen[b];
            i = bla[2*b];
            j = bla[2*b+1];
            for(n=blnr[b]; n<blnr[b+1]; n++) {
                k = blbnb[n];
                blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
            } /* 6 nr flops */
            mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
                          tmp1*(xp[i][1] - xp[j][1]) +
                          tmp2*(xp[i][2] - xp[j][2]) - len);
            rhs1[b] = mvb;
            sol[b] = mvb;
            /* 10 flops */
        }
        /* Together: 26*ncons + 6*nrtot flops */
    }

    lincs_matrix_expand(lincsd,blcc,rhs1,rhs2,sol);
    /* nrec*(ncons+2*nrtot) flops */

    /* First update of xp: move both atoms of each constraint along r,
     * weighted by their inverse masses, and start lambda at -mvb.
     */
    for(b=0; b<ncons; b++) {
        i = bla[2*b];
        j = bla[2*b+1];
        mvb = blc[b]*sol[b];
        lambda[b] = -mvb;
        im1 = invmass[i];
        im2 = invmass[j];
        tmp0 = r[b][0]*mvb;
        tmp1 = r[b][1]*mvb;
        tmp2 = r[b][2]*mvb;
        xp[i][0] -= tmp0*im1;
        xp[i][1] -= tmp1*im1;
        xp[i][2] -= tmp2*im1;
        xp[j][0] += tmp0*im2;
        xp[j][1] += tmp1*im2;
        xp[j][2] += tmp2*im2;
    } /* 16 ncons flops */

    /* ******** Correction for centripetal effects ******** */

    /* wfac = cos^2(wangle), with wangle converted from degrees */
    wfac = cos(DEG2RAD*wangle);
    wfac = wfac*wfac;

    for(iter=0; iter<lincsd->nIter; iter++) {
        if (DOMAINDECOMP(cr) && cr->dd->constraints) {
            /* Communicate the corrected non-local coordinates */
            dd_move_x_constraints(cr->dd,box,xp,NULL);
        } else if (PARTDECOMP(cr)) {
            pd_move_x_constraints(cr,xp,NULL);
        }

        for(b=0; b<ncons; b++) {
            len = bllen[b];
            if (pbc) {
                pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
            } else {
                rvec_sub(xp[bla[2*b]],xp[bla[2*b+1]],dx);
            }
            len2 = len*len;
            dlen2 = 2*len2 - norm2(dx);
            /* NOTE(review): *warn starts at 0 and is set to the constraint
             * index here, so a hit on constraint 0 cannot be told apart
             * from "no warning" through *warn alone - confirm how callers
             * interpret the value.
             */
            if (dlen2 < wfac*len2 && (nlocat==NULL || nlocat[b])) {
                *warn = b;
            }
            /* dlen2*gmx_invsqrt(dlen2) is sqrt(dlen2); a non-positive
             * dlen2 is treated as a zero length.
             */
            if (dlen2 > 0) {
                mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
            } else {
                mvb = blc[b]*len;
            }
            rhs1[b] = mvb;
            sol[b] = mvb;
        } /* 20*ncons flops */

        lincs_matrix_expand(lincsd,blcc,rhs1,rhs2,sol);
        /* nrec*(ncons+2*nrtot) flops */

        /* Same update of xp as above, accumulating into lambda */
        for(b=0; b<ncons; b++) {
            i = bla[2*b];
            j = bla[2*b+1];
            lam = lambda[b];
            mvb = blc[b]*sol[b];
            lambda[b] = lam - mvb;
            im1 = invmass[i];
            im2 = invmass[j];
            tmp0 = r[b][0]*mvb;
            tmp1 = r[b][1]*mvb;
            tmp2 = r[b][2]*mvb;
            xp[i][0] -= tmp0*im1;
            xp[i][1] -= tmp1*im1;
            xp[i][2] -= tmp2*im1;
            xp[j][0] += tmp0*im2;
            xp[j][1] += tmp1*im2;
            xp[j][2] += tmp2*im2;
        } /* 17 ncons flops */
    } /* nit*ncons*(37+9*nrec) flops */

    if (v) {
        /* Correct the velocities */
        for(b=0; b<ncons; b++) {
            i = bla[2*b];
            j = bla[2*b+1];
            im1 = invmass[i]*lambda[b]*invdt;
            im2 = invmass[j]*lambda[b]*invdt;
            v[i][0] += im1*r[b][0];
            v[i][1] += im1*r[b][1];
            v[i][2] += im1*r[b][2];
            v[j][0] -= im2*r[b][0];
            v[j][1] -= im2*r[b][1];
            v[j][2] -= im2*r[b][2];
        } /* 16 ncons flops */
    }

    if (nlocat) {
        /* Only account for local atoms */
        /* lambda is scaled by half the per constraint value of nlocat */
        for(b=0; b<ncons; b++) {
            lambda[b] *= 0.5*nlocat[b];
        }
    }

    if (bCalcVir) {
        /* Constraint virial */
        /* rmdr[i][j] -= bllen*lambda*r_i*r_j, summed over all constraints */
        for(b=0; b<ncons; b++) {
            tmp0 = bllen[b]*lambda[b];
            for(i=0; i<DIM; i++) {
                tmp1 = tmp0*r[b][i];
                for(j=0; j<DIM; j++) {
                    rmdr[i][j] -= tmp1*r[b][j];
                }
            }
        } /* 22 ncons flops */
    }

    /* Total:
     * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
     * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
     *
     * (26+nrec)*ncons + (6+2*nrec)*nrtot
     * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
     * if nit=1
     * (63+nrec)*ncons + (6+4*nrec)*nrtot
     */
}
/* * Gromacs nonbonded kernel pf_nb_kernel304 * Coulomb interaction: Tabulated * VdW interaction: Not calculated * water optimization: pairs of TIP4P interactions * Calculate forces: yes */ void pf_nb_kernel304( int * p_nri, int * iinr, int * jindex, int * jjnr, int * shift, real * shiftvec, real * fshift, int * gid, real * pos, real * faction, real * charge, real * p_facel, real * p_krf, real * p_crf, real * Vc, int * type, int * p_ntype, real * vdwparam, real * Vvdw, real * p_tabscale, real * VFtab, real * invsqrta, real * dvda, real * p_gbtabscale, real * GBtab, int * p_nthreads, int * count, void * mtx, int * outeriter, int * inneriter, real * work, t_pf_global * pf_global) { int nri,ntype,nthreads; real facel,krf,crf,tabscale,gbtabscale; int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid; int nn0,nn1,nouter,ninner; real shX,shY,shZ; real fscal,tx,ty,tz; real qq,vcoul,vctot; real r,rt,eps,eps2; int n0,nnn; real Y,F,Geps,Heps2,Fp,VV; real FF; real fijC; real ix2,iy2,iz2,fix2,fiy2,fiz2; real ix3,iy3,iz3,fix3,fiy3,fiz3; real ix4,iy4,iz4,fix4,fiy4,fiz4; real jx2,jy2,jz2,fjx2,fjy2,fjz2; real jx3,jy3,jz3,fjx3,fjy3,fjz3; real jx4,jy4,jz4,fjx4,fjy4,fjz4; real dx22,dy22,dz22,rsq22,rinv22; real dx23,dy23,dz23,rsq23,rinv23; real dx24,dy24,dz24,rsq24,rinv24; real dx32,dy32,dz32,rsq32,rinv32; real dx33,dy33,dz33,rsq33,rinv33; real dx34,dy34,dz34,rsq34,rinv34; real dx42,dy42,dz42,rsq42,rinv42; real dx43,dy43,dz43,rsq43,rinv43; real dx44,dy44,dz44,rsq44,rinv44; real qH,qM,qqMM,qqMH,qqHH; nri = *p_nri; ntype = *p_ntype; nthreads = *p_nthreads; facel = *p_facel; krf = *p_krf; crf = *p_crf; tabscale = *p_tabscale; /* Initialize water data */ ii = iinr[0]; qH = charge[ii+1]; qM = charge[ii+3]; qqMM = facel*qM*qM; qqMH = facel*qM*qH; qqHH = facel*qH*qH; /* Reset outer and inner iteration counters */ nouter = 0; ninner = 0; /* Loop over thread workunits */ do { #ifdef GMX_THREAD_SHM_FDECOMP tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx); nn0 = *count; /* Take successively smaller chunks 
(at least 10 lists) */ nn1 = nn0+(nri-nn0)/(2*nthreads)+10; *count = nn1; tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx); if(nn1>nri) nn1=nri; #else nn0 = 0; nn1 = nri; #endif /* Start outer loop over neighborlists */ for(n=nn0; (n<nn1); n++) { /* Load shift vector for this list */ is3 = 3*shift[n]; shX = shiftvec[is3]; shY = shiftvec[is3+1]; shZ = shiftvec[is3+2]; /* Load limits for loop over neighbors */ nj0 = jindex[n]; nj1 = jindex[n+1]; /* Get outer coordinate index */ ii = iinr[n]; ii3 = 3*ii; /* Load i atom data, add shift vector */ ix2 = shX + pos[ii3+3]; iy2 = shY + pos[ii3+4]; iz2 = shZ + pos[ii3+5]; ix3 = shX + pos[ii3+6]; iy3 = shY + pos[ii3+7]; iz3 = shZ + pos[ii3+8]; ix4 = shX + pos[ii3+9]; iy4 = shY + pos[ii3+10]; iz4 = shZ + pos[ii3+11]; /* Zero the potential energy for this list */ vctot = 0; /* Clear i atom forces */ fix2 = 0; fiy2 = 0; fiz2 = 0; fix3 = 0; fiy3 = 0; fiz3 = 0; fix4 = 0; fiy4 = 0; fiz4 = 0; for(k=nj0; (k<nj1); k++) { /* Get j neighbor index, and coordinate index */ jnr = jjnr[k]; j3 = 3*jnr; /* load j atom coordinates */ jx2 = pos[j3+3]; jy2 = pos[j3+4]; jz2 = pos[j3+5]; jx3 = pos[j3+6]; jy3 = pos[j3+7]; jz3 = pos[j3+8]; jx4 = pos[j3+9]; jy4 = pos[j3+10]; jz4 = pos[j3+11]; /* Calculate distance */ dx22 = ix2 - jx2; dy22 = iy2 - jy2; dz22 = iz2 - jz2; rsq22 = dx22*dx22+dy22*dy22+dz22*dz22; dx23 = ix2 - jx3; dy23 = iy2 - jy3; dz23 = iz2 - jz3; rsq23 = dx23*dx23+dy23*dy23+dz23*dz23; dx24 = ix2 - jx4; dy24 = iy2 - jy4; dz24 = iz2 - jz4; rsq24 = dx24*dx24+dy24*dy24+dz24*dz24; dx32 = ix3 - jx2; dy32 = iy3 - jy2; dz32 = iz3 - jz2; rsq32 = dx32*dx32+dy32*dy32+dz32*dz32; dx33 = ix3 - jx3; dy33 = iy3 - jy3; dz33 = iz3 - jz3; rsq33 = dx33*dx33+dy33*dy33+dz33*dz33; dx34 = ix3 - jx4; dy34 = iy3 - jy4; dz34 = iz3 - jz4; rsq34 = dx34*dx34+dy34*dy34+dz34*dz34; dx42 = ix4 - jx2; dy42 = iy4 - jy2; dz42 = iz4 - jz2; rsq42 = dx42*dx42+dy42*dy42+dz42*dz42; dx43 = ix4 - jx3; dy43 = iy4 - jy3; dz43 = iz4 - jz3; rsq43 = dx43*dx43+dy43*dy43+dz43*dz43; 
dx44 = ix4 - jx4; dy44 = iy4 - jy4; dz44 = iz4 - jz4; rsq44 = dx44*dx44+dy44*dy44+dz44*dz44; /* Calculate 1/r and 1/r2 */ rinv22 = gmx_invsqrt(rsq22); rinv23 = gmx_invsqrt(rsq23); rinv24 = gmx_invsqrt(rsq24); rinv32 = gmx_invsqrt(rsq32); rinv33 = gmx_invsqrt(rsq33); rinv34 = gmx_invsqrt(rsq34); rinv42 = gmx_invsqrt(rsq42); rinv43 = gmx_invsqrt(rsq43); rinv44 = gmx_invsqrt(rsq44); /* Load parameters for j atom */ qq = qqHH; /* Calculate table index */ r = rsq22*rinv22; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vcoul = qq*VV; fijC = qq*FF; vctot = vctot + vcoul; fscal = -((fijC)*tabscale)*rinv22; /* Calculate temporary vectorial force */ tx = fscal*dx22; ty = fscal*dy22; tz = fscal*dz22; /* Increment i atom force */ fix2 = fix2 + tx; fiy2 = fiy2 + ty; fiz2 = fiz2 + tz; /* Decrement j atom force */ fjx2 = faction[j3+3] - tx; fjy2 = faction[j3+4] - ty; fjz2 = faction[j3+5] - tz; /* pairwise forces */ if (pf_global->bInitialized) pf_atom_add_nonbonded_single(pf_global, ii+1, jnr+1, PF_INTER_COULOMB, fscal, dx22, dy22, dz22); /* Load parameters for j atom */ qq = qqHH; /* Calculate table index */ r = rsq23*rinv23; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vcoul = qq*VV; fijC = qq*FF; vctot = vctot + vcoul; fscal = -((fijC)*tabscale)*rinv23; /* Calculate temporary vectorial force */ tx = fscal*dx23; ty = fscal*dy23; tz = fscal*dz23; /* Increment i atom force */ fix2 = fix2 + tx; fiy2 = fiy2 + ty; fiz2 = fiz2 + tz; /* Decrement j atom force */ fjx3 = faction[j3+6] - tx; fjy3 = faction[j3+7] - ty; fjz3 = 
faction[j3+8] - tz; /* pairwise forces */ if (pf_global->bInitialized) pf_atom_add_nonbonded_single(pf_global, ii+1, jnr+2, PF_INTER_COULOMB, fscal, dx23, dy23, dz23); /* Load parameters for j atom */ qq = qqMH; /* Calculate table index */ r = rsq24*rinv24; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vcoul = qq*VV; fijC = qq*FF; vctot = vctot + vcoul; fscal = -((fijC)*tabscale)*rinv24; /* Calculate temporary vectorial force */ tx = fscal*dx24; ty = fscal*dy24; tz = fscal*dz24; /* Increment i atom force */ fix2 = fix2 + tx; fiy2 = fiy2 + ty; fiz2 = fiz2 + tz; /* Decrement j atom force */ fjx4 = faction[j3+9] - tx; fjy4 = faction[j3+10] - ty; fjz4 = faction[j3+11] - tz; /* pairwise forces */ if (pf_global->bInitialized) pf_atom_add_nonbonded_single(pf_global, ii+1, jnr+3, PF_INTER_COULOMB, fscal, dx24, dy24, dz24); /* Load parameters for j atom */ qq = qqHH; /* Calculate table index */ r = rsq32*rinv32; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vcoul = qq*VV; fijC = qq*FF; vctot = vctot + vcoul; fscal = -((fijC)*tabscale)*rinv32; /* Calculate temporary vectorial force */ tx = fscal*dx32; ty = fscal*dy32; tz = fscal*dz32; /* Increment i atom force */ fix3 = fix3 + tx; fiy3 = fiy3 + ty; fiz3 = fiz3 + tz; /* Decrement j atom force */ fjx2 = fjx2 - tx; fjy2 = fjy2 - ty; fjz2 = fjz2 - tz; /* pairwise forces */ if (pf_global->bInitialized) pf_atom_add_nonbonded_single(pf_global, ii+2, jnr+1, PF_INTER_COULOMB, fscal, dx32, dy32, dz32); /* Load parameters for j atom */ qq = qqHH; /* Calculate table index */ r = 
rsq33*rinv33; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vcoul = qq*VV; fijC = qq*FF; vctot = vctot + vcoul; fscal = -((fijC)*tabscale)*rinv33; /* Calculate temporary vectorial force */ tx = fscal*dx33; ty = fscal*dy33; tz = fscal*dz33; /* Increment i atom force */ fix3 = fix3 + tx; fiy3 = fiy3 + ty; fiz3 = fiz3 + tz; /* Decrement j atom force */ fjx3 = fjx3 - tx; fjy3 = fjy3 - ty; fjz3 = fjz3 - tz; /* pairwise forces */ if (pf_global->bInitialized) pf_atom_add_nonbonded_single(pf_global, ii+2, jnr+2, PF_INTER_COULOMB, fscal, dx33, dy33, dz33); /* Load parameters for j atom */ qq = qqMH; /* Calculate table index */ r = rsq34*rinv34; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vcoul = qq*VV; fijC = qq*FF; vctot = vctot + vcoul; fscal = -((fijC)*tabscale)*rinv34; /* Calculate temporary vectorial force */ tx = fscal*dx34; ty = fscal*dy34; tz = fscal*dz34; /* Increment i atom force */ fix3 = fix3 + tx; fiy3 = fiy3 + ty; fiz3 = fiz3 + tz; /* Decrement j atom force */ fjx4 = fjx4 - tx; fjy4 = fjy4 - ty; fjz4 = fjz4 - tz; /* pairwise forces */ if (pf_global->bInitialized) pf_atom_add_nonbonded_single(pf_global, ii+2, jnr+3, PF_INTER_COULOMB, fscal, dx34, dy34, dz34); /* Load parameters for j atom */ qq = qqMH; /* Calculate table index */ r = rsq42*rinv42; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = 
Fp+Geps+2.0*Heps2; vcoul = qq*VV; fijC = qq*FF; vctot = vctot + vcoul; fscal = -((fijC)*tabscale)*rinv42; /* Calculate temporary vectorial force */ tx = fscal*dx42; ty = fscal*dy42; tz = fscal*dz42; /* Increment i atom force */ fix4 = fix4 + tx; fiy4 = fiy4 + ty; fiz4 = fiz4 + tz; /* Decrement j atom force */ faction[j3+3] = fjx2 - tx; faction[j3+4] = fjy2 - ty; faction[j3+5] = fjz2 - tz; /* pairwise forces */ if (pf_global->bInitialized) pf_atom_add_nonbonded_single(pf_global, ii+3, jnr+1, PF_INTER_COULOMB, fscal, dx42, dy42, dz42); /* Load parameters for j atom */ qq = qqMH; /* Calculate table index */ r = rsq43*rinv43; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vcoul = qq*VV; fijC = qq*FF; vctot = vctot + vcoul; fscal = -((fijC)*tabscale)*rinv43; /* Calculate temporary vectorial force */ tx = fscal*dx43; ty = fscal*dy43; tz = fscal*dz43; /* Increment i atom force */ fix4 = fix4 + tx; fiy4 = fiy4 + ty; fiz4 = fiz4 + tz; /* Decrement j atom force */ faction[j3+6] = fjx3 - tx; faction[j3+7] = fjy3 - ty; faction[j3+8] = fjz3 - tz; /* pairwise forces */ if (pf_global->bInitialized) pf_atom_add_nonbonded_single(pf_global, ii+3, jnr+2, PF_INTER_COULOMB, fscal, dx43, dy43, dz43); /* Load parameters for j atom */ qq = qqMM; /* Calculate table index */ r = rsq44*rinv44; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vcoul = qq*VV; fijC = qq*FF; vctot = vctot + vcoul; fscal = -((fijC)*tabscale)*rinv44; /* Calculate temporary vectorial force */ tx = fscal*dx44; ty = fscal*dy44; tz = fscal*dz44; /* Increment i atom force */ 
fix4 = fix4 + tx; fiy4 = fiy4 + ty; fiz4 = fiz4 + tz; /* Decrement j atom force */ faction[j3+9] = fjx4 - tx; faction[j3+10] = fjy4 - ty; faction[j3+11] = fjz4 - tz; /* pairwise forces */ if (pf_global->bInitialized) pf_atom_add_nonbonded_single(pf_global, ii+3, jnr+3, PF_INTER_COULOMB, fscal, dx44, dy44, dz44); /* Inner loop uses 369 flops/iteration */ } /* Add i forces to mem and shifted force list */ faction[ii3+3] = faction[ii3+3] + fix2; faction[ii3+4] = faction[ii3+4] + fiy2; faction[ii3+5] = faction[ii3+5] + fiz2; faction[ii3+6] = faction[ii3+6] + fix3; faction[ii3+7] = faction[ii3+7] + fiy3; faction[ii3+8] = faction[ii3+8] + fiz3; faction[ii3+9] = faction[ii3+9] + fix4; faction[ii3+10] = faction[ii3+10] + fiy4; faction[ii3+11] = faction[ii3+11] + fiz4; fshift[is3] = fshift[is3]+fix2+fix3+fix4; fshift[is3+1] = fshift[is3+1]+fiy2+fiy3+fiy4; fshift[is3+2] = fshift[is3+2]+fiz2+fiz3+fiz4; /* Add potential energies to the group for this list */ ggid = gid[n]; Vc[ggid] = Vc[ggid] + vctot; /* Increment number of inner iterations */ ninner = ninner + nj1 - nj0; /* Outer loop uses 28 flops/iteration */ } /* Increment number of outer iterations */ nouter = nouter + nn1 - nn0; } while (nn1<nri); /* Write outer/inner iteration count to pointers */ *outeriter = nouter; *inneriter = ninner; }
/* Coulomb plus Lennard-Jones interaction between the current i atom and
 * atom k, as used by nb_kernel_allvsall.
 *
 * ix, iy, iz and iq are the coordinates and the (prescaled) charge of the
 * i atom, pvdw holds its c6/c12 pairs (c6 at 2*k, c12 at 2*k+1).
 * The pair energies are added to *vctot and *Vvdwtot, the force is added
 * to the i accumulators fix/fiy/fiz and subtracted from f for atom k.
 */
static void nb_kernel_allvsall_pair(real *x, real *f, real *charge, real *pvdw,
                                    int k,
                                    real ix, real iy, real iz, real iq,
                                    real *fix, real *fiy, real *fiz,
                                    real *vctot, real *Vvdwtot)
{
    real jx,jy,jz,qq;
    real dx,dy,dz;
    real tx,ty,tz;
    real rsq,rinv,rinvsq,rinvsix;
    real vcoul;
    real c6,c12,Vvdw6,Vvdw12;
    real fscal;

    /* load j atom coordinates */
    jx = x[3*k];
    jy = x[3*k+1];
    jz = x[3*k+2];

    /* Calculate distance */
    dx  = ix - jx;
    dy  = iy - jy;
    dz  = iz - jz;
    rsq = dx*dx+dy*dy+dz*dz;

    /* Calculate 1/r and 1/r2 */
    rinv   = gmx_invsqrt(rsq);
    rinvsq = rinv*rinv;

    /* Load parameters for j atom */
    qq  = iq*charge[k];
    c6  = pvdw[2*k];
    c12 = pvdw[2*k+1];

    /* Coulomb interaction */
    vcoul  = qq*rinv;
    *vctot = *vctot+vcoul;

    /* Lennard-Jones interaction */
    rinvsix  = rinvsq*rinvsq*rinvsq;
    Vvdw6    = c6*rinvsix;
    Vvdw12   = c12*rinvsix*rinvsix;
    *Vvdwtot = *Vvdwtot+Vvdw12-Vvdw6;
    fscal    = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;

    /* Calculate temporary vectorial force */
    tx = fscal*dx;
    ty = fscal*dy;
    tz = fscal*dz;

    /* Increment i atom force */
    *fix = *fix + tx;
    *fiy = *fiy + ty;
    *fiz = *fiz + tz;

    /* Decrement j atom force */
    f[3*k]   = f[3*k]   - tx;
    f[3*k+1] = f[3*k+1] - ty;
    f[3*k+2] = f[3*k+2] - tz;
}

/* All-vs-all nonbonded kernel: plain Coulomb and Lennard-Jones.
 *
 * The i atoms run over mdatoms->start .. start+homenr-1. The j ranges and
 * exclusion masks come from the gmx_allvsall_data_t that is cached behind
 * the work pointer; it is created with setup_aadata on the first call.
 * All energies are added to element 0 of Vc and Vvdw.
 */
void nb_kernel_allvsall(t_forcerec *   fr,
                        t_mdatoms *    mdatoms,
                        t_blocka *     excl,
                        real *         x,
                        real *         f,
                        real *         Vc,
                        real *         Vvdw,
                        int *          outeriter,
                        int *          inneriter,
                        void *         work)
{
    gmx_allvsall_data_t **aadata_slot;
    gmx_allvsall_data_t  *aadata;
    int    natoms;
    int    ni0,ni1;
    int    nj0,nj1,nj2;
    int    i,j;
    real * charge;
    int *  type;
    real   facel;
    real * pvdw;
    int *  mask;
    real   ix,iy,iz,iq;
    real   fix,fiy,fiz;
    real   vctot,Vvdwtot;

    charge = mdatoms->chargeA;
    type   = mdatoms->typeA;
    facel  = fr->epsfac;
    natoms = mdatoms->nr;
    ni0    = mdatoms->start;
    ni1    = mdatoms->start+mdatoms->homenr;

    /* Fetch the cached all-vs-all data, building it when absent */
    aadata_slot = (gmx_allvsall_data_t **)work;
    aadata      = *aadata_slot;
    if(aadata==NULL)
    {
        setup_aadata(&aadata,excl,natoms,type,fr->ntype,fr->nbfp);
        *aadata_slot = aadata;
    }

    for(i=ni0; i<ni1; i++)
    {
        /* We assume shifts are NOT used for all-vs-all interactions */

        /* Load i atom data */
        ix   = x[3*i];
        iy   = x[3*i+1];
        iz   = x[3*i+2];
        iq   = facel*charge[i];
        pvdw = aadata->pvdwparam[type[i]];

        /* Zero the potential energy for this list */
        Vvdwtot = 0.0;
        vctot   = 0.0;

        /* Clear i atom forces */
        fix = 0.0;
        fiy = 0.0;
        fiz = 0.0;

        /* Load limits for loop over neighbors */
        nj0  = aadata->jindex[3*i];
        nj1  = aadata->jindex[3*i+1];
        nj2  = aadata->jindex[3*i+2];
        mask = aadata->exclusion_mask[i];

        /* Prologue part, including exclusion mask:
         * entry j-nj0 of the mask is zero for pairs that are skipped.
         */
        for(j=nj0; j<nj1; j++)
        {
            if(mask[j-nj0]!=0)
            {
                nb_kernel_allvsall_pair(x,f,charge,pvdw,j%natoms,
                                        ix,iy,iz,iq,
                                        &fix,&fiy,&fiz,&vctot,&Vvdwtot);
            }
            /* Inner loop uses 38 flops/iteration */
        }

        /* Main part, no exclusions */
        for(j=nj1; j<nj2; j++)
        {
            nb_kernel_allvsall_pair(x,f,charge,pvdw,j%natoms,
                                    ix,iy,iz,iq,
                                    &fix,&fiy,&fiz,&vctot,&Vvdwtot);
            /* Inner loop uses 38 flops/iteration */
        }

        f[3*i]   += fix;
        f[3*i+1] += fiy;
        f[3*i+2] += fiz;

        /* Add potential energies to the group for this list */
        Vc[0]   = Vc[0]   + vctot;
        Vvdw[0] = Vvdw[0] + Vvdwtot;

        /* Outer loop uses 6 flops/iteration */
    }

    /* Write outer/inner iteration count to pointers */
    *outeriter = ni1-ni0;
    *inneriter = (ni1-ni0)*natoms/2;
}
real ta_disres(int nfa,const t_iatom forceatoms[],const t_iparams ip[], const rvec x[],rvec f[],rvec fshift[], const t_pbc *pbc,const t_graph *g, real lambda,real *dvdlambda, const t_mdatoms *md,t_fcdata *fcd, int *global_atom_index) { const real sixth=1.0/6.0; const real seven_three=7.0/3.0; atom_id ai,aj; int fa,res,npair,p,pair,ki=CENTRAL,m; int type; rvec dx; real weight_rt_1; real smooth_fc,Rt,Rtav,rt2,*Rtl_6,*Rt_6,*Rtav_6; real k0,f_scal=0,fmax_scal,fk_scal,fij; real tav_viol,instant_viol,mixed_viol,violtot,vtot; real tav_viol_Rtav7,instant_viol_Rtav7; real up1,up2,low; gmx_bool bConservative,bMixed,bViolation; ivec it,jt,dt; t_disresdata *dd; int dr_weighting; gmx_bool dr_bMixed; dd = &(fcd->disres); dr_weighting = dd->dr_weighting; dr_bMixed = dd->dr_bMixed; Rtl_6 = dd->Rtl_6; Rt_6 = dd->Rt_6; Rtav_6 = dd->Rtav_6; tav_viol=instant_viol=mixed_viol=tav_viol_Rtav7=instant_viol_Rtav7=0; smooth_fc = dd->dr_fc; if (dd->dr_tau != 0) { /* scaling factor to smoothly turn on the restraint forces * * when using time averaging */ smooth_fc *= (1.0 - dd->exp_min_t_tau); } violtot = 0; vtot = 0; /* 'loop' over all atom pairs (pair_nr=fa/3) involved in restraints, * * the total number of atoms pairs is nfa/3 */ res = 0; fa = 0; while (fa < nfa) { type = forceatoms[fa]; /* Take action depending on restraint, calculate scalar force */ npair = ip[type].disres.npair; up1 = ip[type].disres.up1; up2 = ip[type].disres.up2; low = ip[type].disres.low; k0 = smooth_fc*ip[type].disres.kfac; /* save some flops when there is only one pair */ if (ip[type].disres.type != 2) { bConservative = (dr_weighting == edrwConservative) && (npair > 1); bMixed = dr_bMixed; Rt = pow(Rt_6[res],-sixth); Rtav = pow(Rtav_6[res],-sixth); } else { /* When rtype=2 use instantaneous not ensemble avereged distance */ bConservative = (npair > 1); bMixed = FALSE; Rt = pow(Rtl_6[res],-sixth); Rtav = Rt; } if (Rtav > up1) { bViolation = TRUE; tav_viol = Rtav - up1; } else if (Rtav < low) { bViolation = TRUE; 
tav_viol = Rtav - low; } else { bViolation = FALSE; } if (bViolation) { /* NOTE: * there is no real potential when time averaging is applied */ vtot += 0.5*k0*sqr(tav_viol); if (1/vtot == 0) { printf("vtot is inf: %f\n",vtot); } if (!bMixed) { f_scal = -k0*tav_viol; violtot += fabs(tav_viol); } else { if (Rt > up1) { if (tav_viol > 0) { instant_viol = Rt - up1; } else { bViolation = FALSE; } } else if (Rt < low) { if (tav_viol < 0) { instant_viol = Rt - low; } else { bViolation = FALSE; } } else { bViolation = FALSE; } if (bViolation) { mixed_viol = sqrt(tav_viol*instant_viol); f_scal = -k0*mixed_viol; violtot += mixed_viol; } } } if (bViolation) { fmax_scal = -k0*(up2-up1); /* Correct the force for the number of restraints */ if (bConservative) { f_scal = max(f_scal,fmax_scal); if (!bMixed) { f_scal *= Rtav/Rtav_6[res]; } else { f_scal /= 2*mixed_viol; tav_viol_Rtav7 = tav_viol*Rtav/Rtav_6[res]; instant_viol_Rtav7 = instant_viol*Rt/Rt_6[res]; } } else { f_scal /= (real)npair; f_scal = max(f_scal,fmax_scal); } /* Exert the force ... */ /* Loop over the atom pairs of 'this' restraint */ for(p=0; p<npair; p++) { pair = fa/3; ai = forceatoms[fa+1]; aj = forceatoms[fa+2]; if (pbc) { ki = pbc_dx_aiuc(pbc,x[ai],x[aj],dx); } else { rvec_sub(x[ai],x[aj],dx); } rt2 = iprod(dx,dx); weight_rt_1 = gmx_invsqrt(rt2); if (bConservative) { if (!dr_bMixed) { weight_rt_1 *= pow(dd->rm3tav[pair],seven_three); } else { weight_rt_1 *= tav_viol_Rtav7*pow(dd->rm3tav[pair],seven_three)+ instant_viol_Rtav7*pow(dd->rt[pair],-7); } } fk_scal = f_scal*weight_rt_1; if (g) { ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt); ki=IVEC2IS(dt); } for(m=0; m<DIM; m++) { fij = fk_scal*dx[m]; f[ai][m] += fij; f[aj][m] -= fij; fshift[ki][m] += fij; fshift[CENTRAL][m] -= fij; } fa += 3; } } else { /* No violation so force and potential contributions */ fa += 3*npair; } res++; } dd->sumviol = violtot; /* Return energy */ return vtot; }
/* Compute factors for restricted dihedral potential * For explanations on formula used see file "restcbt.h" */ void compute_factors_restrdihs(int type, const t_iparams forceparams[], rvec delta_ante, rvec delta_crnt, rvec delta_post, real *factor_phi_ai_ante, real *factor_phi_ai_crnt, real *factor_phi_ai_post, real *factor_phi_aj_ante, real *factor_phi_aj_crnt, real *factor_phi_aj_post, real *factor_phi_ak_ante, real *factor_phi_ak_crnt, real *factor_phi_ak_post, real *factor_phi_al_ante, real *factor_phi_al_crnt, real *factor_phi_al_post, real *prefactor_phi, real *v) { real phi0, cosine_phi0; real k_torsion; real c_self_ante, c_self_crnt, c_self_post; real c_cros_ante, c_cros_acrs, c_cros_post; real c_prod, d_post, d_ante; real sine_phi_sq, cosine_phi; real delta_cosine, term_phi_phi0; real ratio_phi_ante, ratio_phi_post; real norm_phi; /* Read parameters phi0 and k_torsion */ phi0 = forceparams[type].pdihs.phiA * DEG2RAD; cosine_phi0 = cos(phi0); k_torsion = forceparams[type].pdihs.cpA; /* Computation of the cosine of the dihedral angle. The scalar ("dot") product method * is used. c_*_* cummulate the scalar products of the differences of particles * positions while c_prod, d_ante and d_post are differences of products of scalar * terms that are parts of the derivatives of forces */ c_self_ante = iprod(delta_ante, delta_ante); c_self_crnt = iprod(delta_crnt, delta_crnt); c_self_post = iprod(delta_post, delta_post); c_cros_ante = iprod(delta_ante, delta_crnt); c_cros_acrs = iprod(delta_ante, delta_post); c_cros_post = iprod(delta_crnt, delta_post); c_prod = c_cros_ante * c_cros_post - c_self_crnt * c_cros_acrs; d_ante = c_self_ante * c_self_crnt - c_cros_ante * c_cros_ante; d_post = c_self_post * c_self_crnt - c_cros_post * c_cros_post; /* When three consecutive beads align, we obtain values close to zero. * Here we avoid small values to prevent round-off errors. 
*/ if (d_ante < GMX_REAL_EPS) { d_ante = GMX_REAL_EPS; } if (d_post < GMX_REAL_EPS) { d_post = GMX_REAL_EPS; } /* Computes the square of the sinus of phi in sine_phi_sq */ norm_phi = gmx_invsqrt(d_ante * d_post); cosine_phi = c_prod * norm_phi; sine_phi_sq = 1.0 - cosine_phi * cosine_phi; /* It is possible that cosine_phi is slightly bigger than 1.0 due to round-off errors. */ if (sine_phi_sq < 0.0) { sine_phi_sq = 0.0; } /* Computation of the differences of cosines (delta_cosine) and a term (term_phi_phi0) * that is part of the common prefactor_phi */ delta_cosine = cosine_phi - cosine_phi0; term_phi_phi0 = 1 - cosine_phi * cosine_phi0; /* Computation of ratios */ ratio_phi_ante = c_prod / d_ante; ratio_phi_post = c_prod / d_post; /* Computation of the prefactor - common term for all forces */ *prefactor_phi = -(k_torsion) * delta_cosine * norm_phi * term_phi_phi0 / (sine_phi_sq * sine_phi_sq); /* Computation of force factors. Factors factor_phi_* are coming from the * derivatives of the torsion angle (phi) with respect to the beads ai, aj, al, ak, * (four) coordinates and they are multiplied in the force computations with the * differences of the particles positions stored in parameters delta_ante, * delta_crnt, delta_post. 
For formulas see file "restcbt.h" */ *factor_phi_ai_ante = ratio_phi_ante * c_self_crnt; *factor_phi_ai_crnt = -c_cros_post - ratio_phi_ante * c_cros_ante; *factor_phi_ai_post = c_self_crnt; *factor_phi_aj_ante = -c_cros_post - ratio_phi_ante * (c_self_crnt + c_cros_ante); *factor_phi_aj_crnt = c_cros_post + c_cros_acrs * 2.0 + ratio_phi_ante * (c_self_ante + c_cros_ante) + ratio_phi_post * c_self_post; *factor_phi_aj_post = -(c_cros_ante + c_self_crnt) - ratio_phi_post * c_cros_post; *factor_phi_ak_ante = c_cros_post + c_self_crnt + ratio_phi_ante * c_cros_ante; *factor_phi_ak_crnt = -(c_cros_ante + c_cros_acrs * 2.0)- ratio_phi_ante * c_self_ante - ratio_phi_post * (c_self_post + c_cros_post); *factor_phi_ak_post = c_cros_ante + ratio_phi_post * (c_self_crnt + c_cros_post); *factor_phi_al_ante = -c_self_crnt; *factor_phi_al_crnt = c_cros_ante + ratio_phi_post * c_cros_post; *factor_phi_al_post = -ratio_phi_post * c_self_crnt; /* Contribution to energy - see formula in file "restcbt.h"*/ *v = k_torsion * 0.5 * delta_cosine * delta_cosine / sine_phi_sq; }
void calc_disres_R_6(const gmx_multisim_t *ms, int nfa,const t_iatom forceatoms[],const t_iparams ip[], const rvec x[],const t_pbc *pbc, t_fcdata *fcd,history_t *hist) { atom_id ai,aj; int fa,res,i,pair,ki,kj,m; int type,npair,np; rvec dx; real *rt,*rm3tav,*Rtl_6,*Rt_6,*Rtav_6; real rt_1,rt_3,rt2; ivec it,jt,dt; t_disresdata *dd; real ETerm,ETerm1,cf1=0,cf2=0,invn=0; gmx_bool bTav; dd = &(fcd->disres); bTav = (dd->dr_tau != 0); ETerm = dd->ETerm; ETerm1 = dd->ETerm1; rt = dd->rt; rm3tav = dd->rm3tav; Rtl_6 = dd->Rtl_6; Rt_6 = dd->Rt_6; Rtav_6 = dd->Rtav_6; if (bTav) { /* scaling factor to smoothly turn on the restraint forces * * when using time averaging */ dd->exp_min_t_tau = hist->disre_initf*ETerm; cf1 = dd->exp_min_t_tau; cf2 = 1.0/(1.0 - dd->exp_min_t_tau); } if (dd->nsystems > 1) { invn = 1.0/dd->nsystems; } /* 'loop' over all atom pairs (pair_nr=fa/3) involved in restraints, * * the total number of atoms pairs is nfa/3 */ res = 0; fa = 0; while (fa < nfa) { type = forceatoms[fa]; npair = ip[type].disres.npair; Rtav_6[res] = 0.0; Rt_6[res] = 0.0; /* Loop over the atom pairs of 'this' restraint */ np = 0; while (fa < nfa && np < npair) { pair = fa/3; ai = forceatoms[fa+1]; aj = forceatoms[fa+2]; if (pbc) { pbc_dx_aiuc(pbc,x[ai],x[aj],dx); } else { rvec_sub(x[ai],x[aj],dx); } rt2 = iprod(dx,dx); rt_1 = gmx_invsqrt(rt2); rt_3 = rt_1*rt_1*rt_1; rt[pair] = sqrt(rt2); if (bTav) { /* Here we update rm3tav in t_fcdata using the data * in history_t. * Thus the results stay correct when this routine * is called multiple times. */ rm3tav[pair] = cf2*((ETerm - cf1)*hist->disre_rm3tav[pair] + ETerm1*rt_3); } else { rm3tav[pair] = rt_3; } Rt_6[res] += rt_3*rt_3; Rtav_6[res] += rm3tav[pair]*rm3tav[pair]; fa += 3; np++; } if (dd->nsystems > 1) { Rtl_6[res] = Rt_6[res]; Rt_6[res] *= invn; Rtav_6[res] *= invn; } res++; } #ifdef GMX_MPI if (dd->nsystems > 1) { gmx_sum_comm(2*dd->nres,Rt_6,dd->mpi_comm_ensemble); } #endif }
/*
 * Gromacs nonbonded kernel nb_kernel410
 * Coulomb interaction:     Generalized-Born
 * VdW interaction:         Lennard-Jones
 * water optimization:      No
 * Calculate forces:        yes
 *
 * For every neighbour list n the i atom iinr[n], displaced by the shift
 * vector shiftvec[3*shift[n] ..], interacts with the j atoms
 * jjnr[jindex[n]] .. jjnr[jindex[n+1]-1].
 *
 * Written:  faction (i and j atoms), fshift (at 3*shift[n]), dvda (i and j
 *           atoms), Vc / Vvdw / gbdata->gpol at index gid[n], and the
 *           iteration counts *outeriter and *inneriter.
 * work is cast to gmx_gbdata_t * and supplies gpol, epsilon_r and
 * gb_epsilon_solvent.  GBtab is read four values per table point.
 * p_krf, p_crf, p_tabscale and VFtab are not used by this kernel.
 * p_nthreads, count and mtx are only used when GMX_THREAD_SHM_FDECOMP is
 * defined, to claim chunks of lists from the shared counter *count.
 */
void nb_kernel410(
        int  *p_nri,
        int  *iinr,
        int  *jindex,
        int  *jjnr,
        int  *shift,
        real *shiftvec,
        real *fshift,
        int  *gid,
        real *pos,
        real *faction,
        real *charge,
        real *p_facel,
        real *p_krf,
        real *p_crf,
        real *Vc,
        int  *type,
        int  *p_ntype,
        real *vdwparam,
        real *Vvdw,
        real *p_tabscale,
        real *VFtab,
        real *invsqrta,
        real *dvda,
        real *p_gbtabscale,
        real *GBtab,
        int  *p_nthreads,
        int  *count,
        void *mtx,
        int  *outeriter,
        int  *inneriter,
        real *work)
{
    gmx_gbdata_t *gbdata     = (gmx_gbdata_t *)work;
    real         *gpol       = gbdata->gpol;
    int           nri        = *p_nri;
    int           ntype      = *p_ntype;
    int           nthreads   = *p_nthreads;
    real          facel      = *p_facel;
    real          scale_gb   = (1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent);
    real          gbtabscale = *p_gbtabscale;
    int           nouter     = 0;
    int           ninner     = 0;
    int           nn0, nn1, n, k;

    /* Loop over thread workunits */
    do
    {
#ifdef GMX_THREAD_SHM_FDECOMP
        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
        nn0    = *count;
        /* Take successively smaller chunks (at least 10 lists) */
        nn1    = nn0+(nri-nn0)/(2*nthreads)+10;
        *count = nn1;
        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
        if (nn1 > nri)
        {
            nn1 = nri;
        }
#else
        nn0 = 0;
        nn1 = nri;
#endif

        /* Outer loop over neighborlists */
        for (n = nn0; n < nn1; n++)
        {
            const int  is3  = 3*shift[n];
            const int  jbeg = jindex[n];
            const int  jend = jindex[n+1];
            const int  ii   = iinr[n];
            const int  ii3  = 3*ii;
            /* i atom position with the shift vector added */
            const real ix   = shiftvec[is3]   + pos[ii3];
            const real iy   = shiftvec[is3+1] + pos[ii3+1];
            const real iz   = shiftvec[is3+2] + pos[ii3+2];
            /* i atom parameters */
            const real iq   = facel*charge[ii];
            const real isai = invsqrta[ii];
            const int  nti  = 2*ntype*type[ii];
            /* Per-list accumulators for energies, dvda and the i force */
            real       vctot   = 0;
            real       vvdwtot = 0;
            real       vgbtot  = 0;
            real       dvdasum = 0;
            real       fix     = 0;
            real       fiy     = 0;
            real       fiz     = 0;
            int        ggid;

            for (k = jbeg; k < jend; k++)
            {
                const int  jnr     = jjnr[k];
                const int  j3      = 3*jnr;
                const real dx      = ix - pos[j3];
                const real dy      = iy - pos[j3+1];
                const real dz      = iz - pos[j3+2];
                const real rsq     = dx*dx+dy*dy+dz*dz;
                const real rinv    = gmx_invsqrt(rsq);
                const real rinvsq  = rinv*rinv;
                const real r       = rsq*rinv;
                /* j atom parameters */
                const real isaj    = invsqrta[jnr];
                const real isaprod = isai*isaj;
                const real gbscale = isaprod*gbtabscale;
                const real qq_coul = iq*charge[jnr];
                const real vcoul   = qq_coul*rinv;
                const real fcoul   = vcoul*rinv;
                const real qq_gb   = isaprod*(-qq_coul)*scale_gb;
                const int  tj      = nti+2*type[jnr];
                const real c6      = vdwparam[tj];
                const real c12     = vdwparam[tj+1];
                /* Tabulated Generalized-Born interaction: table index n0 is
                 * the truncated scaled distance, eps the remainder
                 */
                const real rt      = r*gbscale;
                const int  n0      = (int)rt;
                const real eps     = rt-n0;
                const real eps2    = eps*eps;
                const int  nnn     = 4*n0;
                const real Y       = GBtab[nnn];
                const real F       = GBtab[nnn+1];
                const real Geps    = eps*GBtab[nnn+2];
                const real Heps2   = eps2*GBtab[nnn+3];
                const real Fp      = F+Geps+Heps2;
                const real VV      = Y+eps*Fp;
                const real FF      = Fp+Geps+2.0*Heps2;
                const real vgb     = qq_gb*VV;
                const real fijC    = qq_gb*FF*gbscale;
                const real dvdatmp = -0.5*(vgb+fijC*r);
                /* Lennard-Jones interaction */
                const real rinvsix = rinvsq*rinvsq*rinvsq;
                const real vvdw6   = c6*rinvsix;
                const real vvdw12  = c12*rinvsix*rinvsix;
                /* Scalar force and its Cartesian components */
                const real fscal   = (12.0*vvdw12-6.0*vvdw6)*rinvsq-(fijC-fcoul)*rinv;
                const real tx      = fscal*dx;
                const real ty      = fscal*dy;
                const real tz      = fscal*dz;

                dvdasum   += dvdatmp;
                dvda[jnr] += dvdatmp*isaj*isaj;
                vctot     += vcoul;
                vgbtot    += vgb;
                vvdwtot    = vvdwtot+vvdw12-vvdw6;

                /* Increment i atom force */
                fix += tx;
                fiy += ty;
                fiz += tz;

                /* Decrement j atom force */
                faction[j3]   -= tx;
                faction[j3+1] -= ty;
                faction[j3+2] -= tz;

                /* Inner loop uses 62 flops/iteration */
            }

            /* Add i forces to mem and shifted force list */
            faction[ii3]   += fix;
            faction[ii3+1] += fiy;
            faction[ii3+2] += fiz;
            fshift[is3]    += fix;
            fshift[is3+1]  += fiy;
            fshift[is3+2]  += fiz;

            /* Add potential energies to the group for this list */
            ggid        = gid[n];
            Vc[ggid]   += vctot;
            gpol[ggid] += vgbtot;
            Vvdw[ggid] += vvdwtot;
            dvda[ii]   += dvdasum*isai*isai;

            /* Increment number of inner iterations */
            ninner += jend - jbeg;

            /* Outer loop uses 13 flops/iteration */
        }

        /* Increment number of outer iterations */
        nouter += nn1 - nn0;
    }
    while (nn1 < nri);

    /* Write outer/inner iteration count to pointers */
    *outeriter = nouter;
    *inneriter = ninner;
}
/* * Gromacs nonbonded kernel pf_nb_kernel302nf * Coulomb interaction: Tabulated * VdW interaction: Not calculated * water optimization: pairs of SPC/TIP3P interactions * Calculate forces: no */ void pf_nb_kernel302nf( int * p_nri, int * iinr, int * jindex, int * jjnr, int * shift, real * shiftvec, real * fshift, int * gid, real * pos, real * faction, real * charge, real * p_facel, real * p_krf, real * p_crf, real * Vc, int * type, int * p_ntype, real * vdwparam, real * Vvdw, real * p_tabscale, real * VFtab, real * invsqrta, real * dvda, real * p_gbtabscale, real * GBtab, int * p_nthreads, int * count, void * mtx, int * outeriter, int * inneriter, real * work, t_pf_global * pf_global) { int nri,ntype,nthreads; real facel,krf,crf,tabscale,gbtabscale; int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid; int nn0,nn1,nouter,ninner; real shX,shY,shZ; real qq,vcoul,vctot; real r,rt,eps,eps2; int n0,nnn; real Y,F,Geps,Heps2,Fp,VV; real ix1,iy1,iz1; real ix2,iy2,iz2; real ix3,iy3,iz3; real jx1,jy1,jz1; real jx2,jy2,jz2; real jx3,jy3,jz3; real dx11,dy11,dz11,rsq11,rinv11; real dx12,dy12,dz12,rsq12,rinv12; real dx13,dy13,dz13,rsq13,rinv13; real dx21,dy21,dz21,rsq21,rinv21; real dx22,dy22,dz22,rsq22,rinv22; real dx23,dy23,dz23,rsq23,rinv23; real dx31,dy31,dz31,rsq31,rinv31; real dx32,dy32,dz32,rsq32,rinv32; real dx33,dy33,dz33,rsq33,rinv33; real qO,qH,qqOO,qqOH,qqHH; nri = *p_nri; ntype = *p_ntype; nthreads = *p_nthreads; facel = *p_facel; krf = *p_krf; crf = *p_crf; tabscale = *p_tabscale; /* Initialize water data */ ii = iinr[0]; qO = charge[ii]; qH = charge[ii+1]; qqOO = facel*qO*qO; qqOH = facel*qO*qH; qqHH = facel*qH*qH; /* Reset outer and inner iteration counters */ nouter = 0; ninner = 0; /* Loop over thread workunits */ do { #ifdef GMX_THREAD_SHM_FDECOMP tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx); nn0 = *count; /* Take successively smaller chunks (at least 10 lists) */ nn1 = nn0+(nri-nn0)/(2*nthreads)+10; *count = nn1; tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx); 
if(nn1>nri) nn1=nri; #else nn0 = 0; nn1 = nri; #endif /* Start outer loop over neighborlists */ for(n=nn0; (n<nn1); n++) { /* Load shift vector for this list */ is3 = 3*shift[n]; shX = shiftvec[is3]; shY = shiftvec[is3+1]; shZ = shiftvec[is3+2]; /* Load limits for loop over neighbors */ nj0 = jindex[n]; nj1 = jindex[n+1]; /* Get outer coordinate index */ ii = iinr[n]; ii3 = 3*ii; /* Load i atom data, add shift vector */ ix1 = shX + pos[ii3+0]; iy1 = shY + pos[ii3+1]; iz1 = shZ + pos[ii3+2]; ix2 = shX + pos[ii3+3]; iy2 = shY + pos[ii3+4]; iz2 = shZ + pos[ii3+5]; ix3 = shX + pos[ii3+6]; iy3 = shY + pos[ii3+7]; iz3 = shZ + pos[ii3+8]; /* Zero the potential energy for this list */ vctot = 0; /* Clear i atom forces */ for(k=nj0; (k<nj1); k++) { /* Get j neighbor index, and coordinate index */ jnr = jjnr[k]; j3 = 3*jnr; /* load j atom coordinates */ jx1 = pos[j3+0]; jy1 = pos[j3+1]; jz1 = pos[j3+2]; jx2 = pos[j3+3]; jy2 = pos[j3+4]; jz2 = pos[j3+5]; jx3 = pos[j3+6]; jy3 = pos[j3+7]; jz3 = pos[j3+8]; /* Calculate distance */ dx11 = ix1 - jx1; dy11 = iy1 - jy1; dz11 = iz1 - jz1; rsq11 = dx11*dx11+dy11*dy11+dz11*dz11; dx12 = ix1 - jx2; dy12 = iy1 - jy2; dz12 = iz1 - jz2; rsq12 = dx12*dx12+dy12*dy12+dz12*dz12; dx13 = ix1 - jx3; dy13 = iy1 - jy3; dz13 = iz1 - jz3; rsq13 = dx13*dx13+dy13*dy13+dz13*dz13; dx21 = ix2 - jx1; dy21 = iy2 - jy1; dz21 = iz2 - jz1; rsq21 = dx21*dx21+dy21*dy21+dz21*dz21; dx22 = ix2 - jx2; dy22 = iy2 - jy2; dz22 = iz2 - jz2; rsq22 = dx22*dx22+dy22*dy22+dz22*dz22; dx23 = ix2 - jx3; dy23 = iy2 - jy3; dz23 = iz2 - jz3; rsq23 = dx23*dx23+dy23*dy23+dz23*dz23; dx31 = ix3 - jx1; dy31 = iy3 - jy1; dz31 = iz3 - jz1; rsq31 = dx31*dx31+dy31*dy31+dz31*dz31; dx32 = ix3 - jx2; dy32 = iy3 - jy2; dz32 = iz3 - jz2; rsq32 = dx32*dx32+dy32*dy32+dz32*dz32; dx33 = ix3 - jx3; dy33 = iy3 - jy3; dz33 = iz3 - jz3; rsq33 = dx33*dx33+dy33*dy33+dz33*dz33; /* Calculate 1/r and 1/r2 */ rinv11 = gmx_invsqrt(rsq11); rinv12 = gmx_invsqrt(rsq12); rinv13 = gmx_invsqrt(rsq13); rinv21 = 
gmx_invsqrt(rsq21); rinv22 = gmx_invsqrt(rsq22); rinv23 = gmx_invsqrt(rsq23); rinv31 = gmx_invsqrt(rsq31); rinv32 = gmx_invsqrt(rsq32); rinv33 = gmx_invsqrt(rsq33); /* Load parameters for j atom */ qq = qqOO; /* Calculate table index */ r = rsq11*rinv11; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; vcoul = qq*VV; vctot = vctot + vcoul; /* Load parameters for j atom */ qq = qqOH; /* Calculate table index */ r = rsq12*rinv12; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; vcoul = qq*VV; vctot = vctot + vcoul; /* Load parameters for j atom */ qq = qqOH; /* Calculate table index */ r = rsq13*rinv13; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; vcoul = qq*VV; vctot = vctot + vcoul; /* Load parameters for j atom */ qq = qqOH; /* Calculate table index */ r = rsq21*rinv21; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; vcoul = qq*VV; vctot = vctot + vcoul; /* Load parameters for j atom */ qq = qqHH; /* Calculate table index */ r = rsq22*rinv22; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = 
F+Geps+Heps2; VV = Y+eps*Fp; vcoul = qq*VV; vctot = vctot + vcoul; /* Load parameters for j atom */ qq = qqHH; /* Calculate table index */ r = rsq23*rinv23; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; vcoul = qq*VV; vctot = vctot + vcoul; /* Load parameters for j atom */ qq = qqOH; /* Calculate table index */ r = rsq31*rinv31; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; vcoul = qq*VV; vctot = vctot + vcoul; /* Load parameters for j atom */ qq = qqHH; /* Calculate table index */ r = rsq32*rinv32; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; vcoul = qq*VV; vctot = vctot + vcoul; /* Load parameters for j atom */ qq = qqHH; /* Calculate table index */ r = rsq33*rinv33; /* Calculate table index */ rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = 4*n0; /* Tabulated coulomb interaction */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; vcoul = qq*VV; vctot = vctot + vcoul; /* Inner loop uses 225 flops/iteration */ } /* Add i forces to mem and shifted force list */ /* Add potential energies to the group for this list */ ggid = gid[n]; Vc[ggid] = Vc[ggid] + vctot; /* Increment number of inner iterations */ ninner = ninner + nj1 - nj0; /* Outer loop uses 10 flops/iteration */ } /* Increment number of outer iterations */ nouter = nouter + nn1 - nn0; } while (nn1<nri); /* Write 
outer/inner iteration count to pointers */ *outeriter = nouter; *inneriter = ninner; }
/*
 * Gromacs nonbonded kernel nb_kernel213nf
 * Coulomb interaction:     Reaction field
 * VdW interaction:         Lennard-Jones
 * water optimization:      TIP4P - other atoms
 * Calculate forces:        no
 *
 * Energy-only kernel: for every neighbour list n, the four consecutive
 * sites starting at atom iinr[n] (shifted by shiftvec[3*shift[n] ..])
 * interact with the single j atoms jjnr[jindex[n]] .. jjnr[jindex[n+1]-1].
 * Site 1 contributes only the Lennard-Jones term; sites 2, 3 and 4
 * contribute only the reaction-field Coulomb term qq*(1/r + krf*r^2 - crf).
 *
 * The site charges and the VdW type offset are read once, from the first
 * i molecule (charge[iinr[0]+1], charge[iinr[0]+3], type[iinr[0]]); sites 2
 * and 3 share the same charge.
 * Only Vc[gid[n]], Vvdw[gid[n]], *outeriter and *inneriter are written
 * (plus *count when GMX_THREAD_SHM_FDECOMP is defined).  fshift, faction,
 * p_tabscale, VFtab, invsqrta, dvda, p_gbtabscale, GBtab and work are not
 * used by this kernel.
 */
void nb_kernel213nf(
        int  *p_nri,
        int  *iinr,
        int  *jindex,
        int  *jjnr,
        int  *shift,
        real *shiftvec,
        real *fshift,
        int  *gid,
        real *pos,
        real *faction,
        real *charge,
        real *p_facel,
        real *p_krf,
        real *p_crf,
        real *Vc,
        int  *type,
        int  *p_ntype,
        real *vdwparam,
        real *Vvdw,
        real *p_tabscale,
        real *VFtab,
        real *invsqrta,
        real *dvda,
        real *p_gbtabscale,
        real *GBtab,
        int  *p_nthreads,
        int  *count,
        void *mtx,
        int  *outeriter,
        int  *inneriter,
        real *work)
{
    int  nri      = *p_nri;
    int  ntype    = *p_ntype;
    int  nthreads = *p_nthreads;
    real facel    = *p_facel;
    real krf      = *p_krf;
    real crf      = *p_crf;
    /* Water data, taken from the first i molecule */
    int  first    = iinr[0];
    real qH       = facel*charge[first+1];
    real qM       = facel*charge[first+3];
    int  nti      = 2*ntype*type[first];
    int  nouter   = 0;
    int  ninner   = 0;
    int  nn0, nn1, n, k;

    /* Loop over thread workunits */
    do
    {
#ifdef GMX_THREAD_SHM_FDECOMP
        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
        nn0    = *count;
        /* Take successively smaller chunks (at least 10 lists) */
        nn1    = nn0+(nri-nn0)/(2*nthreads)+10;
        *count = nn1;
        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
        if (nn1 > nri)
        {
            nn1 = nri;
        }
#else
        nn0 = 0;
        nn1 = nri;
#endif

        /* Outer loop over neighborlists */
        for (n = nn0; n < nn1; n++)
        {
            const int  is3  = 3*shift[n];
            const real shX  = shiftvec[is3];
            const real shY  = shiftvec[is3+1];
            const real shZ  = shiftvec[is3+2];
            const int  jbeg = jindex[n];
            const int  jend = jindex[n+1];
            const int  ii3  = 3*iinr[n];
            /* Shifted coordinates of the four i sites */
            const real s1x  = shX + pos[ii3];
            const real s1y  = shY + pos[ii3+1];
            const real s1z  = shZ + pos[ii3+2];
            const real s2x  = shX + pos[ii3+3];
            const real s2y  = shY + pos[ii3+4];
            const real s2z  = shZ + pos[ii3+5];
            const real s3x  = shX + pos[ii3+6];
            const real s3y  = shY + pos[ii3+7];
            const real s3z  = shZ + pos[ii3+8];
            const real s4x  = shX + pos[ii3+9];
            const real s4y  = shY + pos[ii3+10];
            const real s4z  = shZ + pos[ii3+11];
            /* Per-list energy accumulators */
            real       vctot   = 0;
            real       vvdwtot = 0;
            int        ggid;

            for (k = jbeg; k < jend; k++)
            {
                const int  jnr     = jjnr[k];
                const int  j3      = 3*jnr;
                const real jx      = pos[j3];
                const real jy      = pos[j3+1];
                const real jz      = pos[j3+2];
                /* Squared distances from each i site to the j atom */
                const real dx1     = s1x - jx;
                const real dy1     = s1y - jy;
                const real dz1     = s1z - jz;
                const real rsq1    = dx1*dx1+dy1*dy1+dz1*dz1;
                const real dx2     = s2x - jx;
                const real dy2     = s2y - jy;
                const real dz2     = s2z - jz;
                const real rsq2    = dx2*dx2+dy2*dy2+dz2*dz2;
                const real dx3     = s3x - jx;
                const real dy3     = s3y - jy;
                const real dz3     = s3z - jz;
                const real rsq3    = dx3*dx3+dy3*dy3+dz3*dz3;
                const real dx4     = s4x - jx;
                const real dy4     = s4y - jy;
                const real dz4     = s4z - jz;
                const real rsq4    = dx4*dx4+dy4*dy4+dz4*dz4;
                /* 1/r^2 for the LJ site, 1/r for the charged sites */
                const real rinvsq  = 1.0/rsq1;
                const real rinv2   = gmx_invsqrt(rsq2);
                const real rinv3   = gmx_invsqrt(rsq3);
                const real rinv4   = gmx_invsqrt(rsq4);
                /* Lennard-Jones interaction (site 1) */
                const int  tj      = nti+2*type[jnr];
                const real c6      = vdwparam[tj];
                const real c12     = vdwparam[tj+1];
                const real rinvsix = rinvsq*rinvsq*rinvsq;
                const real vvdw6   = c6*rinvsix;
                const real vvdw12  = c12*rinvsix*rinvsix;
                /* Coulomb reaction-field interaction (sites 2, 3, 4) */
                const real jq      = charge[jnr];
                const real qqH     = qH*jq;
                const real qqM     = qM*jq;
                const real krsq2   = krf*rsq2;
                const real vcoul2  = qqH*(rinv2+krsq2-crf);
                const real krsq3   = krf*rsq3;
                const real vcoul3  = qqH*(rinv3+krsq3-crf);
                const real krsq4   = krf*rsq4;
                const real vcoul4  = qqM*(rinv4+krsq4-crf);

                vvdwtot = vvdwtot+vvdw12-vvdw6;
                vctot  += vcoul2;
                vctot  += vcoul3;
                vctot  += vcoul4;

                /* Inner loop uses 75 flops/iteration */
            }

            /* Add potential energies to the group for this list */
            ggid        = gid[n];
            Vc[ggid]   += vctot;
            Vvdw[ggid] += vvdwtot;

            /* Increment number of inner iterations */
            ninner += jend - jbeg;

            /* Outer loop uses 14 flops/iteration */
        }

        /* Increment number of outer iterations */
        nouter += nn1 - nn0;
    }
    while (nn1 < nri);

    /* Write outer/inner iteration count to pointers */
    *outeriter = nouter;
    *inneriter = ninner;
}
/* * Gromacs nonbonded kernel pf_nb_kernel104nf * Coulomb interaction: Normal Coulomb * VdW interaction: Not calculated * water optimization: pairs of TIP4P interactions * Calculate forces: no */ void pf_nb_kernel104nf( int * p_nri, int * iinr, int * jindex, int * jjnr, int * shift, real * shiftvec, real * fshift, int * gid, real * pos, real * faction, real * charge, real * p_facel, real * p_krf, real * p_crf, real * Vc, int * type, int * p_ntype, real * vdwparam, real * Vvdw, real * p_tabscale, real * VFtab, real * invsqrta, real * dvda, real * p_gbtabscale, real * GBtab, int * p_nthreads, int * count, void * mtx, int * outeriter, int * inneriter, real * work, t_pf_global * pf_global) { int nri,ntype,nthreads; real facel,krf,crf,tabscale,gbtabscale; int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid; int nn0,nn1,nouter,ninner; real shX,shY,shZ; real qq,vcoul,vctot; real ix2,iy2,iz2; real ix3,iy3,iz3; real ix4,iy4,iz4; real jx2,jy2,jz2; real jx3,jy3,jz3; real jx4,jy4,jz4; real dx22,dy22,dz22,rsq22,rinv22; real dx23,dy23,dz23,rsq23,rinv23; real dx24,dy24,dz24,rsq24,rinv24; real dx32,dy32,dz32,rsq32,rinv32; real dx33,dy33,dz33,rsq33,rinv33; real dx34,dy34,dz34,rsq34,rinv34; real dx42,dy42,dz42,rsq42,rinv42; real dx43,dy43,dz43,rsq43,rinv43; real dx44,dy44,dz44,rsq44,rinv44; real qH,qM,qqMM,qqMH,qqHH; nri = *p_nri; ntype = *p_ntype; nthreads = *p_nthreads; facel = *p_facel; krf = *p_krf; crf = *p_crf; tabscale = *p_tabscale; /* Initialize water data */ ii = iinr[0]; qH = charge[ii+1]; qM = charge[ii+3]; qqMM = facel*qM*qM; qqMH = facel*qM*qH; qqHH = facel*qH*qH; /* Reset outer and inner iteration counters */ nouter = 0; ninner = 0; /* Loop over thread workunits */ do { #ifdef GMX_THREAD_SHM_FDECOMP tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx); nn0 = *count; /* Take successively smaller chunks (at least 10 lists) */ nn1 = nn0+(nri-nn0)/(2*nthreads)+10; *count = nn1; tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx); if(nn1>nri) nn1=nri; #else nn0 = 0; nn1 = nri; #endif /* 
Start outer loop over neighborlists */ for(n=nn0; (n<nn1); n++) { /* Load shift vector for this list */ is3 = 3*shift[n]; shX = shiftvec[is3]; shY = shiftvec[is3+1]; shZ = shiftvec[is3+2]; /* Load limits for loop over neighbors */ nj0 = jindex[n]; nj1 = jindex[n+1]; /* Get outer coordinate index */ ii = iinr[n]; ii3 = 3*ii; /* Load i atom data, add shift vector */ ix2 = shX + pos[ii3+3]; iy2 = shY + pos[ii3+4]; iz2 = shZ + pos[ii3+5]; ix3 = shX + pos[ii3+6]; iy3 = shY + pos[ii3+7]; iz3 = shZ + pos[ii3+8]; ix4 = shX + pos[ii3+9]; iy4 = shY + pos[ii3+10]; iz4 = shZ + pos[ii3+11]; /* Zero the potential energy for this list */ vctot = 0; /* Clear i atom forces */ for(k=nj0; (k<nj1); k++) { /* Get j neighbor index, and coordinate index */ jnr = jjnr[k]; j3 = 3*jnr; /* load j atom coordinates */ jx2 = pos[j3+3]; jy2 = pos[j3+4]; jz2 = pos[j3+5]; jx3 = pos[j3+6]; jy3 = pos[j3+7]; jz3 = pos[j3+8]; jx4 = pos[j3+9]; jy4 = pos[j3+10]; jz4 = pos[j3+11]; /* Calculate distance */ dx22 = ix2 - jx2; dy22 = iy2 - jy2; dz22 = iz2 - jz2; rsq22 = dx22*dx22+dy22*dy22+dz22*dz22; dx23 = ix2 - jx3; dy23 = iy2 - jy3; dz23 = iz2 - jz3; rsq23 = dx23*dx23+dy23*dy23+dz23*dz23; dx24 = ix2 - jx4; dy24 = iy2 - jy4; dz24 = iz2 - jz4; rsq24 = dx24*dx24+dy24*dy24+dz24*dz24; dx32 = ix3 - jx2; dy32 = iy3 - jy2; dz32 = iz3 - jz2; rsq32 = dx32*dx32+dy32*dy32+dz32*dz32; dx33 = ix3 - jx3; dy33 = iy3 - jy3; dz33 = iz3 - jz3; rsq33 = dx33*dx33+dy33*dy33+dz33*dz33; dx34 = ix3 - jx4; dy34 = iy3 - jy4; dz34 = iz3 - jz4; rsq34 = dx34*dx34+dy34*dy34+dz34*dz34; dx42 = ix4 - jx2; dy42 = iy4 - jy2; dz42 = iz4 - jz2; rsq42 = dx42*dx42+dy42*dy42+dz42*dz42; dx43 = ix4 - jx3; dy43 = iy4 - jy3; dz43 = iz4 - jz3; rsq43 = dx43*dx43+dy43*dy43+dz43*dz43; dx44 = ix4 - jx4; dy44 = iy4 - jy4; dz44 = iz4 - jz4; rsq44 = dx44*dx44+dy44*dy44+dz44*dz44; /* Calculate 1/r and 1/r2 */ rinv22 = gmx_invsqrt(rsq22); rinv23 = gmx_invsqrt(rsq23); rinv24 = gmx_invsqrt(rsq24); rinv32 = gmx_invsqrt(rsq32); rinv33 = gmx_invsqrt(rsq33); rinv34 
= gmx_invsqrt(rsq34); rinv42 = gmx_invsqrt(rsq42); rinv43 = gmx_invsqrt(rsq43); rinv44 = gmx_invsqrt(rsq44); /* Load parameters for j atom */ qq = qqHH; /* Coulomb interaction */ vcoul = qq*rinv22; vctot = vctot+vcoul; /* Load parameters for j atom */ qq = qqHH; /* Coulomb interaction */ vcoul = qq*rinv23; vctot = vctot+vcoul; /* Load parameters for j atom */ qq = qqMH; /* Coulomb interaction */ vcoul = qq*rinv24; vctot = vctot+vcoul; /* Load parameters for j atom */ qq = qqHH; /* Coulomb interaction */ vcoul = qq*rinv32; vctot = vctot+vcoul; /* Load parameters for j atom */ qq = qqHH; /* Coulomb interaction */ vcoul = qq*rinv33; vctot = vctot+vcoul; /* Load parameters for j atom */ qq = qqMH; /* Coulomb interaction */ vcoul = qq*rinv34; vctot = vctot+vcoul; /* Load parameters for j atom */ qq = qqMH; /* Coulomb interaction */ vcoul = qq*rinv42; vctot = vctot+vcoul; /* Load parameters for j atom */ qq = qqMH; /* Coulomb interaction */ vcoul = qq*rinv43; vctot = vctot+vcoul; /* Load parameters for j atom */ qq = qqMM; /* Coulomb interaction */ vcoul = qq*rinv44; vctot = vctot+vcoul; /* Inner loop uses 135 flops/iteration */ } /* Add i forces to mem and shifted force list */ /* Add potential energies to the group for this list */ ggid = gid[n]; Vc[ggid] = Vc[ggid] + vctot; /* Increment number of inner iterations */ ninner = ninner + nj1 - nj0; /* Outer loop uses 10 flops/iteration */ } /* Increment number of outer iterations */ nouter = nouter + nn1 - nn0; } while (nn1<nri); /* Write outer/inner iteration count to pointers */ *outeriter = nouter; *inneriter = ninner; }
void gmx_nb_generic_adress_kernel(t_nblist * nlist, rvec * xx, rvec * ff, t_forcerec * fr, t_mdatoms * mdatoms, nb_kernel_data_t * kernel_data, t_nrnb * nrnb) { int nri, ntype, table_nelements, ielec, ivdw; real facel, gbtabscale; int n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid, nnn, n0; real shX, shY, shZ; real fscal, felec, fvdw, velec, vvdw, tx, ty, tz; real rinvsq; real iq; real qq, vctot; int nti, nvdwparam; int tj; real rt, r, eps, eps2, Y, F, Geps, Heps2, VV, FF, Fp, fijD, fijR; real rinvsix; real vvdwtot; real vvdw_rep, vvdw_disp; real ix, iy, iz, fix, fiy, fiz; real jx, jy, jz; real dx, dy, dz, rsq, rinv; real c6, c12, cexp1, cexp2, br; real * charge; real * shiftvec; real * vdwparam; int * shift; int * type; real * fshift; real * velecgrp; real * vvdwgrp; real tabscale; real * VFtab; real * x; real * f; int ewitab; real ewtabscale, eweps, sh_ewald, ewrt, ewtabhalfspace; real * ewtab; real rcoulomb2, rvdw, rvdw2, sh_dispersion, sh_repulsion; real rcutoff, rcutoff2; real rswitch_elec, rswitch_vdw, d, d2, sw, dsw, rinvcorr; real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4; real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4; gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoff; real * wf; real weight_cg1; real weight_cg2; real weight_product; real hybscal; /* the multiplicator to the force for hybrid interactions*/ real force_cap; gmx_bool bCG; int egp_nr; wf = mdatoms->wf; force_cap = fr->adress_ex_forcecap; x = xx[0]; f = ff[0]; ielec = nlist->ielec; ivdw = nlist->ivdw; fshift = fr->fshift[0]; velecgrp = kernel_data->energygrp_elec; vvdwgrp = kernel_data->energygrp_vdw; tabscale = kernel_data->table_elec_vdw->scale; VFtab = kernel_data->table_elec_vdw->data; sh_ewald = fr->ic->sh_ewald; ewtab = fr->ic->tabq_coul_FDV0; ewtabscale = fr->ic->tabq_scale; ewtabhalfspace = 0.5/ewtabscale; rcoulomb2 = fr->rcoulomb*fr->rcoulomb; rvdw = fr->rvdw; rvdw2 = rvdw*rvdw; sh_dispersion = fr->ic->dispersion_shift.cpot; sh_repulsion = 
fr->ic->repulsion_shift.cpot; if (fr->coulomb_modifier == eintmodPOTSWITCH) { d = fr->rcoulomb-fr->rcoulomb_switch; elec_swV3 = -10.0/(d*d*d); elec_swV4 = 15.0/(d*d*d*d); elec_swV5 = -6.0/(d*d*d*d*d); elec_swF2 = -30.0/(d*d*d); elec_swF3 = 60.0/(d*d*d*d); elec_swF4 = -30.0/(d*d*d*d*d); } else { /* Avoid warnings from stupid compilers (looking at you, Clang!) */ elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0; } if (fr->vdw_modifier == eintmodPOTSWITCH) { d = fr->rvdw-fr->rvdw_switch; vdw_swV3 = -10.0/(d*d*d); vdw_swV4 = 15.0/(d*d*d*d); vdw_swV5 = -6.0/(d*d*d*d*d); vdw_swF2 = -30.0/(d*d*d); vdw_swF3 = 60.0/(d*d*d*d); vdw_swF4 = -30.0/(d*d*d*d*d); } else { /* Avoid warnings from stupid compilers (looking at you, Clang!) */ vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0; } bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO; bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE); bExactCutoff = bExactElecCutoff || bExactVdwCutoff; if (bExactCutoff) { rcutoff = ( fr->rcoulomb > fr->rvdw ) ? fr->rcoulomb : fr->rvdw; rcutoff2 = rcutoff*rcutoff; } else { /* Fix warnings for stupid compilers */ rcutoff = rcutoff2 = 1e30; } /* avoid compiler warnings for cases that cannot happen */ nnn = 0; eps = 0.0; eps2 = 0.0; /* 3 VdW parameters for buckingham, otherwise 2 */ nvdwparam = (ivdw == GMX_NBKERNEL_VDW_BUCKINGHAM) ? 
3 : 2; table_nelements = 12; charge = mdatoms->chargeA; type = mdatoms->typeA; facel = fr->epsfac; shiftvec = fr->shift_vec[0]; vdwparam = fr->nbfp; ntype = fr->ntype; for (n = 0; (n < nlist->nri); n++) { is3 = 3*nlist->shift[n]; shX = shiftvec[is3]; shY = shiftvec[is3+1]; shZ = shiftvec[is3+2]; nj0 = nlist->jindex[n]; nj1 = nlist->jindex[n+1]; ii = nlist->iinr[n]; ii3 = 3*ii; ix = shX + x[ii3+0]; iy = shY + x[ii3+1]; iz = shZ + x[ii3+2]; iq = facel*charge[ii]; nti = nvdwparam*ntype*type[ii]; vctot = 0; vvdwtot = 0; fix = 0; fiy = 0; fiz = 0; /* We need to find out if this i atom is part of an all-atom or CG energy group */ egp_nr = mdatoms->cENER[ii]; bCG = !fr->adress_group_explicit[egp_nr]; weight_cg1 = wf[ii]; if ((!bCG) && weight_cg1 < ALMOST_ZERO) { continue; } for (k = nj0; (k < nj1); k++) { jnr = nlist->jjnr[k]; weight_cg2 = wf[jnr]; weight_product = weight_cg1*weight_cg2; if (weight_product < ALMOST_ZERO) { /* if it's a explicit loop, skip this atom */ if (!bCG) { continue; } else /* if it's a coarse grained loop, include this atom */ { hybscal = 1.0; } } else if (weight_product >= ALMOST_ONE) { /* if it's a explicit loop, include this atom */ if (!bCG) { hybscal = 1.0; } else /* if it's a coarse grained loop, skip this atom */ { continue; } } /* both have double identity, get hybrid scaling factor */ else { hybscal = weight_product; if (bCG) { hybscal = 1.0 - hybscal; } } j3 = 3*jnr; jx = x[j3+0]; jy = x[j3+1]; jz = x[j3+2]; dx = ix - jx; dy = iy - jy; dz = iz - jz; rsq = dx*dx+dy*dy+dz*dz; rinv = gmx_invsqrt(rsq); rinvsq = rinv*rinv; felec = 0; fvdw = 0; velec = 0; vvdw = 0; if (bExactCutoff && rsq > rcutoff2) { continue; } if (ielec == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE || ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE) { r = rsq*rinv; rt = r*tabscale; n0 = rt; eps = rt-n0; eps2 = eps*eps; nnn = table_nelements*n0; } /* Coulomb interaction. 
ielec==0 means no interaction */ if (ielec != GMX_NBKERNEL_ELEC_NONE) { qq = iq*charge[jnr]; switch (ielec) { case GMX_NBKERNEL_ELEC_NONE: break; case GMX_NBKERNEL_ELEC_COULOMB: /* Vanilla cutoff coulomb */ velec = qq*rinv; felec = velec*rinvsq; break; case GMX_NBKERNEL_ELEC_REACTIONFIELD: /* Reaction-field */ velec = qq*(rinv+fr->k_rf*rsq-fr->c_rf); felec = qq*(rinv*rinvsq-2.0*fr->k_rf); break; case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE: /* Tabulated coulomb */ Y = VFtab[nnn]; F = VFtab[nnn+1]; Geps = eps*VFtab[nnn+2]; Heps2 = eps2*VFtab[nnn+3]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; velec = qq*VV; felec = -qq*FF*tabscale*rinv; break; case GMX_NBKERNEL_ELEC_GENERALIZEDBORN: /* GB */ gmx_fatal(FARGS, "Death & horror! GB generic interaction not implemented.\n"); break; case GMX_NBKERNEL_ELEC_EWALD: ewrt = rsq*rinv*ewtabscale; ewitab = ewrt; eweps = ewrt-ewitab; ewitab = 4*ewitab; felec = ewtab[ewitab]+eweps*ewtab[ewitab+1]; rinvcorr = (fr->coulomb_modifier == eintmodPOTSHIFT) ? rinv-fr->ic->sh_ewald : rinv; velec = qq*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec))); felec = qq*rinv*(rinvsq-felec); break; default: gmx_fatal(FARGS, "Death & horror! No generic coulomb interaction for ielec=%d.\n", ielec); break; } if (fr->coulomb_modifier == eintmodPOTSWITCH) { d = rsq*rinv-fr->rcoulomb_switch; d = (d > 0.0) ? d : 0.0; d2 = d*d; sw = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5)); dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4)); /* Apply switch function. Note that felec=f/r since it will be multiplied * by the i-j displacement vector. This means felec'=f'/r=-(v*sw)'/r= * -(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=felec*sw-v*dsw/r */ felec = felec*sw - rinv*velec*dsw; /* Once we have used velec to update felec we can modify velec too */ velec *= sw; } if (bExactElecCutoff) { felec = (rsq <= rcoulomb2) ? felec : 0.0; velec = (rsq <= rcoulomb2) ? velec : 0.0; } vctot += velec; } /* End of coulomb interactions */ /* VdW interaction. 
ivdw==0 means no interaction */ if (ivdw != GMX_NBKERNEL_VDW_NONE) { tj = nti+nvdwparam*type[jnr]; switch (ivdw) { case GMX_NBKERNEL_VDW_NONE: break; case GMX_NBKERNEL_VDW_LENNARDJONES: /* Vanilla Lennard-Jones cutoff */ c6 = vdwparam[tj]; c12 = vdwparam[tj+1]; rinvsix = rinvsq*rinvsq*rinvsq; vvdw_disp = c6*rinvsix; vvdw_rep = c12*rinvsix*rinvsix; fvdw = (vvdw_rep-vvdw_disp)*rinvsq; if (fr->vdw_modifier == eintmodPOTSHIFT) { vvdw = (vvdw_rep + c12*sh_repulsion)/12.0 - (vvdw_disp + c6*sh_dispersion)/6.0; } else { vvdw = vvdw_rep/12.0-vvdw_disp/6.0; } break; case GMX_NBKERNEL_VDW_BUCKINGHAM: /* Buckingham */ c6 = vdwparam[tj]; cexp1 = vdwparam[tj+1]; cexp2 = vdwparam[tj+2]; rinvsix = rinvsq*rinvsq*rinvsq; vvdw_disp = c6*rinvsix; br = cexp2*rsq*rinv; vvdw_rep = cexp1*exp(-br); fvdw = (br*vvdw_rep-vvdw_disp)*rinvsq; if (fr->vdw_modifier == eintmodPOTSHIFT) { vvdw = (vvdw_rep-cexp1*exp(-cexp2*rvdw)) - (vvdw_disp + c6*sh_dispersion)/6.0; } else { vvdw = vvdw_rep-vvdw_disp/6.0; } break; case GMX_NBKERNEL_VDW_CUBICSPLINETABLE: /* Tabulated VdW */ c6 = vdwparam[tj]; c12 = vdwparam[tj+1]; Y = VFtab[nnn+4]; F = VFtab[nnn+5]; Geps = eps*VFtab[nnn+6]; Heps2 = eps2*VFtab[nnn+7]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vvdw_disp = c6*VV; fijD = c6*FF; Y = VFtab[nnn+8]; F = VFtab[nnn+9]; Geps = eps*VFtab[nnn+10]; Heps2 = eps2*VFtab[nnn+11]; Fp = F+Geps+Heps2; VV = Y+eps*Fp; FF = Fp+Geps+2.0*Heps2; vvdw_rep = c12*VV; fijR = c12*FF; fvdw = -(fijD+fijR)*tabscale*rinv; vvdw = vvdw_disp + vvdw_rep; break; default: gmx_fatal(FARGS, "Death & horror! No generic VdW interaction for ivdw=%d.\n", ivdw); break; } if (fr->vdw_modifier == eintmodPOTSWITCH) { d = rsq*rinv-fr->rvdw_switch; d = (d > 0.0) ? d : 0.0; d2 = d*d; sw = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5)); dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4)); /* See coulomb interaction for the force-switch formula */ fvdw = fvdw*sw - rinv*vvdw*dsw; vvdw *= sw; } if (bExactVdwCutoff) { fvdw = (rsq <= rvdw2) ? 
fvdw : 0.0; vvdw = (rsq <= rvdw2) ? vvdw : 0.0; } vvdwtot += vvdw; } /* end VdW interactions */ fscal = felec+fvdw; if (!bCG && force_cap > 0 && (fabs(fscal) > force_cap)) { fscal = force_cap*fscal/fabs(fscal); } fscal *= hybscal; tx = fscal*dx; ty = fscal*dy; tz = fscal*dz; fix = fix + tx; fiy = fiy + ty; fiz = fiz + tz; f[j3+0] = f[j3+0] - tx; f[j3+1] = f[j3+1] - ty; f[j3+2] = f[j3+2] - tz; } f[ii3+0] = f[ii3+0] + fix; f[ii3+1] = f[ii3+1] + fiy; f[ii3+2] = f[ii3+2] + fiz; fshift[is3] = fshift[is3]+fix; fshift[is3+1] = fshift[is3+1]+fiy; fshift[is3+2] = fshift[is3+2]+fiz; ggid = nlist->gid[n]; velecgrp[ggid] += vctot; vvdwgrp[ggid] += vvdwtot; } /* Estimate flops, average for generic adress kernel: * 14 flops per outer iteration * 54 flops per inner iteration */ inc_nrnb(nrnb, eNR_NBKERNEL_GENERIC_ADRESS, nlist->nri*14 + nlist->jindex[n]*54); }
/*
 * Gromacs nonbonded kernel pf_nb_kernel112
 * Coulomb interaction: Normal Coulomb
 * VdW interaction: Lennard-Jones
 * water optimization: pairs of SPC/TIP3P interactions
 * Calculate forces: yes
 *
 * Both the i and the j entry of every pair are treated as three
 * consecutive atoms (ii..ii+2 and jnr..jnr+2). All nine site-site pairs
 * get a Coulomb term; only the pair of the first sites (1-1) also gets a
 * Lennard-Jones term. Charges and LJ parameters are read once, from the
 * first i atom of the list (iinr[0]) and its successor, and reused for
 * every molecule.
 *
 * In addition to accumulating forces and energies, every site-site scalar
 * force is handed to pf_atom_add_nonbonded()/pf_atom_add_nonbonded_single()
 * when pf_global->bInitialized is set (pairwise-force bookkeeping; the
 * exact semantics of those helpers are defined elsewhere).
 *
 * p_krf, p_crf and p_tabscale are dereferenced but their values are not
 * used; VFtab, invsqrta, dvda, p_gbtabscale, GBtab and work are not
 * referenced at all in this kernel.
 */
void pf_nb_kernel112(
    int * p_nri,          /* in: number of i entries (neighbor lists) */
    int * iinr,           /* in: first atom index of the i molecule, per list */
    int * jindex,         /* in: list n owns jjnr[jindex[n] .. jindex[n+1]-1] */
    int * jjnr,           /* in: first atom index of each j molecule */
    int * shift,          /* in: shift-vector index, per list */
    real * shiftvec,      /* in: shift vectors, 3 reals each */
    real * fshift,        /* in/out: shift forces, 3 reals per shift index */
    int * gid,            /* in: energy-group index into Vc/Vvdw, per list */
    real * pos,           /* in: coordinates, 3 reals per atom */
    real * faction,       /* in/out: forces, 3 reals per atom */
    real * charge,        /* in: atom charges (only iinr[0] and iinr[0]+1 are read) */
    real * p_facel,       /* in: prefactor applied to the charge products */
    real * p_krf,         /* read, value unused here */
    real * p_crf,         /* read, value unused here */
    real * Vc,            /* in/out: Coulomb energy per energy group */
    int * type,           /* in: atom types (only type[iinr[0]] is read) */
    int * p_ntype,        /* in: number of atom types */
    real * vdwparam,      /* in: LJ parameters, c6 followed by c12 */
    real * Vvdw,          /* in/out: LJ energy per energy group */
    real * p_tabscale,    /* read, value unused here */
    real * VFtab,         /* not referenced */
    real * invsqrta,      /* not referenced */
    real * dvda,          /* not referenced */
    real * p_gbtabscale,  /* not referenced */
    real * GBtab,         /* not referenced */
    int * p_nthreads,     /* in: thread count; value only used with GMX_THREAD_SHM_FDECOMP */
    int * count,          /* in/out: shared next-list counter (GMX_THREAD_SHM_FDECOMP only) */
    void * mtx,           /* mutex guarding *count (GMX_THREAD_SHM_FDECOMP only) */
    int * outeriter,      /* out: number of outer (list) iterations done */
    int * inneriter,      /* out: number of inner (j molecule) iterations done */
    real * work,          /* not referenced */
    t_pf_global * pf_global) /* pairwise-force state passed on to the pf_* helpers */
{
    int nri,ntype,nthreads;
    real facel,krf,crf,tabscale,gbtabscale;
    int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
    int nn0,nn1,nouter,ninner;
    real shX,shY,shZ;
    real fscal,tx,ty,tz;
    real rinvsq;
    real qq,vcoul,vctot;
    int tj;
    real rinvsix;
    real Vvdw6,Vvdwtot;
    real Vvdw12;
    /* Shifted coordinates and force accumulators of the three i sites */
    real ix1,iy1,iz1,fix1,fiy1,fiz1;
    real ix2,iy2,iz2,fix2,fiy2,fiz2;
    real ix3,iy3,iz3,fix3,fiy3,fiz3;
    /* Coordinates and force registers of the three j sites */
    real jx1,jy1,jz1,fjx1,fjy1,fjz1;
    real jx2,jy2,jz2,fjx2,fjy2,fjz2;
    real jx3,jy3,jz3,fjx3,fjy3,fjz3;
    /* Per site pair "ab" (i site a, j site b): displacement, r^2 and 1/r */
    real dx11,dy11,dz11,rsq11,rinv11;
    real dx12,dy12,dz12,rsq12,rinv12;
    real dx13,dy13,dz13,rsq13,rinv13;
    real dx21,dy21,dz21,rsq21,rinv21;
    real dx22,dy22,dz22,rsq22,rinv22;
    real dx23,dy23,dz23,rsq23,rinv23;
    real dx31,dy31,dz31,rsq31,rinv31;
    real dx32,dy32,dz32,rsq32,rinv32;
    real dx33,dy33,dz33,rsq33,rinv33;
    real qO,qH,qqOO,qqOH,qqHH;
    real c6,c12;
    /* Scalar Coulomb and LJ force (already divided by r) of the 1-1 pair */
    real pf_coul, pf_lj;

    nri = *p_nri;
    ntype = *p_ntype;
    nthreads = *p_nthreads;
    facel = *p_facel;
    krf = *p_krf;
    crf = *p_crf;
    tabscale = *p_tabscale;

    /* Initialize water data */
    /* Charges come from the first i molecule only: site 1 ("O") and site 2
     * ("H"); site 3 reuses the site-2 charge through qqOH/qqHH.
     * NOTE(review): iinr[0] is read even when nri is 0 - callers presumably
     * never pass an empty list; confirm.
     */
    ii = iinr[0];
    qO = charge[ii];
    qH = charge[ii+1];
    qqOO = facel*qO*qO;
    qqOH = facel*qO*qH;
    qqHH = facel*qH*qH;
    /* Index 2*(ntype*t + t): presumably the (t,t) diagonal entry of an
     * ntype x ntype matrix of (c6,c12) pairs - TODO confirm layout.
     */
    tj = 2*(ntype+1)*type[ii];
    c6 = vdwparam[tj];
    c12 = vdwparam[tj+1];

    /* Reset outer and inner iteration counters */
    nouter = 0;
    ninner = 0;

    /* Loop over thread workunits */
    do
    {
#ifdef GMX_THREAD_SHM_FDECOMP
        /* Claim the next chunk [nn0,nn1) of lists from the shared counter */
        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
        nn0 = *count;
        /* Take successively smaller chunks (at least 10 lists) */
        nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
        *count = nn1;
        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
        if(nn1>nri) nn1=nri;
#else
        /* Without thread decomposition a single pass covers all lists */
        nn0 = 0;
        nn1 = nri;
#endif
        /* Start outer loop over neighborlists */
        for(n=nn0; (n<nn1); n++)
        {
            /* Load shift vector for this list */
            is3 = 3*shift[n];
            shX = shiftvec[is3];
            shY = shiftvec[is3+1];
            shZ = shiftvec[is3+2];

            /* Load limits for loop over neighbors */
            nj0 = jindex[n];
            nj1 = jindex[n+1];

            /* Get outer coordinate index */
            ii = iinr[n];
            ii3 = 3*ii;

            /* Load i atom data, add shift vector */
            ix1 = shX + pos[ii3+0];
            iy1 = shY + pos[ii3+1];
            iz1 = shZ + pos[ii3+2];
            ix2 = shX + pos[ii3+3];
            iy2 = shY + pos[ii3+4];
            iz2 = shZ + pos[ii3+5];
            ix3 = shX + pos[ii3+6];
            iy3 = shY + pos[ii3+7];
            iz3 = shZ + pos[ii3+8];

            /* Zero the potential energy for this list */
            vctot = 0;
            Vvdwtot = 0;

            /* Clear i atom forces */
            fix1 = 0;
            fiy1 = 0;
            fiz1 = 0;
            fix2 = 0;
            fiy2 = 0;
            fiz2 = 0;
            fix3 = 0;
            fiy3 = 0;
            fiz3 = 0;

            for(k=nj0; (k<nj1); k++)
            {
                /* Get j neighbor index, and coordinate index */
                jnr = jjnr[k];
                j3 = 3*jnr;

                /* load j atom coordinates */
                jx1 = pos[j3+0];
                jy1 = pos[j3+1];
                jz1 = pos[j3+2];
                jx2 = pos[j3+3];
                jy2 = pos[j3+4];
                jz2 = pos[j3+5];
                jx3 = pos[j3+6];
                jy3 = pos[j3+7];
                jz3 = pos[j3+8];

                /* Calculate distance */
                dx11 = ix1 - jx1;
                dy11 = iy1 - jy1;
                dz11 = iz1 - jz1;
                rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
                dx12 = ix1 - jx2;
                dy12 = iy1 - jy2;
                dz12 = iz1 - jz2;
                rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
                dx13 = ix1 - jx3;
                dy13 = iy1 - jy3;
                dz13 = iz1 - jz3;
                rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
                dx21 = ix2 - jx1;
                dy21 = iy2 - jy1;
                dz21 = iz2 - jz1;
                rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
                dx22 = ix2 - jx2;
                dy22 = iy2 - jy2;
                dz22 = iz2 - jz2;
                rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
                dx23 = ix2 - jx3;
                dy23 = iy2 - jy3;
                dz23 = iz2 - jz3;
                rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
                dx31 = ix3 - jx1;
                dy31 = iy3 - jy1;
                dz31 = iz3 - jz1;
                rsq31 =
                    dx31*dx31+dy31*dy31+dz31*dz31;
                dx32 = ix3 - jx2;
                dy32 = iy3 - jy2;
                dz32 = iz3 - jz2;
                rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
                dx33 = ix3 - jx3;
                dy33 = iy3 - jy3;
                dz33 = iz3 - jz3;
                rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;

                /* Calculate 1/r and 1/r2 */
                rinv11 = gmx_invsqrt(rsq11);
                rinv12 = gmx_invsqrt(rsq12);
                rinv13 = gmx_invsqrt(rsq13);
                rinv21 = gmx_invsqrt(rsq21);
                rinv22 = gmx_invsqrt(rsq22);
                rinv23 = gmx_invsqrt(rsq23);
                rinv31 = gmx_invsqrt(rsq31);
                rinv32 = gmx_invsqrt(rsq32);
                rinv33 = gmx_invsqrt(rsq33);

                /*
                 * The nine stanzas below follow one pattern. The j-site
                 * forces are kept in registers fjx1..fjz3: they are loaded
                 * from faction in the stanzas of i site 1, updated in those
                 * of i site 2 and written back in those of i site 3.
                 * fscal is the pair force divided by r, so multiplying it by
                 * the displacement yields the force vector.
                 */

                /* ---- pair 1-1: Coulomb (qqOO) + Lennard-Jones ---- */
                /* Load parameters for j atom */
                qq = qqOO;
                rinvsq = rinv11*rinv11;

                /* Coulomb interaction */
                vcoul = qq*rinv11;
                vctot = vctot+vcoul;
                /* Coulomb-only part of the scalar force, for the pf call below */
                pf_coul = vcoul*rinvsq;

                /* Lennard-Jones interaction */
                rinvsix = rinvsq*rinvsq*rinvsq;
                Vvdw6 = c6*rinvsix;
                Vvdw12 = c12*rinvsix*rinvsix;
                Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
                /* LJ-only part of the scalar force, for the pf call below */
                pf_lj = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
                fscal = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx11;
                ty = fscal*dy11;
                tz = fscal*dz11;

                /* Increment i atom force */
                fix1 = fix1 + tx;
                fiy1 = fiy1 + ty;
                fiz1 = fiz1 + tz;

                /* Decrement j atom force */
                fjx1 = faction[j3+0] - tx;
                fjy1 = faction[j3+1] - ty;
                fjz1 = faction[j3+2] - tz;

                /* pairwise forces */
                if (pf_global->bInitialized)
                    pf_atom_add_nonbonded(pf_global, ii, jnr, pf_coul, pf_lj, dx11, dy11, dz11);

                /* ---- pair 1-2: Coulomb only (qqOH) ---- */
                /* Load parameters for j atom */
                qq = qqOH;
                rinvsq = rinv12*rinv12;

                /* Coulomb interaction */
                vcoul = qq*rinv12;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx12;
                ty = fscal*dy12;
                tz = fscal*dz12;

                /* Increment i atom force */
                fix1 = fix1 + tx;
                fiy1 = fiy1 + ty;
                fiz1 = fiz1 + tz;

                /* Decrement j atom force */
                fjx2 = faction[j3+3] - tx;
                fjy2 = faction[j3+4] - ty;
                fjz2 = faction[j3+5] - tz;

                /* pairwise forces */
                if (pf_global->bInitialized)
                    pf_atom_add_nonbonded_single(pf_global, ii, jnr+1, PF_INTER_COULOMB, fscal, dx12, dy12, dz12);

                /* ---- pair 1-3: Coulomb only (qqOH) ---- */
                /* Load parameters for j atom */
                qq = qqOH;
                rinvsq = rinv13*rinv13;

                /* Coulomb interaction */
                vcoul = qq*rinv13;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx13;
                ty = fscal*dy13;
                tz = fscal*dz13;

                /* Increment i atom force */
                fix1 = fix1 + tx;
                fiy1 = fiy1 + ty;
                fiz1 = fiz1 + tz;

                /* Decrement j atom force */
                fjx3 = faction[j3+6] - tx;
                fjy3 = faction[j3+7] - ty;
                fjz3 = faction[j3+8] - tz;

                /* pairwise forces */
                if (pf_global->bInitialized)
                    pf_atom_add_nonbonded_single(pf_global, ii, jnr+2, PF_INTER_COULOMB, fscal, dx13, dy13, dz13);

                /* ---- pair 2-1: Coulomb only (qqOH) ---- */
                /* Load parameters for j atom */
                qq = qqOH;
                rinvsq = rinv21*rinv21;

                /* Coulomb interaction */
                vcoul = qq*rinv21;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx21;
                ty = fscal*dy21;
                tz = fscal*dz21;

                /* Increment i atom force */
                fix2 = fix2 + tx;
                fiy2 = fiy2 + ty;
                fiz2 = fiz2 + tz;

                /* Decrement j atom force */
                fjx1 = fjx1 - tx;
                fjy1 = fjy1 - ty;
                fjz1 = fjz1 - tz;

                /* pairwise forces */
                if (pf_global->bInitialized)
                    pf_atom_add_nonbonded_single(pf_global, ii+1, jnr, PF_INTER_COULOMB, fscal, dx21, dy21, dz21);

                /* ---- pair 2-2: Coulomb only (qqHH) ---- */
                /* Load parameters for j atom */
                qq = qqHH;
                rinvsq = rinv22*rinv22;

                /* Coulomb interaction */
                vcoul = qq*rinv22;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx22;
                ty = fscal*dy22;
                tz = fscal*dz22;

                /* Increment i atom force */
                fix2 = fix2 + tx;
                fiy2 = fiy2 + ty;
                fiz2 = fiz2 + tz;

                /* Decrement j atom force */
                fjx2 = fjx2 - tx;
                fjy2 = fjy2 - ty;
                fjz2 = fjz2 - tz;

                /* pairwise forces */
                if (pf_global->bInitialized)
                    pf_atom_add_nonbonded_single(pf_global, ii+1, jnr+1, PF_INTER_COULOMB, fscal, dx22, dy22, dz22);

                /* ---- pair 2-3: Coulomb only (qqHH) ---- */
                /* Load parameters for j atom */
                qq = qqHH;
                rinvsq = rinv23*rinv23;

                /* Coulomb interaction */
                vcoul = qq*rinv23;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx23;
                ty = fscal*dy23;
                tz = fscal*dz23;

                /* Increment i atom force */
                fix2 = fix2 + tx;
                fiy2 = fiy2 + ty;
                fiz2 = fiz2 + tz;

                /* Decrement j atom force */
                fjx3 = fjx3
                    - tx;
                fjy3 = fjy3 - ty;
                fjz3 = fjz3 - tz;

                /* pairwise forces */
                if (pf_global->bInitialized)
                    pf_atom_add_nonbonded_single(pf_global, ii+1, jnr+2, PF_INTER_COULOMB, fscal, dx23, dy23, dz23);

                /* ---- pair 3-1: Coulomb only (qqOH); j site 1 force is stored ---- */
                /* Load parameters for j atom */
                qq = qqOH;
                rinvsq = rinv31*rinv31;

                /* Coulomb interaction */
                vcoul = qq*rinv31;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx31;
                ty = fscal*dy31;
                tz = fscal*dz31;

                /* Increment i atom force */
                fix3 = fix3 + tx;
                fiy3 = fiy3 + ty;
                fiz3 = fiz3 + tz;

                /* Decrement j atom force */
                faction[j3+0] = fjx1 - tx;
                faction[j3+1] = fjy1 - ty;
                faction[j3+2] = fjz1 - tz;

                /* pairwise forces */
                if (pf_global->bInitialized)
                    pf_atom_add_nonbonded_single(pf_global, ii+2, jnr, PF_INTER_COULOMB, fscal, dx31, dy31, dz31);

                /* ---- pair 3-2: Coulomb only (qqHH); j site 2 force is stored ---- */
                /* Load parameters for j atom */
                qq = qqHH;
                rinvsq = rinv32*rinv32;

                /* Coulomb interaction */
                vcoul = qq*rinv32;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx32;
                ty = fscal*dy32;
                tz = fscal*dz32;

                /* Increment i atom force */
                fix3 = fix3 + tx;
                fiy3 = fiy3 + ty;
                fiz3 = fiz3 + tz;

                /* Decrement j atom force */
                faction[j3+3] = fjx2 - tx;
                faction[j3+4] = fjy2 - ty;
                faction[j3+5] = fjz2 - tz;

                /* pairwise forces */
                if (pf_global->bInitialized)
                    pf_atom_add_nonbonded_single(pf_global, ii+2, jnr+1, PF_INTER_COULOMB, fscal, dx32, dy32, dz32);

                /* ---- pair 3-3: Coulomb only (qqHH); j site 3 force is stored ---- */
                /* Load parameters for j atom */
                qq = qqHH;
                rinvsq = rinv33*rinv33;

                /* Coulomb interaction */
                vcoul = qq*rinv33;
                vctot = vctot+vcoul;
                fscal = (vcoul)*rinvsq;

                /* Calculate temporary vectorial force */
                tx = fscal*dx33;
                ty = fscal*dy33;
                tz = fscal*dz33;

                /* Increment i atom force */
                fix3 = fix3 + tx;
                fiy3 = fiy3 + ty;
                fiz3 = fiz3 + tz;

                /* Decrement j atom force */
                faction[j3+6] = fjx3 - tx;
                faction[j3+7] = fjy3 - ty;
                faction[j3+8] = fjz3 - tz;

                /* pairwise forces */
                if (pf_global->bInitialized)
                    pf_atom_add_nonbonded_single(pf_global, ii+2, jnr+2, PF_INTER_COULOMB, fscal, dx33, dy33, dz33);

                /* Inner loop uses 245 flops/iteration */
            }

            /* Add i forces to mem and shifted force list */
            faction[ii3+0] = faction[ii3+0] + fix1;
            faction[ii3+1] = faction[ii3+1] + fiy1;
            faction[ii3+2] = faction[ii3+2] + fiz1;
            faction[ii3+3] = faction[ii3+3] + fix2;
            faction[ii3+4] = faction[ii3+4] + fiy2;
            faction[ii3+5] = faction[ii3+5] + fiz2;
            faction[ii3+6] = faction[ii3+6] + fix3;
            faction[ii3+7] = faction[ii3+7] + fiy3;
            faction[ii3+8] = faction[ii3+8] + fiz3;
            /* The shift force receives the sum over all three i sites */
            fshift[is3] = fshift[is3]+fix1+fix2+fix3;
            fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
            fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;

            /* Add potential energies to the group for this list */
            ggid = gid[n];
            Vc[ggid] = Vc[ggid] + vctot;
            Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;

            /* Increment number of inner iterations */
            ninner = ninner + nj1 - nj0;

            /* Outer loop uses 29 flops/iteration */
        }

        /* Increment number of outer iterations */
        nouter = nouter + nn1 - nn0;
    }
    while (nn1<nri); /* repeat until the claimed chunk reaches the end of the list */

    /* Write outer/inner iteration count to pointers */
    *outeriter = nouter;
    *inneriter = ninner;
}