gmx_bool bshakef(FILE *log, gmx_shakedata_t shaked, real invmass[], int nblocks, int sblock[], t_idef *idef, t_inputrec *ir, rvec x_s[], rvec prime[], t_nrnb *nrnb, real *scaled_lagrange_multiplier, real lambda, real *dvdlambda, real invdt, rvec *v, gmx_bool bCalcVir, tensor vir_r_m_dr, gmx_bool bDumpOnError, int econq) { t_iatom *iatoms; real dt_2, dvdl; int i, n0, ncon, blen, type, ll; int tnit = 0, trij = 0; #ifdef DEBUG fprintf(log, "nblocks=%d, sblock[0]=%d\n", nblocks, sblock[0]); #endif ncon = idef->il[F_CONSTR].nr/3; for (ll = 0; ll < ncon; ll++) { scaled_lagrange_multiplier[ll] = 0; } iatoms = &(idef->il[F_CONSTR].iatoms[sblock[0]]); for (i = 0; (i < nblocks); ) { blen = (sblock[i+1]-sblock[i]); blen /= 3; n0 = vec_shakef(log, shaked, invmass, blen, idef->iparams, iatoms, ir->shake_tol, x_s, prime, shaked->omega, ir->efep != efepNO, lambda, scaled_lagrange_multiplier, invdt, v, bCalcVir, vir_r_m_dr, econq); #ifdef DEBUGSHAKE check_cons(log, blen, x_s, prime, v, idef->iparams, iatoms, invmass, econq); #endif if (n0 == 0) { if (bDumpOnError && log) { { check_cons(log, blen, x_s, prime, v, idef->iparams, iatoms, invmass, econq); } } return FALSE; } tnit += n0*blen; trij += blen; iatoms += 3*blen; /* Increment pointer! */ scaled_lagrange_multiplier += blen; i++; } /* only for position part? */ if (econq == econqCoord) { if (ir->efep != efepNO) { real bondA, bondB; /* TODO This should probably use invdt, so that sd integrator scaling works properly */ dt_2 = 1/gmx::square(ir->delta_t); dvdl = 0; for (ll = 0; ll < ncon; ll++) { type = idef->il[F_CONSTR].iatoms[3*ll]; /* Per equations in the manual, dv/dl = -2 \sum_ll lagrangian_ll * r_ll * (d_B - d_A) */ /* The vector scaled_lagrange_multiplier[ll] contains the value -2 r_ll eta_ll (eta_ll is the estimate of the Langrangian, definition on page 336 of Ryckaert et al 1977), so the pre-factors are already present. 
*/ bondA = idef->iparams[type].constr.dA; bondB = idef->iparams[type].constr.dB; dvdl += scaled_lagrange_multiplier[ll] * dt_2 * (bondB - bondA); } *dvdlambda += dvdl; } } #ifdef DEBUG fprintf(log, "tnit: %5d omega: %10.5f\n", tnit, omega); #endif if (ir->bShakeSOR) { if (tnit > shaked->gamma) { shaked->delta *= -0.5; } shaked->omega += shaked->delta; shaked->gamma = tnit; } inc_nrnb(nrnb, eNR_SHAKE, tnit); inc_nrnb(nrnb, eNR_SHAKE_RIJ, trij); if (v) { inc_nrnb(nrnb, eNR_CONSTR_V, trij*2); } if (bCalcVir) { inc_nrnb(nrnb, eNR_CONSTR_VIR, trij); } return TRUE; }
/*
 * Analytic SETTLE constraint kernel for three-atom molecules, with optional
 * periodic-boundary handling.
 *
 * settled         supplies the precomputed parameters in settled->massw
 *                 (wh, ra, rb, rc, irc2, mO, mH)
 * nsettle         number of molecules to process
 * iatoms          four entries per molecule; entries 1..3 are read as the
 *                 atom indices (slot 0 is not read here)
 * pbc             when NULL plain differences are used, otherwise
 *                 pbc_dx_aiuc() supplies the intra-molecular vectors
 * b4              flat coordinate array (3 reals per atom) used as the
 *                 reference geometry -- presumably the positions before the
 *                 unconstrained update, confirm with the caller
 * after           flat coordinate array that is corrected in place
 * invdt           factor applied to the position change when updating v
 * v               optional flat velocity array; skipped when NULL
 * CalcVirAtomEnd  the virial is accumulated only for molecules whose first
 *                 atom index is below this value
 * vir_r_m_dr      virial accumulator (decremented)
 * error           set to -1 on entry; set to the loop index i of a molecule
 *                 for which one of the square-root arguments (tmp, tmp2) is
 *                 not positive.  Only the last such index survives.
 *
 * NOTE(review): in the pbc branch the hydrogens in `after` are shifted with
 * rvec_dec() before the solve and shifted back with rvec_inc() only inside
 * the bOK branch, so for a failing molecule the shift appears to stay
 * applied -- confirm this is acceptable to callers.
 */
void csettle(gmx_settledata_t settled, int nsettle, t_iatom iatoms[], const t_pbc *pbc, real b4[], real after[], real invdt, real *v, int CalcVirAtomEnd, tensor vir_r_m_dr, int *error)
{
    /* ***************************************************************** */
    /* ** */
    /* Subroutine : setlep - reset positions of TIP3P waters ** */
    /* Author : Shuichi Miyamoto ** */
    /* Date of last update : Oct. 1, 1992 ** */
    /* ** */
    /* Reference for the SETTLE algorithm ** */
    /* S. Miyamoto et al., J. Comp. Chem., 13, 952 (1992). ** */
    /* ** */
    /* ***************************************************************** */

    /* Initialized data */
    settleparam_t *p;
    real wh, ra, rb, rc, irc2;
    real mO, mH;

    /* Local variables */
    real gama, beta, alpa, xcom, ycom, zcom, al2be2, tmp, tmp2;
    real axlng, aylng, azlng, trns11, trns21, trns31, trns12, trns22, trns32, trns13, trns23, trns33, cosphi, costhe, sinphi, sinthe, cospsi, xaksxd, yaksxd, xakszd, yakszd, zakszd, zaksxd, xaksyd, xb0, yb0, zb0, xc0, yc0, zc0, xa1;
    real ya1, za1, xb1, yb1;
    real zb1, xc1, yc1, zc1, yaksyd, zaksyd, sinpsi, xa3, ya3, za3, xb3, yb3, zb3, xc3, yc3, zc3, xb0d, yb0d, xc0d, yc0d, za1d, xb1d, yb1d, zb1d, xc1d, yc1d, zc1d, ya2d, xb2d, yb2d, yc2d, xa3d, ya3d, za3d, xb3d, yb3d, zb3d, xc3d, yc3d, zc3d;
    real t1, t2;
    real dax, day, daz, dbx, dby, dbz, dcx, dcy, dcz;
    real mdax, mday, mdaz, mdbx, mdby, mdbz, mdcx, mdcy, mdcz;
    gmx_bool bOK;
    int i, ow1, hw2, hw3;
    rvec dx, sh_hw2 = {0, 0, 0}, sh_hw3 = {0, 0, 0};
    rvec doh2, doh3;
    int is;

    *error = -1;
    /* Convert the atom count into an offset into the flat (3 reals per atom) arrays */
    CalcVirAtomEnd *= 3;
    p = &settled->massw;
    wh = p->wh;
    rc = p->rc;
    ra = p->ra;
    rb = p->rb;
    irc2 = p->irc2;
    mO = p->mO;
    mH = p->mH;
#ifdef PRAGMAS
#pragma ivdep
#endif
    for (i = 0; i < nsettle; ++i)
    {
        bOK = TRUE;
        /* --- Step1 A1' --- */
        /* Offsets of the three atoms into the flat coordinate arrays */
        ow1 = iatoms[i*4+1] * 3;
        hw2 = iatoms[i*4+2] * 3;
        hw3 = iatoms[i*4+3] * 3;
        if (pbc == NULL)
        {
            xb0 = b4[hw2 + XX] - b4[ow1 + XX];
            yb0 = b4[hw2 + YY] - b4[ow1 + YY];
            zb0 = b4[hw2 + ZZ] - b4[ow1 + ZZ];
            xc0 = b4[hw3 + XX] - b4[ow1 + XX];
            yc0 = b4[hw3 + YY] - b4[ow1 + YY];
            zc0 = b4[hw3 + ZZ] - b4[ow1 + ZZ];
            /* 6 flops */
            rvec_sub(after+hw2, after+ow1, doh2);
            rvec_sub(after+hw3, after+ow1, doh3);
            /* 6 flops */
        }
        else
        {
            pbc_dx_aiuc(pbc, b4+hw2, b4+ow1, dx);
            xb0 = dx[XX];
            yb0 = dx[YY];
            zb0 = dx[ZZ];
            pbc_dx_aiuc(pbc, b4+hw3, b4+ow1, dx);
            xc0 = dx[XX];
            yc0 = dx[YY];
            zc0 = dx[ZZ];
            /* Tedious way of doing pbc */
            /* When pbc_dx_aiuc() does not report CENTRAL, record the offset
             * between the stored atom and first-atom + doh, and subtract it
             * from the stored atom for the duration of the solve.
             */
            is = pbc_dx_aiuc(pbc, after+hw2, after+ow1, doh2);
            if (is == CENTRAL)
            {
                clear_rvec(sh_hw2);
            }
            else
            {
                sh_hw2[XX] = after[hw2 + XX] - (after[ow1 + XX] + doh2[XX]);
                sh_hw2[YY] = after[hw2 + YY] - (after[ow1 + YY] + doh2[YY]);
                sh_hw2[ZZ] = after[hw2 + ZZ] - (after[ow1 + ZZ] + doh2[ZZ]);
                rvec_dec(after+hw2, sh_hw2);
            }
            is = pbc_dx_aiuc(pbc, after+hw3, after+ow1, doh3);
            if (is == CENTRAL)
            {
                clear_rvec(sh_hw3);
            }
            else
            {
                sh_hw3[XX] = after[hw3 + XX] - (after[ow1 + XX] + doh3[XX]);
                sh_hw3[YY] = after[hw3 + YY] - (after[ow1 + YY] + doh3[YY]);
                sh_hw3[ZZ] = after[hw3 + ZZ] - (after[ow1 + ZZ] + doh3[ZZ]);
                rvec_dec(after+hw3, sh_hw3);
            }
        }

        /* Not calculating the center of mass using the oxygen position
         * and the O-H distances, as done below, will make SETTLE
         * the largest source of energy drift for simulations of water,
         * as then the oxygen coordinate is multiplied by 0.89 at every step,
         * which can then transfer a systematic rounding to the oxygen velocity.
         */
        xa1 = -(doh2[XX] + doh3[XX]) * wh;
        ya1 = -(doh2[YY] + doh3[YY]) * wh;
        za1 = -(doh2[ZZ] + doh3[ZZ]) * wh;
        xcom = after[ow1 + XX] - xa1;
        ycom = after[ow1 + YY] - ya1;
        zcom = after[ow1 + ZZ] - za1;
        xb1 = after[hw2 + XX] - xcom;
        yb1 = after[hw2 + YY] - ycom;
        zb1 = after[hw2 + ZZ] - zcom;
        xc1 = after[hw3 + XX] - xcom;
        yc1 = after[hw3 + YY] - ycom;
        zc1 = after[hw3 + ZZ] - zcom;
        /* 15 flops */

        /* Build three axis vectors from cross products: (b0 x c0), then
         * (a1 x that), then the cross product of those two.
         */
        xakszd = yb0 * zc0 - zb0 * yc0;
        yakszd = zb0 * xc0 - xb0 * zc0;
        zakszd = xb0 * yc0 - yb0 * xc0;
        xaksxd = ya1 * zakszd - za1 * yakszd;
        yaksxd = za1 * xakszd - xa1 * zakszd;
        zaksxd = xa1 * yakszd - ya1 * xakszd;
        xaksyd = yakszd * zaksxd - zakszd * yaksxd;
        yaksyd = zakszd * xaksxd - xakszd * zaksxd;
        zaksyd = xakszd * yaksxd - yakszd * xaksxd;
        /* 27 flops */

        /* Inverse lengths of the three axis vectors; trnsRC are the
         * normalised axis components.
         */
        axlng = gmx_invsqrt(xaksxd * xaksxd + yaksxd * yaksxd + zaksxd * zaksxd);
        aylng = gmx_invsqrt(xaksyd * xaksyd + yaksyd * yaksyd + zaksyd * zaksyd);
        azlng = gmx_invsqrt(xakszd * xakszd + yakszd * yakszd + zakszd * zakszd);

        trns11 = xaksxd * axlng;
        trns21 = yaksxd * axlng;
        trns31 = zaksxd * axlng;
        trns12 = xaksyd * aylng;
        trns22 = yaksyd * aylng;
        trns32 = zaksyd * aylng;
        trns13 = xakszd * azlng;
        trns23 = yakszd * azlng;
        trns33 = zakszd * azlng;
        /* 24 flops */

        /* Project the vectors onto the new axes (suffix d) */
        xb0d = trns11 * xb0 + trns21 * yb0 + trns31 * zb0;
        yb0d = trns12 * xb0 + trns22 * yb0 + trns32 * zb0;
        xc0d = trns11 * xc0 + trns21 * yc0 + trns31 * zc0;
        yc0d = trns12 * xc0 + trns22 * yc0 + trns32 * zc0;
        /* xa1d = trns11 * xa1 + trns21 * ya1 + trns31 * za1; ya1d = trns12 * xa1 + trns22 * ya1 + trns32 * za1; */
        za1d = trns13 * xa1 + trns23 * ya1 + trns33 * za1;
        xb1d = trns11 * xb1 + trns21 * yb1 + trns31 * zb1;
        yb1d = trns12 * xb1 + trns22 * yb1 + trns32 * zb1;
        zb1d = trns13 * xb1 + trns23 * yb1 + trns33 * zb1;
        xc1d = trns11 * xc1 + trns21 * yc1 + trns31 * zc1;
        yc1d = trns12 * xc1 + trns22 * yc1 + trns32 * zc1;
        zc1d = trns13 * xc1 + trns23 * yc1 + trns33 * zc1;
        /* 65 flops */

        /* gmx_invsqrt(ra*ra) stands in for a division by ra */
        sinphi = za1d * gmx_invsqrt(ra*ra);
        tmp = 1.0 - sinphi * sinphi;
        if (tmp <= 0)
        {
            bOK = FALSE;
        }
        else
        {
            /* tmp2 is reused: first 1/sqrt(tmp), then 1 - sinpsi^2 */
            tmp2 = gmx_invsqrt(tmp);
            cosphi = tmp*tmp2;
            sinpsi = (zb1d - zc1d) * irc2 * tmp2;
            tmp2 = 1.0 - sinpsi * sinpsi;
            if (tmp2 <= 0)
            {
                bOK = FALSE;
            }
            else
            {
                cospsi = tmp2*gmx_invsqrt(tmp2);
            }
        }
        /* 46 flops */

        if (bOK)
        {
            ya2d = ra * cosphi;
            xb2d = -rc * cospsi;
            t1 = -rb * cosphi;
            t2 = rc * sinpsi * sinphi;
            yb2d = t1 - t2;
            yc2d = t1 + t2;
            /* 7 flops */

            /* --- Step3 al,be,ga --- */
            alpa = xb2d * (xb0d - xc0d) + yb0d * yb2d + yc0d * yc2d;
            beta = xb2d * (yc0d - yb0d) + xb0d * yb2d + xc0d * yc2d;
            gama = xb0d * yb1d - xb1d * yb0d + xc0d * yc1d - xc1d * yc0d;
            al2be2 = alpa * alpa + beta * beta;
            tmp2 = (al2be2 - gama * gama);
            /* x*gmx_invsqrt(x) is used as sqrt(x); gmx_invsqrt(y*y) as 1/y */
            sinthe = (alpa * gama - beta * tmp2*gmx_invsqrt(tmp2)) * gmx_invsqrt(al2be2*al2be2);
            /* 47 flops */

            /* --- Step4 A3' --- */
            tmp2 = 1.0 - sinthe * sinthe;
            costhe = tmp2*gmx_invsqrt(tmp2);
            xa3d = -ya2d * sinthe;
            ya3d = ya2d * costhe;
            za3d = za1d;
            xb3d = xb2d * costhe - yb2d * sinthe;
            yb3d = xb2d * sinthe + yb2d * costhe;
            zb3d = zb1d;
            xc3d = -xb2d * costhe - yc2d * sinthe;
            yc3d = -xb2d * sinthe + yc2d * costhe;
            zc3d = zc1d;
            /* 26 flops */

            /* --- Step5 A3 --- */
            xa3 = trns11 * xa3d + trns12 * ya3d + trns13 * za3d;
            ya3 = trns21 * xa3d + trns22 * ya3d + trns23 * za3d;
            za3 = trns31 * xa3d + trns32 * ya3d + trns33 * za3d;
            xb3 = trns11 * xb3d + trns12 * yb3d + trns13 * zb3d;
            yb3 = trns21 * xb3d + trns22 * yb3d + trns23 * zb3d;
            zb3 = trns31 * xb3d + trns32 * yb3d + trns33 * zb3d;
            xc3 = trns11 * xc3d + trns12 * yc3d + trns13 * zc3d;
            yc3 = trns21 * xc3d + trns22 * yc3d + trns23 * zc3d;
            zc3 = trns31 * xc3d + trns32 * yc3d + trns33 * zc3d;
            /* 45 flops */

            /* Write the corrected positions back */
            after[ow1] = xcom + xa3;
            after[ow1 + 1] = ycom + ya3;
            after[ow1 + 2] = zcom + za3;
            after[hw2] = xcom + xb3;
            after[hw2 + 1] = ycom + yb3;
            after[hw2 + 2] = zcom + zb3;
            after[hw3] = xcom + xc3;
            after[hw3 + 1] = ycom + yc3;
            after[hw3 + 2] = zcom + zc3;
            /* 9 flops */

            /* Re-apply the shift that was subtracted in the pbc branch above */
            if (pbc != NULL)
            {
                rvec_inc(after+hw2, sh_hw2);
                rvec_inc(after+hw3, sh_hw3);
            }

            /* Position change caused by the constraint, per atom */
            dax = xa3 - xa1;
            day = ya3 - ya1;
            daz = za3 - za1;
            dbx = xb3 - xb1;
            dby = yb3 - yb1;
            dbz = zb3 - zb1;
            dcx = xc3 - xc1;
            dcy = yc3 - yc1;
            dcz = zc3 - zc1;
            /* 9 flops, counted with the virial */

            if (v != NULL)
            {
                v[ow1] += dax*invdt;
                v[ow1 + 1] += day*invdt;
                v[ow1 + 2] += daz*invdt;
                v[hw2] += dbx*invdt;
                v[hw2 + 1] += dby*invdt;
                v[hw2 + 2] += dbz*invdt;
                v[hw3] += dcx*invdt;
                v[hw3 + 1] += dcy*invdt;
                v[hw3 + 2] += dcz*invdt;
                /* 3*6 flops */
            }

            if (ow1 < CalcVirAtomEnd)
            {
                mdax = mO*dax;
                mday = mO*day;
                mdaz = mO*daz;
                mdbx = mH*dbx;
                mdby = mH*dby;
                mdbz = mH*dbz;
                mdcx = mH*dcx;
                mdcy = mH*dcy;
                mdcz = mH*dcz;
                /* The positions of atoms 2 and 3 are expressed as
                 * first-atom position + (xb0|xc0, ...) rather than read
                 * from b4 directly.
                 */
                vir_r_m_dr[XX][XX] -= b4[ow1 ]*mdax + (b4[ow1 ]+xb0)*mdbx + (b4[ow1 ]+xc0)*mdcx;
                vir_r_m_dr[XX][YY] -= b4[ow1 ]*mday + (b4[ow1 ]+xb0)*mdby + (b4[ow1 ]+xc0)*mdcy;
                vir_r_m_dr[XX][ZZ] -= b4[ow1 ]*mdaz + (b4[ow1 ]+xb0)*mdbz + (b4[ow1 ]+xc0)*mdcz;
                vir_r_m_dr[YY][XX] -= b4[ow1+1]*mdax + (b4[ow1+1]+yb0)*mdbx + (b4[ow1+1]+yc0)*mdcx;
                vir_r_m_dr[YY][YY] -= b4[ow1+1]*mday + (b4[ow1+1]+yb0)*mdby + (b4[ow1+1]+yc0)*mdcy;
                vir_r_m_dr[YY][ZZ] -= b4[ow1+1]*mdaz + (b4[ow1+1]+yb0)*mdbz + (b4[ow1+1]+yc0)*mdcz;
                vir_r_m_dr[ZZ][XX] -= b4[ow1+2]*mdax + (b4[ow1+2]+zb0)*mdbx + (b4[ow1+2]+zc0)*mdcx;
                vir_r_m_dr[ZZ][YY] -= b4[ow1+2]*mday + (b4[ow1+2]+zb0)*mdby + (b4[ow1+2]+zc0)*mdcy;
                vir_r_m_dr[ZZ][ZZ] -= b4[ow1+2]*mdaz + (b4[ow1+2]+zb0)*mdbz + (b4[ow1+2]+zc0)*mdcz;
                /* 3*24 - 9 flops */
            }
        }
        else
        {
            /* Report the failing molecule; later failures overwrite earlier ones */
            *error = i;
        }
#ifdef DEBUG
        if (debug)
        {
            check_cons(debug, "settle", after, ow1, hw2, hw3);
        }
#endif
    }
}
void csettle(FILE *fp,int nsettle, t_iatom iatoms[],real b4[], real after[], real dOH,real dHH,real mO,real mH, real invdt,real *v,bool bCalcVir,tensor rmdr,int *error) { /* ***************************************************************** */ /* ** */ /* Subroutine : setlep - reset positions of TIP3P waters ** */ /* Author : Shuichi Miyamoto ** */ /* Date of last update : Oct. 1, 1992 ** */ /* ** */ /* Reference for the SETTLE algorithm ** */ /* S. Miyamoto et al., J. Comp. Chem., 13, 952 (1992). ** */ /* ** */ /* ***************************************************************** */ /* Initialized data */ static bool bFirst=TRUE; /* These three weights need have double precision. Using single precision * can result in huge velocity and pressure deviations. */ static double wo,wh,wohh; static real ra,rb,rc,rc2,rone; #ifdef DEBUG_PRES static int step = 0; #endif /* Local variables */ real gama, beta, alpa, xcom, ycom, zcom, al2be2, tmp, tmp2; real axlng, aylng, azlng, trns11, trns21, trns31, trns12, trns22, trns32, trns13, trns23, trns33, cosphi, costhe, sinphi, sinthe, cospsi, xaksxd, yaksxd, xakszd, yakszd, zakszd, zaksxd, xaksyd, xb0, yb0, zb0, xc0, yc0, zc0, xa1; real ya1, za1, xb1, yb1; real zb1, xc1, yc1, zc1, yaksyd, zaksyd, sinpsi, xa3, ya3, za3, xb3, yb3, zb3, xc3, yc3, zc3, xb0d, yb0d, xc0d, yc0d, za1d, xb1d, yb1d, zb1d, xc1d, yc1d, zc1d, ya2d, xb2d, yb2d, yc2d, xa3d, ya3d, za3d, xb3d, yb3d, zb3d, xc3d, yc3d, zc3d; real t1,t2; real dax, day, daz, dbx, dby, dbz, dcx, dcy, dcz; real mdax, mday, mdaz, mdbx, mdby, mdbz, mdcx, mdcy, mdcz; int doshake; int i, shakeret, ow1, hw2, hw3; *error=-1; if (bFirst) { if (fp) fprintf(fp,"Going to use C-settle (%d waters)\n",nsettle); wo = mO; wh = mH; wohh = mO+2.0*mH; rc = dHH/2.0; ra = 2.0*wh*sqrt(dOH*dOH-rc*rc)/wohh; rb = sqrt(dOH*dOH-rc*rc)-ra; rc2 = dHH; rone = 1.0; wo /= wohh; wh /= wohh; if (fp) { fprintf(fp,"wo = %g, wh =%g, wohh = %g, rc = %g, ra = %g\n", wo,wh,wohh,rc,ra); fprintf(fp,"rb = %g, rc2 = %g, rone = %g, 
dHH = %g, dOH = %g\n", rb,rc2,rone,dHH,dOH); } bFirst = FALSE; } #ifdef PRAGMAS #pragma ivdep #endif for (i = 0; i < nsettle; ++i) { doshake = 0; /* --- Step1 A1' --- */ ow1 = iatoms[i*2+1] * 3; hw2 = ow1 + 3; hw3 = ow1 + 6; xb0 = b4[hw2 ] - b4[ow1]; yb0 = b4[hw2 + 1] - b4[ow1 + 1]; zb0 = b4[hw2 + 2] - b4[ow1 + 2]; xc0 = b4[hw3 ] - b4[ow1]; yc0 = b4[hw3 + 1] - b4[ow1 + 1]; zc0 = b4[hw3 + 2] - b4[ow1 + 2]; /* 6 flops */ xcom = (after[ow1 ] * wo + (after[hw2 ] + after[hw3 ]) * wh); ycom = (after[ow1 + 1] * wo + (after[hw2 + 1] + after[hw3 + 1]) * wh); zcom = (after[ow1 + 2] * wo + (after[hw2 + 2] + after[hw3 + 2]) * wh); /* 12 flops */ xa1 = after[ow1 ] - xcom; ya1 = after[ow1 + 1] - ycom; za1 = after[ow1 + 2] - zcom; xb1 = after[hw2 ] - xcom; yb1 = after[hw2 + 1] - ycom; zb1 = after[hw2 + 2] - zcom; xc1 = after[hw3 ] - xcom; yc1 = after[hw3 + 1] - ycom; zc1 = after[hw3 + 2] - zcom; /* 9 flops */ xakszd = yb0 * zc0 - zb0 * yc0; yakszd = zb0 * xc0 - xb0 * zc0; zakszd = xb0 * yc0 - yb0 * xc0; xaksxd = ya1 * zakszd - za1 * yakszd; yaksxd = za1 * xakszd - xa1 * zakszd; zaksxd = xa1 * yakszd - ya1 * xakszd; xaksyd = yakszd * zaksxd - zakszd * yaksxd; yaksyd = zakszd * xaksxd - xakszd * zaksxd; zaksyd = xakszd * yaksxd - yakszd * xaksxd; /* 27 flops */ axlng = invsqrt(xaksxd * xaksxd + yaksxd * yaksxd + zaksxd * zaksxd); aylng = invsqrt(xaksyd * xaksyd + yaksyd * yaksyd + zaksyd * zaksyd); azlng = invsqrt(xakszd * xakszd + yakszd * yakszd + zakszd * zakszd); trns11 = xaksxd * axlng; trns21 = yaksxd * axlng; trns31 = zaksxd * axlng; trns12 = xaksyd * aylng; trns22 = yaksyd * aylng; trns32 = zaksyd * aylng; trns13 = xakszd * azlng; trns23 = yakszd * azlng; trns33 = zakszd * azlng; /* 24 flops */ xb0d = trns11 * xb0 + trns21 * yb0 + trns31 * zb0; yb0d = trns12 * xb0 + trns22 * yb0 + trns32 * zb0; xc0d = trns11 * xc0 + trns21 * yc0 + trns31 * zc0; yc0d = trns12 * xc0 + trns22 * yc0 + trns32 * zc0; /* xa1d = trns11 * xa1 + trns21 * ya1 + trns31 * za1; ya1d = trns12 * xa1 + 
trns22 * ya1 + trns32 * za1; */ za1d = trns13 * xa1 + trns23 * ya1 + trns33 * za1; xb1d = trns11 * xb1 + trns21 * yb1 + trns31 * zb1; yb1d = trns12 * xb1 + trns22 * yb1 + trns32 * zb1; zb1d = trns13 * xb1 + trns23 * yb1 + trns33 * zb1; xc1d = trns11 * xc1 + trns21 * yc1 + trns31 * zc1; yc1d = trns12 * xc1 + trns22 * yc1 + trns32 * zc1; zc1d = trns13 * xc1 + trns23 * yc1 + trns33 * zc1; /* 65 flops */ sinphi = za1d / ra; tmp = rone - sinphi * sinphi; if (tmp <= 0) { *error = i; doshake = 1; cosphi = 0; } else cosphi = tmp*invsqrt(tmp); sinpsi = (zb1d - zc1d) / (rc2 * cosphi); tmp2 = rone - sinpsi * sinpsi; if (tmp2 <= 0) { *error = i; doshake = 1; cospsi = 0; } else cospsi = tmp2*invsqrt(tmp2); /* 46 flops */ if(!doshake) { ya2d = ra * cosphi; xb2d = -rc * cospsi; t1 = -rb * cosphi; t2 = rc * sinpsi * sinphi; yb2d = t1 - t2; yc2d = t1 + t2; /* 7 flops */ /* --- Step3 al,be,ga --- */ alpa = xb2d * (xb0d - xc0d) + yb0d * yb2d + yc0d * yc2d; beta = xb2d * (yc0d - yb0d) + xb0d * yb2d + xc0d * yc2d; gama = xb0d * yb1d - xb1d * yb0d + xc0d * yc1d - xc1d * yc0d; al2be2 = alpa * alpa + beta * beta; tmp2 = (al2be2 - gama * gama); sinthe = (alpa * gama - beta * tmp2*invsqrt(tmp2)) / al2be2; /* 47 flops */ /* --- Step4 A3' --- */ tmp2 = rone - sinthe *sinthe; costhe = tmp2*invsqrt(tmp2); xa3d = -ya2d * sinthe; ya3d = ya2d * costhe; za3d = za1d; xb3d = xb2d * costhe - yb2d * sinthe; yb3d = xb2d * sinthe + yb2d * costhe; zb3d = zb1d; xc3d = -xb2d * costhe - yc2d * sinthe; yc3d = -xb2d * sinthe + yc2d * costhe; zc3d = zc1d; /* 26 flops */ /* --- Step5 A3 --- */ xa3 = trns11 * xa3d + trns12 * ya3d + trns13 * za3d; ya3 = trns21 * xa3d + trns22 * ya3d + trns23 * za3d; za3 = trns31 * xa3d + trns32 * ya3d + trns33 * za3d; xb3 = trns11 * xb3d + trns12 * yb3d + trns13 * zb3d; yb3 = trns21 * xb3d + trns22 * yb3d + trns23 * zb3d; zb3 = trns31 * xb3d + trns32 * yb3d + trns33 * zb3d; xc3 = trns11 * xc3d + trns12 * yc3d + trns13 * zc3d; yc3 = trns21 * xc3d + trns22 * yc3d + trns23 * zc3d; 
zc3 = trns31 * xc3d + trns32 * yc3d + trns33 * zc3d; /* 45 flops */ after[ow1] = xcom + xa3; after[ow1 + 1] = ycom + ya3; after[ow1 + 2] = zcom + za3; after[hw2] = xcom + xb3; after[hw2 + 1] = ycom + yb3; after[hw2 + 2] = zcom + zb3; after[hw3] = xcom + xc3; after[hw3 + 1] = ycom + yc3; after[hw3 + 2] = zcom + zc3; /* 9 flops */ dax = xa3 - xa1; day = ya3 - ya1; daz = za3 - za1; dbx = xb3 - xb1; dby = yb3 - yb1; dbz = zb3 - zb1; dcx = xc3 - xc1; dcy = yc3 - yc1; dcz = zc3 - zc1; /* 9 flops, counted with the virial */ if (v) { v[ow1] += dax*invdt; v[ow1 + 1] += day*invdt; v[ow1 + 2] += daz*invdt; v[hw2] += dbx*invdt; v[hw2 + 1] += dby*invdt; v[hw2 + 2] += dbz*invdt; v[hw3] += dcx*invdt; v[hw3 + 1] += dcy*invdt; v[hw3 + 2] += dcz*invdt; /* 3*6 flops */ } if (bCalcVir) { mdax = mO*dax; mday = mO*day; mdaz = mO*daz; mdbx = mH*dbx; mdby = mH*dby; mdbz = mH*dbz; mdcx = mH*dcx; mdcy = mH*dcy; mdcz = mH*dcz; rmdr[XX][XX] -= b4[ow1]*mdax + b4[hw2]*mdbx + b4[hw3]*mdcx; rmdr[XX][YY] -= b4[ow1]*mday + b4[hw2]*mdby + b4[hw3]*mdcy; rmdr[XX][ZZ] -= b4[ow1]*mdaz + b4[hw2]*mdbz + b4[hw3]*mdcz; rmdr[YY][XX] -= b4[ow1+1]*mdax + b4[hw2+1]*mdbx + b4[hw3+1]*mdcx; rmdr[YY][YY] -= b4[ow1+1]*mday + b4[hw2+1]*mdby + b4[hw3+1]*mdcy; rmdr[YY][ZZ] -= b4[ow1+1]*mdaz + b4[hw2+1]*mdbz + b4[hw3+1]*mdcz; rmdr[ZZ][XX] -= b4[ow1+2]*mdax + b4[hw2+2]*mdbx + b4[hw3+2]*mdcx; rmdr[ZZ][YY] -= b4[ow1+2]*mday + b4[hw2+2]*mdby + b4[hw3+2]*mdcy; rmdr[ZZ][ZZ] -= b4[ow1+2]*mdaz + b4[hw2+2]*mdbz + b4[hw3+2]*mdcz; /* 3*24 - 9 flops */ } } else { /* If we couldn't settle this water, try a simplified iterative shake instead */ if(xshake(b4+ow1,after+ow1,dOH,dHH,mO,mH)!=0) *error=i; } #ifdef DEBUG check_cons(fp,"settle",after,ow1,hw2,hw3); #endif } }
void csettle(gmx_settledata_t settled, int nsettle, t_iatom iatoms[],real b4[], real after[], real invdt,real *v,gmx_bool bCalcVir,tensor rmdr,int *error,t_vetavars *vetavar) { /* ***************************************************************** */ /* ** */ /* Subroutine : setlep - reset positions of TIP3P waters ** */ /* Author : Shuichi Miyamoto ** */ /* Date of last update : Oct. 1, 1992 ** */ /* ** */ /* Reference for the SETTLE algorithm ** */ /* S. Miyamoto et al., J. Comp. Chem., 13, 952 (1992). ** */ /* ** */ /* ***************************************************************** */ /* Initialized data */ settleparam_t *p; real mO,mH,mOs,mHs,invdts; /* These three weights need have double precision. Using single precision * can result in huge velocity and pressure deviations. */ double wo,wh,wohh; real ra,rb,rc,rc2,dOH,dHH; /* Local variables */ real gama, beta, alpa, xcom, ycom, zcom, al2be2, tmp, tmp2; real axlng, aylng, azlng, trns11, trns21, trns31, trns12, trns22, trns32, trns13, trns23, trns33, cosphi, costhe, sinphi, sinthe, cospsi, xaksxd, yaksxd, xakszd, yakszd, zakszd, zaksxd, xaksyd, xb0, yb0, zb0, xc0, yc0, zc0, xa1; real ya1, za1, xb1, yb1; real zb1, xc1, yc1, zc1, yaksyd, zaksyd, sinpsi, xa3, ya3, za3, xb3, yb3, zb3, xc3, yc3, zc3, xb0d, yb0d, xc0d, yc0d, za1d, xb1d, yb1d, zb1d, xc1d, yc1d, zc1d, ya2d, xb2d, yb2d, yc2d, xa3d, ya3d, za3d, xb3d, yb3d, zb3d, xc3d, yc3d, zc3d; real t1,t2; real dax, day, daz, dbx, dby, dbz, dcx, dcy, dcz; real mdax, mday, mdaz, mdbx, mdby, mdbz, mdcx, mdcy, mdcz; int doshake; int i, shakeret, ow1, hw2, hw3; *error = -1; p = &settled->massw; mO = p->mO; mH = p->mH; wo = p->wo; wh = p->wh; wohh = p->wohh; rc = p->rc; ra = p->ra; rb = p->rb; rc2 = p->rc2; dOH = p->dOH; dHH = p->dHH; mOs = mO / vetavar->rvscale; mHs = mH / vetavar->rvscale; invdts = invdt/(vetavar->rscale); #ifdef PRAGMAS #pragma ivdep #endif for (i = 0; i < nsettle; ++i) { doshake = 0; /* --- Step1 A1' --- */ ow1 = iatoms[i*2+1] * 3; hw2 = ow1 + 3; hw3 = 
ow1 + 6; xb0 = b4[hw2 ] - b4[ow1]; yb0 = b4[hw2 + 1] - b4[ow1 + 1]; zb0 = b4[hw2 + 2] - b4[ow1 + 2]; xc0 = b4[hw3 ] - b4[ow1]; yc0 = b4[hw3 + 1] - b4[ow1 + 1]; zc0 = b4[hw3 + 2] - b4[ow1 + 2]; /* 6 flops */ xcom = (after[ow1 ] * wo + (after[hw2 ] + after[hw3 ]) * wh); ycom = (after[ow1 + 1] * wo + (after[hw2 + 1] + after[hw3 + 1]) * wh); zcom = (after[ow1 + 2] * wo + (after[hw2 + 2] + after[hw3 + 2]) * wh); /* 12 flops */ xa1 = after[ow1 ] - xcom; ya1 = after[ow1 + 1] - ycom; za1 = after[ow1 + 2] - zcom; xb1 = after[hw2 ] - xcom; yb1 = after[hw2 + 1] - ycom; zb1 = after[hw2 + 2] - zcom; xc1 = after[hw3 ] - xcom; yc1 = after[hw3 + 1] - ycom; zc1 = after[hw3 + 2] - zcom; /* 9 flops */ xakszd = yb0 * zc0 - zb0 * yc0; yakszd = zb0 * xc0 - xb0 * zc0; zakszd = xb0 * yc0 - yb0 * xc0; xaksxd = ya1 * zakszd - za1 * yakszd; yaksxd = za1 * xakszd - xa1 * zakszd; zaksxd = xa1 * yakszd - ya1 * xakszd; xaksyd = yakszd * zaksxd - zakszd * yaksxd; yaksyd = zakszd * xaksxd - xakszd * zaksxd; zaksyd = xakszd * yaksxd - yakszd * xaksxd; /* 27 flops */ axlng = gmx_invsqrt(xaksxd * xaksxd + yaksxd * yaksxd + zaksxd * zaksxd); aylng = gmx_invsqrt(xaksyd * xaksyd + yaksyd * yaksyd + zaksyd * zaksyd); azlng = gmx_invsqrt(xakszd * xakszd + yakszd * yakszd + zakszd * zakszd); trns11 = xaksxd * axlng; trns21 = yaksxd * axlng; trns31 = zaksxd * axlng; trns12 = xaksyd * aylng; trns22 = yaksyd * aylng; trns32 = zaksyd * aylng; trns13 = xakszd * azlng; trns23 = yakszd * azlng; trns33 = zakszd * azlng; /* 24 flops */ xb0d = trns11 * xb0 + trns21 * yb0 + trns31 * zb0; yb0d = trns12 * xb0 + trns22 * yb0 + trns32 * zb0; xc0d = trns11 * xc0 + trns21 * yc0 + trns31 * zc0; yc0d = trns12 * xc0 + trns22 * yc0 + trns32 * zc0; /* xa1d = trns11 * xa1 + trns21 * ya1 + trns31 * za1; ya1d = trns12 * xa1 + trns22 * ya1 + trns32 * za1; */ za1d = trns13 * xa1 + trns23 * ya1 + trns33 * za1; xb1d = trns11 * xb1 + trns21 * yb1 + trns31 * zb1; yb1d = trns12 * xb1 + trns22 * yb1 + trns32 * zb1; zb1d = trns13 * xb1 + 
trns23 * yb1 + trns33 * zb1; xc1d = trns11 * xc1 + trns21 * yc1 + trns31 * zc1; yc1d = trns12 * xc1 + trns22 * yc1 + trns32 * zc1; zc1d = trns13 * xc1 + trns23 * yc1 + trns33 * zc1; /* 65 flops */ sinphi = za1d / ra; tmp = 1.0 - sinphi * sinphi; if (tmp <= 0) { *error = i; doshake = 1; cosphi = 0; } else cosphi = tmp*gmx_invsqrt(tmp); sinpsi = (zb1d - zc1d) / (rc2 * cosphi); tmp2 = 1.0 - sinpsi * sinpsi; if (tmp2 <= 0) { *error = i; doshake = 1; cospsi = 0; } else cospsi = tmp2*gmx_invsqrt(tmp2); /* 46 flops */ if(!doshake) { ya2d = ra * cosphi; xb2d = -rc * cospsi; t1 = -rb * cosphi; t2 = rc * sinpsi * sinphi; yb2d = t1 - t2; yc2d = t1 + t2; /* 7 flops */ /* --- Step3 al,be,ga --- */ alpa = xb2d * (xb0d - xc0d) + yb0d * yb2d + yc0d * yc2d; beta = xb2d * (yc0d - yb0d) + xb0d * yb2d + xc0d * yc2d; gama = xb0d * yb1d - xb1d * yb0d + xc0d * yc1d - xc1d * yc0d; al2be2 = alpa * alpa + beta * beta; tmp2 = (al2be2 - gama * gama); sinthe = (alpa * gama - beta * tmp2*gmx_invsqrt(tmp2)) / al2be2; /* 47 flops */ /* --- Step4 A3' --- */ tmp2 = 1.0 - sinthe *sinthe; costhe = tmp2*gmx_invsqrt(tmp2); xa3d = -ya2d * sinthe; ya3d = ya2d * costhe; za3d = za1d; xb3d = xb2d * costhe - yb2d * sinthe; yb3d = xb2d * sinthe + yb2d * costhe; zb3d = zb1d; xc3d = -xb2d * costhe - yc2d * sinthe; yc3d = -xb2d * sinthe + yc2d * costhe; zc3d = zc1d; /* 26 flops */ /* --- Step5 A3 --- */ xa3 = trns11 * xa3d + trns12 * ya3d + trns13 * za3d; ya3 = trns21 * xa3d + trns22 * ya3d + trns23 * za3d; za3 = trns31 * xa3d + trns32 * ya3d + trns33 * za3d; xb3 = trns11 * xb3d + trns12 * yb3d + trns13 * zb3d; yb3 = trns21 * xb3d + trns22 * yb3d + trns23 * zb3d; zb3 = trns31 * xb3d + trns32 * yb3d + trns33 * zb3d; xc3 = trns11 * xc3d + trns12 * yc3d + trns13 * zc3d; yc3 = trns21 * xc3d + trns22 * yc3d + trns23 * zc3d; zc3 = trns31 * xc3d + trns32 * yc3d + trns33 * zc3d; /* 45 flops */ after[ow1] = xcom + xa3; after[ow1 + 1] = ycom + ya3; after[ow1 + 2] = zcom + za3; after[hw2] = xcom + xb3; after[hw2 + 1] = 
ycom + yb3; after[hw2 + 2] = zcom + zb3; after[hw3] = xcom + xc3; after[hw3 + 1] = ycom + yc3; after[hw3 + 2] = zcom + zc3; /* 9 flops */ dax = xa3 - xa1; day = ya3 - ya1; daz = za3 - za1; dbx = xb3 - xb1; dby = yb3 - yb1; dbz = zb3 - zb1; dcx = xc3 - xc1; dcy = yc3 - yc1; dcz = zc3 - zc1; /* 9 flops, counted with the virial */ if (v) { v[ow1] += dax*invdts; v[ow1 + 1] += day*invdts; v[ow1 + 2] += daz*invdts; v[hw2] += dbx*invdts; v[hw2 + 1] += dby*invdts; v[hw2 + 2] += dbz*invdts; v[hw3] += dcx*invdts; v[hw3 + 1] += dcy*invdts; v[hw3 + 2] += dcz*invdts; /* 3*6 flops */ } if (bCalcVir) { mdax = mOs*dax; mday = mOs*day; mdaz = mOs*daz; mdbx = mHs*dbx; mdby = mHs*dby; mdbz = mHs*dbz; mdcx = mHs*dcx; mdcy = mHs*dcy; mdcz = mHs*dcz; rmdr[XX][XX] -= b4[ow1]*mdax + b4[hw2]*mdbx + b4[hw3]*mdcx; rmdr[XX][YY] -= b4[ow1]*mday + b4[hw2]*mdby + b4[hw3]*mdcy; rmdr[XX][ZZ] -= b4[ow1]*mdaz + b4[hw2]*mdbz + b4[hw3]*mdcz; rmdr[YY][XX] -= b4[ow1+1]*mdax + b4[hw2+1]*mdbx + b4[hw3+1]*mdcx; rmdr[YY][YY] -= b4[ow1+1]*mday + b4[hw2+1]*mdby + b4[hw3+1]*mdcy; rmdr[YY][ZZ] -= b4[ow1+1]*mdaz + b4[hw2+1]*mdbz + b4[hw3+1]*mdcz; rmdr[ZZ][XX] -= b4[ow1+2]*mdax + b4[hw2+2]*mdbx + b4[hw3+2]*mdcx; rmdr[ZZ][YY] -= b4[ow1+2]*mday + b4[hw2+2]*mdby + b4[hw3+2]*mdcy; rmdr[ZZ][ZZ] -= b4[ow1+2]*mdaz + b4[hw2+2]*mdbz + b4[hw3+2]*mdcz; /* 3*24 - 9 flops */ } } else { /* If we couldn't settle this water, try a simplified iterative shake instead */ /* no pressure control in here yet */ if(xshake(b4+ow1,after+ow1,dOH,dHH,mO,mH)!=0) *error=i; } #ifdef DEBUG if (debug) { check_cons(debug,"settle",after,ow1,hw2,hw3); } #endif } }