static void do_lincs(rvec *x,rvec *xp,matrix box,t_pbc *pbc, struct gmx_lincsdata *lincsd,real *invmass, t_commrec *cr, real wangle,int *warn, real invdt,rvec *v, gmx_bool bCalcVir,tensor rmdr) { int b,i,j,k,n,iter; real tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,len2,dlen2,wfac,lam; rvec dx; int ncons,*bla,*blnr,*blbnb; rvec *r; real *blc,*blmf,*bllen,*blcc,*rhs1,*rhs2,*sol,*lambda; int *nlocat; ncons = lincsd->nc; bla = lincsd->bla; r = lincsd->tmpv; blnr = lincsd->blnr; blbnb = lincsd->blbnb; blc = lincsd->blc; blmf = lincsd->blmf; bllen = lincsd->bllen; blcc = lincsd->tmpncc; rhs1 = lincsd->tmp1; rhs2 = lincsd->tmp2; sol = lincsd->tmp3; lambda = lincsd->lambda; if (DOMAINDECOMP(cr) && cr->dd->constraints) { nlocat = dd_constraints_nlocalatoms(cr->dd); } else if (PARTDECOMP(cr)) { nlocat = pd_constraints_nlocalatoms(cr->pd); } else { nlocat = NULL; } *warn = 0; if (pbc) { /* Compute normalized i-j vectors */ for(b=0; b<ncons; b++) { pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx); unitv(dx,r[b]); } for(b=0; b<ncons; b++) { for(n=blnr[b]; n<blnr[b+1]; n++) { blcc[n] = blmf[n]*iprod(r[b],r[blbnb[n]]); } pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx); mvb = blc[b]*(iprod(r[b],dx) - bllen[b]); rhs1[b] = mvb; sol[b] = mvb; } } else { /* Compute normalized i-j vectors */ for(b=0; b<ncons; b++) { i = bla[2*b]; j = bla[2*b+1]; tmp0 = x[i][0] - x[j][0]; tmp1 = x[i][1] - x[j][1]; tmp2 = x[i][2] - x[j][2]; rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2); r[b][0] = rlen*tmp0; r[b][1] = rlen*tmp1; r[b][2] = rlen*tmp2; } /* 16 ncons flops */ for(b=0; b<ncons; b++) { tmp0 = r[b][0]; tmp1 = r[b][1]; tmp2 = r[b][2]; len = bllen[b]; i = bla[2*b]; j = bla[2*b+1]; for(n=blnr[b]; n<blnr[b+1]; n++) { k = blbnb[n]; blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]); } /* 6 nr flops */ mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) + tmp1*(xp[i][1] - xp[j][1]) + tmp2*(xp[i][2] - xp[j][2]) - len); rhs1[b] = mvb; sol[b] = mvb; /* 10 flops */ } /* Together: 26*ncons + 6*nrtot flops */ } 
lincs_matrix_expand(lincsd,blcc,rhs1,rhs2,sol); /* nrec*(ncons+2*nrtot) flops */ for(b=0; b<ncons; b++) { i = bla[2*b]; j = bla[2*b+1]; mvb = blc[b]*sol[b]; lambda[b] = -mvb; im1 = invmass[i]; im2 = invmass[j]; tmp0 = r[b][0]*mvb; tmp1 = r[b][1]*mvb; tmp2 = r[b][2]*mvb; xp[i][0] -= tmp0*im1; xp[i][1] -= tmp1*im1; xp[i][2] -= tmp2*im1; xp[j][0] += tmp0*im2; xp[j][1] += tmp1*im2; xp[j][2] += tmp2*im2; } /* 16 ncons flops */ /* ******** Correction for centripetal effects ******** */ wfac = cos(DEG2RAD*wangle); wfac = wfac*wfac; for(iter=0; iter<lincsd->nIter; iter++) { if (DOMAINDECOMP(cr) && cr->dd->constraints) { /* Communicate the corrected non-local coordinates */ dd_move_x_constraints(cr->dd,box,xp,NULL); } else if (PARTDECOMP(cr)) { pd_move_x_constraints(cr,xp,NULL); } for(b=0; b<ncons; b++) { len = bllen[b]; if (pbc) { pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx); } else { rvec_sub(xp[bla[2*b]],xp[bla[2*b+1]],dx); } len2 = len*len; dlen2 = 2*len2 - norm2(dx); if (dlen2 < wfac*len2 && (nlocat==NULL || nlocat[b])) { *warn = b; } if (dlen2 > 0) { mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2)); } else { mvb = blc[b]*len; } rhs1[b] = mvb; sol[b] = mvb; } /* 20*ncons flops */ lincs_matrix_expand(lincsd,blcc,rhs1,rhs2,sol); /* nrec*(ncons+2*nrtot) flops */ for(b=0; b<ncons; b++) { i = bla[2*b]; j = bla[2*b+1]; lam = lambda[b]; mvb = blc[b]*sol[b]; lambda[b] = lam - mvb; im1 = invmass[i]; im2 = invmass[j]; tmp0 = r[b][0]*mvb; tmp1 = r[b][1]*mvb; tmp2 = r[b][2]*mvb; xp[i][0] -= tmp0*im1; xp[i][1] -= tmp1*im1; xp[i][2] -= tmp2*im1; xp[j][0] += tmp0*im2; xp[j][1] += tmp1*im2; xp[j][2] += tmp2*im2; } /* 17 ncons flops */ } /* nit*ncons*(37+9*nrec) flops */ if (v) { /* Correct the velocities */ for(b=0; b<ncons; b++) { i = bla[2*b]; j = bla[2*b+1]; im1 = invmass[i]*lambda[b]*invdt; im2 = invmass[j]*lambda[b]*invdt; v[i][0] += im1*r[b][0]; v[i][1] += im1*r[b][1]; v[i][2] += im1*r[b][2]; v[j][0] -= im2*r[b][0]; v[j][1] -= im2*r[b][1]; v[j][2] -= im2*r[b][2]; } /* 16 ncons 
flops */ } if (nlocat) { /* Only account for local atoms */ for(b=0; b<ncons; b++) { lambda[b] *= 0.5*nlocat[b]; } } if (bCalcVir) { /* Constraint virial */ for(b=0; b<ncons; b++) { tmp0 = bllen[b]*lambda[b]; for(i=0; i<DIM; i++) { tmp1 = tmp0*r[b][i]; for(j=0; j<DIM; j++) { rmdr[i][j] -= tmp1*r[b][j]; } } } /* 22 ncons flops */ } /* Total: * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot) * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons) * * (26+nrec)*ncons + (6+2*nrec)*nrtot * + nit * ((37+nrec)*ncons + 2*nrec*nrtot) * if nit=1 * (63+nrec)*ncons + (6+4*nrec)*nrtot */ }
/* (Re)build the LINCS constraint list and constraint-coupling list in li
 * from the F_CONSTR entries of the local topology idef.
 *
 * For every constraint that is kept, the two atom indices go into li->bla,
 * the A-state length into li->bllen0/li->bllen and the B-A length difference
 * into li->ddist. li->blnr/li->blbnb receive, per constraint, the indices of
 * the other constraints that share one of its atoms. Constraints whose A and
 * B lengths are both zero are skipped unless bDynamics is set.
 * Finishes by calling set_lincs_matrix().
 */
void set_lincs(t_idef *idef,t_mdatoms *md,
               gmx_bool bDynamics,t_commrec *cr,
               struct gmx_lincsdata *li)
{
    t_blocka at2con;
    t_iatom  *entry;
    int      start,natoms,nflexcon;
    int      nentries,ncon,ncoupl,ncoupl_alloc;
    int      c,e,k,at,other;
    int      ends[2];
    real     dA,dB;

    li->nc  = 0;
    li->ncc = 0;

    /* This is the local topology, so there are only F_CONSTR constraints */
    if (idef->il[F_CONSTR].nr == 0)
    {
        /* Nothing to constrain: leave all data structures untouched */
        return;
    }

    if (debug)
    {
        fprintf(debug,"Building the LINCS connectivity\n");
    }

    /* Determine the atom range covered by the atom->constraint table */
    if (DOMAINDECOMP(cr))
    {
        if (cr->dd->constraints)
        {
            dd_get_constraint_range(cr->dd,&start,&natoms);
        }
        else
        {
            natoms = cr->dd->nat_home;
        }
        /* With domain decomposition the range always starts at atom 0 */
        start = 0;
    }
    else if (PARTDECOMP(cr))
    {
        pd_get_constraint_range(cr->pd,&start,&natoms);
    }
    else
    {
        start  = md->start;
        natoms = md->homenr;
    }

    at2con = make_at2con(start,natoms,idef->il,idef->iparams,bDynamics,
                         &nflexcon);

    /* Each F_CONSTR entry is a (type, atom1, atom2) triplet */
    nentries = idef->il[F_CONSTR].nr/3;

    if (nentries > li->nc_alloc || li->nc_alloc == 0)
    {
        li->nc_alloc = over_alloc_dd(nentries);
        srenew(li->bllen0,li->nc_alloc);
        srenew(li->ddist,li->nc_alloc);
        srenew(li->bla,2*li->nc_alloc);
        srenew(li->blc,li->nc_alloc);
        srenew(li->blc1,li->nc_alloc);
        srenew(li->blnr,li->nc_alloc+1);
        srenew(li->bllen,li->nc_alloc);
        srenew(li->tmpv,li->nc_alloc);
        srenew(li->tmp1,li->nc_alloc);
        srenew(li->tmp2,li->nc_alloc);
        srenew(li->tmp3,li->nc_alloc);
        srenew(li->lambda,li->nc_alloc);
        if (li->ncg_triangle > 0)
        {
            /* This is allocating too much, but it is difficult to improve */
            srenew(li->triangle,li->nc_alloc);
            srenew(li->tri_bits,li->nc_alloc);
        }
    }

    ncoupl_alloc = li->ncc_alloc;
    ncon         = 0;
    ncoupl       = 0;
    li->blnr[0]  = 0;

    for(c=0; c<nentries; c++)
    {
        entry = idef->il[F_CONSTR].iatoms + 3*c;
        dA    = idef->iparams[entry[0]].constr.dA;
        dB    = idef->iparams[entry[0]].constr.dB;

        /* Skip the flexible constraints when not doing dynamics */
        if (!(bDynamics || dA != 0 || dB != 0))
        {
            continue;
        }

        ends[0] = entry[1];
        ends[1] = entry[2];

        li->bllen0[ncon] = dA;
        li->ddist[ncon]  = dB - dA;
        /* Set the length to the topology A length */
        li->bllen[ncon]   = li->bllen0[ncon];
        li->bla[2*ncon]   = ends[0];
        li->bla[2*ncon+1] = ends[1];

        /* Construct the constraint connection matrix blbnb:
         * first the constraints on the first atom, then on the second.
         */
        for(e=0; e<2; e++)
        {
            at = ends[e] - start;
            for(k=at2con.index[at]; k<at2con.index[at+1]; k++)
            {
                other = at2con.a[k];
                if (other == c)
                {
                    continue;
                }
                if (ncoupl >= ncoupl_alloc)
                {
                    ncoupl_alloc = over_alloc_small(ncoupl+1);
                    srenew(li->blbnb,ncoupl_alloc);
                }
                li->blbnb[ncoupl] = other;
                ncoupl++;
            }
        }
        li->blnr[ncon+1] = ncoupl;

        if (cr->dd == NULL)
        {
            /* Order the blbnb matrix to optimize memory access */
            qsort(&(li->blbnb[li->blnr[ncon]]),li->blnr[ncon+1]-li->blnr[ncon],
                  sizeof(li->blbnb[0]),int_comp);
        }

        /* Increase the constraint count */
        ncon++;
    }

    done_blocka(&at2con);

    /* This is the real number of constraints,
     * without dynamics the flexible constraints are not present.
     */
    li->nc  = ncon;
    li->ncc = li->blnr[ncon];

    if (cr->dd == NULL)
    {
        /* Since the matrix is static, we can free some memory */
        ncoupl_alloc = li->ncc;
        srenew(li->blbnb,ncoupl_alloc);
    }

    /* The per-coupling work arrays only ever grow */
    if (ncoupl_alloc > li->ncc_alloc)
    {
        li->ncc_alloc = ncoupl_alloc;
        srenew(li->blmf,li->ncc_alloc);
        srenew(li->blmf1,li->ncc_alloc);
        srenew(li->tmpncc,li->ncc_alloc);
    }

    if (debug)
    {
        fprintf(debug,"Number of constraints is %d, couplings %d\n",
                li->nc,li->ncc);
    }

    set_lincs_matrix(li,md->invmass,md->lambda);
}
/* Iteratively relax shell positions and flexible constraints.
 *
 * Forces are first computed for state->x. While the RMS force returned by
 * rms_force() is not below inputrec->em_tol and fewer than inputrec->niter
 * iterations have been done, trial positions are generated (shell_pos_sd(),
 * and directional_sd() when there are flexible constraints), forces are
 * recomputed with do_force(), and the trial is accepted when its RMS force is
 * lower than the best so far; otherwise decrease_step_size() is called.
 *
 * Two position/force buffers from shfc are used; Min indexes the best set so
 * far and Try (defined as 1-Min) the trial set.
 *
 * On return the best positions and forces have been copied into state->x and
 * f, *bConverged tells whether the tolerance was reached, and the return value
 * is the loop counter (1 when no iteration was needed).
 *
 * The parameters bStopCM and natoms are not referenced in this body.
 */
int relax_shell_flexcon(FILE *fplog,t_commrec *cr,gmx_bool bVerbose,
                        gmx_large_int_t mdstep,t_inputrec *inputrec,
                        gmx_bool bDoNS,int force_flags,
                        gmx_bool bStopCM,
                        gmx_localtop_t *top,
                        gmx_mtop_t* mtop,
                        gmx_constr_t constr,
                        gmx_enerdata_t *enerd,t_fcdata *fcd,
                        t_state *state,rvec f[],
                        tensor force_vir,
                        t_mdatoms *md,
                        t_nrnb *nrnb,gmx_wallcycle_t wcycle,
                        t_graph *graph,
                        gmx_groups_t *groups,
                        struct gmx_shellfc *shfc,
                        t_forcerec *fr,
                        gmx_bool bBornRadii,
                        double t,rvec mu_tot,
                        int natoms,gmx_bool *bConverged,
                        gmx_vsite_t *vsite,
                        FILE *fp_field)
{
    int    nshell;
    t_shell *shell;
    t_idef *idef;
    rvec   *pos[2],*force[2],*acc_dir=NULL,*x_old=NULL;
    real   Epot[2],df[2];
    rvec   dx;
    real   sf_dir,invdt;
    real   ftol,xiH,xiS,dum=0;
    char   sbuf[22];
    gmx_bool bCont,bInit;
    int    nat,dd_ac0,dd_ac1=0,i;
    int    start=md->start,homenr=md->homenr,end=start+homenr,cg0,cg1;
    int    nflexcon,g,number_steps,d,Min=0,count=0;
    /* Try is always the buffer index that is not Min */
#define  Try (1-Min)             /* At start Try = 1 */

    bCont        = (mdstep == inputrec->init_step) && inputrec->bContinuation;
    bInit        = (mdstep == inputrec->init_step) || shfc->bRequireInit;
    ftol         = inputrec->em_tol;
    number_steps = inputrec->niter;
    nshell       = shfc->nshell;
    shell        = shfc->shell;
    nflexcon     = shfc->nflexcon;

    idef = &top->idef;

    /* Number of atoms for which the local buffers must have room */
    if (DOMAINDECOMP(cr)) {
        nat = dd_natoms_vsite(cr->dd);
        if (nflexcon > 0) {
            dd_get_constraint_range(cr->dd,&dd_ac0,&dd_ac1);
            nat = max(nat,dd_ac1);
        }
    } else {
        nat = state->natoms;
    }

    if (nat > shfc->x_nalloc) {
        /* Allocate local arrays */
        shfc->x_nalloc = over_alloc_dd(nat);
        for(i=0; (i<2); i++) {
            srenew(shfc->x[i],shfc->x_nalloc);
            srenew(shfc->f[i],shfc->x_nalloc);
        }
    }
    for(i=0; (i<2); i++) {
        pos[i]   = shfc->x[i];
        force[i] = shfc->f[i];
    }

    /* With particle decomposition this code only works
     * when all particles involved with each shell are in the same cg.
     */

    if (bDoNS && inputrec->ePBC != epbcNONE && !DOMAINDECOMP(cr)) {
        /* This is the only time where the coordinates are used
         * before do_force is called, which normally puts all
         * charge groups in the box.
         */
        if (PARTDECOMP(cr)) {
            pd_cg_range(cr,&cg0,&cg1);
        } else {
            cg0 = 0;
            cg1 = top->cgs.nr;
        }
        put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,state->box,
                                 &(top->cgs),state->x,fr->cg_cm);
        if (graph)
            mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
    }

    /* After this all coordinate arrays will contain whole molecules */
    if (graph)
        shift_self(graph,state->box,state->x);

    if (nflexcon) {
        if (nat > shfc->flex_nalloc) {
            shfc->flex_nalloc = over_alloc_dd(nat);
            srenew(shfc->acc_dir,shfc->flex_nalloc);
            srenew(shfc->x_old,shfc->flex_nalloc);
        }
        acc_dir = shfc->acc_dir;
        x_old   = shfc->x_old;
        /* x_old = x - v*dt for the home atoms; x_old is indexed from 0,
         * which is why callees below receive x_old-start.
         */
        for(i=0; i<homenr; i++) {
            for(d=0; d<DIM; d++)
                shfc->x_old[i][d] =
                    state->x[start+i][d] - state->v[start+i][d]*inputrec->delta_t;
        }
    }

    /* Do a prediction of the shell positions */
    if (shfc->bPredict && !bCont) {
        predict_shells(fplog,state->x,state->v,inputrec->delta_t,nshell,shell,
                       md->massT,NULL,bInit);
    }

    /* do_force expected the charge groups to be in the box */
    if (graph)
        unshift_self(graph,state->box,state->x);

    /* Calculate the forces first time around */
    if (gmx_debug_at) {
        pr_rvecs(debug,0,"x b4 do_force",state->x + start,homenr);
    }
    /* Neighbour searching is only requested here, not in the iterations */
    do_force(fplog,cr,inputrec,mdstep,nrnb,wcycle,top,mtop,groups,
             state->box,state->x,&state->hist,
             force[Min],force_vir,md,enerd,fcd,
             state->lambda,graph,
             fr,vsite,mu_tot,t,fp_field,NULL,bBornRadii,
             (bDoNS ? GMX_FORCE_NS : 0) | force_flags);

    sf_dir = 0;
    if (nflexcon) {
        init_adir(fplog,shfc,
                  constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
                  shfc->x_old-start,state->x,state->x,force[Min],
                  shfc->acc_dir-start,
                  fr->bMolPBC,state->box,state->lambda,&dum,nrnb);

        /* Mass-weighted sum of squared acc_dir over the home atoms */
        for(i=start; i<end; i++)
            sf_dir += md->massT[i]*norm2(shfc->acc_dir[i-start]);
    }

    Epot[Min] = enerd->term[F_EPOT];

    df[Min]=rms_force(cr,shfc->f[Min],nshell,shell,nflexcon,&sf_dir,&Epot[Min]);
    df[Try]=0;
    if (debug) {
        fprintf(debug,"df = %g %g\n",df[Min],df[Try]);
    }

    if (gmx_debug_at) {
        pr_rvecs(debug,0,"force0",force[Min],md->nr);
    }

    if (nshell+nflexcon > 0) {
        /* Copy x to pos[Min] & pos[Try]: during minimization only the
         * shell positions are updated, therefore the other particles must
         * be set here.
         */
        memcpy(pos[Min],state->x,nat*sizeof(state->x[0]));
        memcpy(pos[Try],state->x,nat*sizeof(state->x[0]));
    }

    if (bVerbose && MASTER(cr))
        print_epot(stdout,mdstep,0,Epot[Min],df[Min],nflexcon,sf_dir);

    if (debug) {
        fprintf(debug,"%17s: %14.10e\n",
                interaction_function[F_EKIN].longname,enerd->term[F_EKIN]);
        fprintf(debug,"%17s: %14.10e\n",
                interaction_function[F_EPOT].longname,enerd->term[F_EPOT]);
        fprintf(debug,"%17s: %14.10e\n",
                interaction_function[F_ETOT].longname,enerd->term[F_ETOT]);
        fprintf(debug,"SHELLSTEP %s\n",gmx_step_str(mdstep,sbuf));
    }

    /* First check whether we should do shells, or whether the force is
     * low enough even without minimization.
     */
    *bConverged = (df[Min] < ftol);

    for(count=1; (!(*bConverged) && (count < number_steps)); count++) {
        if (vsite)
            construct_vsites(fplog,vsite,pos[Min],nrnb,inputrec->delta_t,state->v,
                             idef->iparams,idef->il,
                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);

        if (nflexcon) {
            init_adir(fplog,shfc,
                      constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
                      x_old-start,state->x,pos[Min],force[Min],acc_dir-start,
                      fr->bMolPBC,state->box,state->lambda,&dum,nrnb);

            directional_sd(fplog,pos[Min],pos[Try],acc_dir-start,start,end,
                           fr->fc_stepsize);
        }

        /* New positions, Steepest descent */
        shell_pos_sd(fplog,pos[Min],pos[Try],force[Min],nshell,shell,count);

        /* do_force expected the charge groups to be in the box */
        if (graph)
            unshift_self(graph,state->box,pos[Try]);

        if (gmx_debug_at) {
            pr_rvecs(debug,0,"RELAX: pos[Min] ",pos[Min] + start,homenr);
            pr_rvecs(debug,0,"RELAX: pos[Try] ",pos[Try] + start,homenr);
        }
        /* Try the new positions */
        /* NOTE(review): the step argument is the literal 1 here rather than
         * mdstep - confirm this is intended.
         */
        do_force(fplog,cr,inputrec,1,nrnb,wcycle,
                 top,mtop,groups,state->box,pos[Try],&state->hist,
                 force[Try],force_vir,
                 md,enerd,fcd,state->lambda,graph,
                 fr,vsite,mu_tot,t,fp_field,NULL,bBornRadii,
                 force_flags);

        if (gmx_debug_at) {
            pr_rvecs(debug,0,"RELAX: force[Min]",force[Min] + start,homenr);
            pr_rvecs(debug,0,"RELAX: force[Try]",force[Try] + start,homenr);
        }
        sf_dir = 0;
        if (nflexcon) {
            init_adir(fplog,shfc,
                      constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
                      x_old-start,state->x,pos[Try],force[Try],acc_dir-start,
                      fr->bMolPBC,state->box,state->lambda,&dum,nrnb);

            for(i=start; i<end; i++)
                sf_dir += md->massT[i]*norm2(acc_dir[i-start]);
        }

        Epot[Try] = enerd->term[F_EPOT];

        df[Try]=rms_force(cr,force[Try],nshell,shell,nflexcon,&sf_dir,&Epot[Try]);

        if (debug)
            fprintf(debug,"df = %g %g\n",df[Min],df[Try]);

        if (debug) {
            if (gmx_debug_at)
                pr_rvecs(debug,0,"F na do_force",force[Try] + start,homenr);
            if (gmx_debug_at) {
                fprintf(debug,"SHELL ITER %d\n",count);
                dump_shells(debug,pos[Try],force[Try],ftol,nshell,shell);
            }
        }

        if (bVerbose && MASTER(cr))
            print_epot(stdout,mdstep,count,Epot[Try],df[Try],nflexcon,sf_dir);

        *bConverged = (df[Try] < ftol);

        /* Accept the trial only when it lowers the RMS force */
        if ((df[Try] < df[Min])) {
            if (debug)
                fprintf(debug,"Swapping Min and Try\n");
            if (nflexcon) {
                /* Correct the velocities for the flexible constraints */
                invdt = 1/inputrec->delta_t;
                for(i=start; i<end; i++) {
                    for(d=0; d<DIM; d++)
                        state->v[i][d] += (pos[Try][i][d] - pos[Min][i][d])*invdt;
                }
            }
            /* Try is a macro of Min, so this assignment swaps the roles */
            Min  = Try;
        } else {
            decrease_step_size(nshell,shell);
        }
    }
    if (MASTER(cr) && !(*bConverged)) {
        /* Note that the energies and virial are incorrect when not converged */
        if (fplog)
            fprintf(fplog,
                    "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
                    gmx_step_str(mdstep,sbuf),number_steps,df[Min]);
        fprintf(stderr,
                "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
                gmx_step_str(mdstep,sbuf),number_steps,df[Min]);
    }

    /* Copy back the coordinates and the forces */
    /* NOTE(review): pos[] is only filled from state->x above when
     * nshell+nflexcon > 0, but this copy-back is unconditional - presumably
     * callers only get here with shells or flexible constraints; confirm.
     */
    memcpy(state->x,pos[Min],nat*sizeof(state->x[0]));
    memcpy(f,force[Min],nat*sizeof(f[0]));

    return count;
}
gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner, t_inputrec *ir, gmx_large_int_t step, struct gmx_lincsdata *lincsd,t_mdatoms *md, t_commrec *cr, rvec *x,rvec *xprime,rvec *min_proj,matrix box, real lambda,real *dvdlambda, real invdt,rvec *v, gmx_bool bCalcVir,tensor rmdr, int econq, t_nrnb *nrnb, int maxwarn,int *warncount) { char buf[STRLEN],buf2[22],buf3[STRLEN]; int i,warn,p_imax,error; real ncons_loc,p_ssd,p_max; t_pbc pbc,*pbc_null; rvec dx; gmx_bool bOK; bOK = TRUE; if (lincsd->nc == 0 && cr->dd == NULL) { if (bLog || bEner) { lincsd->rmsd_data[0] = 0; if (ir->eI == eiSD2 && v == NULL) { i = 2; } else { i = 1; } lincsd->rmsd_data[i] = 0; } return bOK; } /* We do not need full pbc when constraints do not cross charge groups, * i.e. when dd->constraint_comm==NULL */ if ((cr->dd || ir->bPeriodicMols) && !(cr->dd && cr->dd->constraint_comm==NULL)) { /* With pbc=screw the screw has been changed to a shift * by the constraint coordinate communication routine, * so that here we can use normal pbc. 
*/ pbc_null = set_pbc_dd(&pbc,ir->ePBC,cr->dd,FALSE,box); } else { pbc_null = NULL; } if (cr->dd) { /* Communicate the coordinates required for the non-local constraints */ dd_move_x_constraints(cr->dd,box,x,xprime); /* dump_conf(dd,lincsd,NULL,"con",TRUE,xprime,box); */ } else if (PARTDECOMP(cr)) { pd_move_x_constraints(cr,x,xprime); } if (econq == econqCoord) { if (ir->efep != efepNO) { if (md->nMassPerturbed && lincsd->matlam != md->lambda) { set_lincs_matrix(lincsd,md->invmass,md->lambda); } for(i=0; i<lincsd->nc; i++) { lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i]; } } if (lincsd->ncg_flex) { /* Set the flexible constraint lengths to the old lengths */ if (pbc_null) { for(i=0; i<lincsd->nc; i++) { if (lincsd->bllen[i] == 0) { pbc_dx_aiuc(pbc_null,x[lincsd->bla[2*i]],x[lincsd->bla[2*i+1]],dx); lincsd->bllen[i] = norm(dx); } } } else { for(i=0; i<lincsd->nc; i++) { if (lincsd->bllen[i] == 0) { lincsd->bllen[i] = sqrt(distance2(x[lincsd->bla[2*i]], x[lincsd->bla[2*i+1]])); } } } } if (bLog && fplog) { cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc_null, &ncons_loc,&p_ssd,&p_max,&p_imax); } do_lincs(x,xprime,box,pbc_null,lincsd,md->invmass,cr, ir->LincsWarnAngle,&warn, invdt,v,bCalcVir,rmdr); if (ir->efep != efepNO) { real dt_2,dvdl=0; dt_2 = 1.0/(ir->delta_t*ir->delta_t); for(i=0; (i<lincsd->nc); i++) { dvdl += lincsd->lambda[i]*dt_2*lincsd->ddist[i]; } *dvdlambda += dvdl; } if (bLog && fplog && lincsd->nc > 0) { fprintf(fplog," Rel. 
Constraint Deviation: RMS MAX between atoms\n"); fprintf(fplog," Before LINCS %.6f %.6f %6d %6d\n", sqrt(p_ssd/ncons_loc),p_max, ddglatnr(cr->dd,lincsd->bla[2*p_imax]), ddglatnr(cr->dd,lincsd->bla[2*p_imax+1])); } if (bLog || bEner) { cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc_null, &ncons_loc,&p_ssd,&p_max,&p_imax); /* Check if we are doing the second part of SD */ if (ir->eI == eiSD2 && v == NULL) { i = 2; } else { i = 1; } lincsd->rmsd_data[0] = ncons_loc; lincsd->rmsd_data[i] = p_ssd; } else { lincsd->rmsd_data[0] = 0; lincsd->rmsd_data[1] = 0; lincsd->rmsd_data[2] = 0; } if (bLog && fplog && lincsd->nc > 0) { fprintf(fplog, " After LINCS %.6f %.6f %6d %6d\n\n", sqrt(p_ssd/ncons_loc),p_max, ddglatnr(cr->dd,lincsd->bla[2*p_imax]), ddglatnr(cr->dd,lincsd->bla[2*p_imax+1])); } if (warn > 0) { if (maxwarn >= 0) { cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc_null, &ncons_loc,&p_ssd,&p_max,&p_imax); if (MULTISIM(cr)) { sprintf(buf3," in simulation %d", cr->ms->sim); } else { buf3[0] = 0; } sprintf(buf,"\nStep %s, time %g (ps) LINCS WARNING%s\n" "relative constraint deviation after LINCS:\n" "rms %.6f, max %.6f (between atoms %d and %d)\n", gmx_step_str(step,buf2),ir->init_t+step*ir->delta_t, buf3, sqrt(p_ssd/ncons_loc),p_max, ddglatnr(cr->dd,lincsd->bla[2*p_imax]), ddglatnr(cr->dd,lincsd->bla[2*p_imax+1])); if (fplog) { fprintf(fplog,"%s",buf); } fprintf(stderr,"%s",buf); lincs_warning(fplog,cr->dd,x,xprime,pbc_null, lincsd->nc,lincsd->bla,lincsd->bllen, ir->LincsWarnAngle,maxwarn,warncount); } bOK = (p_max < 0.5); } if (lincsd->ncg_flex) { for(i=0; (i<lincsd->nc); i++) if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0) lincsd->bllen[i] = 0; } } else { do_lincsp(x,xprime,min_proj,pbc_null,lincsd,md->invmass,econq,dvdlambda, bCalcVir,rmdr); } /* count assuming nit=1 */ inc_nrnb(nrnb,eNR_LINCS,lincsd->nc); inc_nrnb(nrnb,eNR_LINCSMAT,(2+lincsd->nOrder)*lincsd->ncc); if (lincsd->ntriangle > 0) { 
inc_nrnb(nrnb,eNR_LINCSMAT,lincsd->nOrder*lincsd->ncc_triangle); } if (v) { inc_nrnb(nrnb,eNR_CONSTR_V,lincsd->nc*2); } if (bCalcVir) { inc_nrnb(nrnb,eNR_CONSTR_VIR,lincsd->nc); } return bOK; }
/* Collect the flop counters of all nodes and print the end-of-run
 * performance summary.
 *
 * fplog        log file; print_perf() to it is skipped when NULL
 * cr           communication record
 * confout      not referenced in this function
 * inputrec     used for the integrator type and time step
 * nrnb         flop counters of this node; owned by the caller, never freed
 *              here
 * wcycle       wall-cycle counters, summed over nodes by wallcycle_sum()
 * nodetime, realtime, nsteps_done  passed on to the print routines
 * bWriteStat   also print the performance summary to stderr
 */
void finish_run(FILE *fplog,t_commrec *cr,char *confout,
                t_inputrec *inputrec,
                t_nrnb nrnb[],gmx_wallcycle_t wcycle,
                double nodetime,double realtime,int nsteps_done,
                bool bWriteStat)
{
    int    i,j;
    t_nrnb *nrnb_all=NULL,ntot;
    real   delta_t;
    double nbfs,mflop;
    double cycles[ewcNR];

    wallcycle_sum(cr,wcycle,cycles);

    if (cr->nnodes > 1) {
        /* Only the master needs room for the counters of all nodes */
        if (SIMMASTER(cr)) {
            snew(nrnb_all,cr->nnodes);
        }
#ifdef GMX_MPI
        MPI_Gather(nrnb,sizeof(t_nrnb),MPI_BYTE,
                   nrnb_all,sizeof(t_nrnb),MPI_BYTE,
                   0,cr->mpi_comm_mysim);
#endif
    } else {
        /* Single node: alias the caller's array, which we must not free */
        nrnb_all = nrnb;
    }

    if (SIMMASTER(cr)) {
        for(i=0; (i<eNRNB); i++) {
            ntot.n[i]=0;
        }
        for(i=0; (i<cr->nnodes); i++) {
            for(j=0; (j<eNRNB); j++) {
                ntot.n[j] += nrnb_all[i].n[j];
            }
        }

        print_flop(fplog,&ntot,&nbfs,&mflop);
    }

    if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr)) {
        print_dd_statistics(cr,inputrec,fplog);
    }

    if (SIMMASTER(cr)) {
        if (PARTDECOMP(cr)) {
            /* Needs the per-node counters, so they must still be alive */
            pr_load(fplog,cr,nrnb_all);
        }

        wallcycle_print(fplog,cr->nnodes,cr->npmenodes,realtime,wcycle,cycles);

        if (EI_DYNAMICS(inputrec->eI)) {
            delta_t = inputrec->delta_t;
        } else {
            delta_t = 0;
        }

        if (fplog) {
            print_perf(fplog,nodetime,realtime,cr->nnodes-cr->npmenodes,
                       nsteps_done,delta_t,nbfs,mflop);
        }
        if (bWriteStat) {
            print_perf(stderr,nodetime,realtime,cr->nnodes-cr->npmenodes,
                       nsteps_done,delta_t,nbfs,mflop);
        }

        /* Release the gathered copy after its last use; with one node
         * nrnb_all is the caller's nrnb and is left alone.
         */
        if (cr->nnodes > 1) {
            sfree(nrnb_all);
        }
    }
}
/* Compute the forces, energies and virial for the coordinates x.
 *
 * The bits in flags that are tested here are GMX_FORCE_STATECHANGED,
 * GMX_FORCE_NS (do neighbour searching), GMX_FORCE_FORCES (compute and
 * communicate forces, spread virtual-site forces, compute the virial) and
 * GMX_FORCE_BONDED (required for the position-restraint term); flags is also
 * passed on to do_force_lowlevel().
 *
 * vir_force is cleared at entry. Energy terms are reset with
 * reset_energies() and accumulated in enerd. mu_tot receives the dipole,
 * interpolated with lambda when fr->efep != efepNO. buf is a work buffer that
 * is handed to the coordinate/force communication routines.
 *
 * The dipole accumulator mu is static and is only recomputed when
 * GMX_FORCE_STATECHANGED is set, so this function keeps state between calls.
 */
void do_force(FILE *fplog,t_commrec *cr,
              t_inputrec *inputrec,
              int step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
              gmx_localtop_t *top,
              gmx_groups_t *groups,
              matrix box,rvec x[],history_t *hist,
              rvec f[],rvec buf[],
              tensor vir_force,
              t_mdatoms *mdatoms,
              gmx_enerdata_t *enerd,t_fcdata *fcd,
              real lambda,t_graph *graph,
              t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
              real t,FILE *field,gmx_edsam_t ed,
              int flags)
{
    static rvec box_size;
    int    cg0,cg1,i,j;
    int    start,homenr;
    static double mu[2*DIM];
    rvec   mu_tot_AB[2];
    bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS,bDoForces;
    matrix boxs;
    real   e,v,dvdl;
    t_pbc  pbc;
    float  cycles_ppdpme,cycles_pme,cycles_force;

    start  = mdatoms->start;
    homenr = mdatoms->homenr;

    bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));

    clear_mat(vir_force);

    /* Charge-group range [cg0,cg1) handled here */
    if (PARTDECOMP(cr)) {
        pd_cg_range(cr,&cg0,&cg1);
    } else {
        cg0 = 0;
        if (DOMAINDECOMP(cr))
            cg1 = cr->dd->ncg_tot;
        else
            cg1 = top->cgs.nr;
        if (fr->n_tpi > 0)
            cg1--;
    }

    bStateChanged = (flags & GMX_FORCE_STATECHANGED);
    bNS           = (flags & GMX_FORCE_NS);
    bFillGrid     = (bNS && bStateChanged);
    bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
    bDoForces     = (flags & GMX_FORCE_FORCES);

    if (bStateChanged) {
        update_forcerec(fplog,fr,box);

        /* Calculate total (local) dipole moment in a temporary common array.
         * This makes it possible to sum them over nodes faster.
         */
        calc_mu(start,homenr,
                x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
                mu,mu+DIM);
    }

    if (fr->ePBC != epbcNONE) {
        /* Compute shift vectors every step,
         * because of pressure coupling or box deformation!
         */
        if (DYNAMIC_BOX(*inputrec) && bStateChanged)
            calc_shifts(box,fr->shift_vec);

        if (bCalcCGCM) {
            put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
                                     &(top->cgs),x,fr->cg_cm);
            inc_nrnb(nrnb,eNR_CGCM,homenr);
            inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
        }
        else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
            unshift_self(graph,box,x);
        }
    }
    else if (bCalcCGCM) {
        calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
        inc_nrnb(nrnb,eNR_CGCM,homenr);
    }

    if (bCalcCGCM) {
        if (PAR(cr)) {
            move_cgcm(fplog,cr,fr->cg_cm);
        }
        if (gmx_debug_at)
            pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
    }

#ifdef GMX_MPI
    if (!(cr->duty & DUTY_PME)) {
        /* Send particle coordinates to the pme nodes.
         * Since this is only implemented for domain decomposition
         * and domain decomposition does not use the graph,
         * we do not need to worry about shifting.
         */

        wallcycle_start(wcycle,ewcPP_PMESENDX);
        GMX_MPE_LOG(ev_send_coordinates_start);

        /* With two walls a box scaled along z by wall_ewald_zfac is sent */
        bBS = (inputrec->nwall == 2);
        if (bBS) {
            copy_mat(box,boxs);
            svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
        }

        gmx_pme_send_x(cr,bBS ? boxs : box,x,mdatoms->nChargePerturbed,lambda);

        GMX_MPE_LOG(ev_send_coordinates_finish);
        wallcycle_stop(wcycle,ewcPP_PMESENDX);
    }
#endif /* GMX_MPI */

    /* Communicate coordinates and sum dipole if necessary */
    if (PAR(cr)) {
        wallcycle_start(wcycle,ewcMOVEX);
        if (DOMAINDECOMP(cr)) {
            dd_move_x(cr->dd,box,x,buf);
        } else {
            move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
        }
        /* When we don't need the total dipole we sum it in global_stat */
        if (NEED_MUTOT(*inputrec))
            gmx_sumd(2*DIM,mu,cr);
        wallcycle_stop(wcycle,ewcMOVEX);
    }
    /* Unpack mu into the A-state (index 0) and B-state (index 1) dipoles */
    for(i=0; i<2; i++)
        for(j=0;j<DIM;j++)
            mu_tot_AB[i][j] = mu[i*DIM + j];
    if (fr->efep == efepNO)
        copy_rvec(mu_tot_AB[0],mu_tot);
    else
        for(j=0; j<DIM; j++)
            mu_tot[j] = (1.0 - lambda)*mu_tot_AB[0][j] + lambda*mu_tot_AB[1][j];

    /* Reset energies */
    reset_energies(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
    if (bNS) {
        wallcycle_start(wcycle,ewcNS);

        if (graph && bStateChanged)
            /* Calculate intramolecular shift vectors to make molecules whole */
            mk_mshift(fplog,graph,fr->ePBC,box,x);

        /* Reset long range forces if necessary */
        if (fr->bTwinRange) {
            clear_rvecs(fr->f_twin_n,fr->f_twin);
            clear_rvecs(SHIFTS,fr->fshift_twin);
        }
        /* Do the actual neighbour searching and if twin range electrostatics
         * also do the calculation of long range forces and energies.
         */
        dvdl = 0;
        ns(fplog,fr,x,f,box,groups,&(inputrec->opts),top,mdatoms,
           cr,nrnb,step,lambda,&dvdl,&enerd->grpp,bFillGrid,bDoForces);
        if (bSepDVDL)
            fprintf(fplog,sepdvdlformat,"LR non-bonded",0,dvdl);
        enerd->dvdl_lr       = dvdl;
        enerd->term[F_DVDL] += dvdl;

        wallcycle_stop(wcycle,ewcNS);
    }

    if (DOMAINDECOMP(cr)) {
        if (!(cr->duty & DUTY_PME)) {
            wallcycle_start(wcycle,ewcPPDURINGPME);
            dd_force_flop_start(cr->dd,nrnb);
        }
    }
    /* Start the force cycle counter.
     * This counter is stopped in do_forcelow_level.
     * No parallel communication should occur while this counter is running,
     * since that will interfere with the dynamic load balancing.
     */
    wallcycle_start(wcycle,ewcFORCE);

    if (bDoForces) {
        /* Reset PME/Ewald forces if necessary */
        if (fr->bF_NoVirSum) {
            GMX_BARRIER(cr->mpi_comm_mygroup);
            if (fr->bDomDec)
                clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
            else
                clear_rvecs(homenr,fr->f_novirsum+start);
            GMX_BARRIER(cr->mpi_comm_mygroup);
        }
        /* Copy long range forces into normal buffers */
        if (fr->bTwinRange) {
            for(i=0; i<fr->f_twin_n; i++)
                copy_rvec(fr->f_twin[i],f[i]);
            for(i=0; i<SHIFTS; i++)
                copy_rvec(fr->fshift_twin[i],fr->fshift[i]);
        }
        else {
            /* No twin-range forces: start from zeroed force arrays */
            if (DOMAINDECOMP(cr))
                clear_rvecs(cr->dd->nat_tot,f);
            else
                clear_rvecs(mdatoms->nr,f);
            clear_rvecs(SHIFTS,fr->fshift);
        }
        clear_rvec(fr->vir_diag_posres);
        GMX_BARRIER(cr->mpi_comm_mygroup);
    }
    if (inputrec->ePull == epullCONSTRAINT)
        clear_pull_forces(inputrec->pull);

    /* update QMMMrec, if necessary */
    if(fr->bQMMM)
        update_QMMMrec(cr,fr,x,mdatoms,box,top);

    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) {
        /* Position restraints always require full pbc */
        set_pbc(&pbc,inputrec->ePBC,box);
        /* The restraint forces are added to f_novirsum, not to f */
        v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
                   top->idef.iparams_posres,
                   (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
                   inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda,&dvdl,
                   fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
        if (bSepDVDL) {
            fprintf(fplog,sepdvdlformat,
                    interaction_function[F_POSRES].longname,v,dvdl);
        }
        enerd->term[F_POSRES] += v;
        enerd->term[F_DVDL]   += dvdl;
        inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
    }
    /* Compute the bonded and non-bonded forces */
    do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
                      cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
                      x,hist,f,enerd,fcd,box,lambda,graph,&(top->excls),mu_tot_AB,
                      flags,&cycles_force);
    GMX_BARRIER(cr->mpi_comm_mygroup);

    if (ed) {
        do_flood(fplog,cr,x,f,ed,box,step);
    }

    if (DOMAINDECOMP(cr)) {
        dd_force_flop_stop(cr->dd,nrnb);
        if (wcycle)
            dd_cycles_add(cr->dd,cycles_force,ddCyclF);
    }

    if (bDoForces) {
        /* Compute forces due to electric field */
        calc_f_el(MASTER(cr) ? field : NULL,
                  start,homenr,mdatoms->chargeA,x,f,inputrec->ex,inputrec->et,t);

        /* When using PME/Ewald we compute the long range virial there.
         * otherwise we do it based on long range forces from twin range
         * cut-off based calculation (or not at all).
         */

        /* Communicate the forces */
        if (PAR(cr)) {
            wallcycle_start(wcycle,ewcMOVEF);
            if (DOMAINDECOMP(cr)) {
                dd_move_f(cr->dd,f,buf,fr->fshift);
                /* Position restraint do not introduce inter-cg forces */
                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl)
                    dd_move_f(cr->dd,fr->f_novirsum,buf,NULL);
            } else {
                move_f(fplog,cr,GMX_LEFT,GMX_RIGHT,f,buf,nrnb);
            }
            wallcycle_stop(wcycle,ewcMOVEF);
        }
    }

    if (bDoForces) {
        if (vsite) {
            wallcycle_start(wcycle,ewcVSITESPREAD);
            spread_vsite_f(fplog,vsite,x,f,fr->fshift,nrnb,
                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
            wallcycle_stop(wcycle,ewcVSITESPREAD);
        }

        /* Calculation of the virial must be done after vsites! */
        calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
                    vir_force,graph,box,nrnb,fr,inputrec->ePBC);
    }

    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) {
        /* Calculate the center of mass forces, this requires communication,
         * which is why pull_potential is called close to other communication.
         * The virial contribution is calculated directly,
         * which is why we call pull_potential after calc_virial.
         */
        set_pbc(&pbc,inputrec->ePBC,box);
        dvdl = 0;
        enerd->term[F_COM_PULL] =
            pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc,
                           cr,t,lambda,x,f,vir_force,&dvdl);
        if (bSepDVDL)
            fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl);
        enerd->term[F_DVDL] += dvdl;
    }

    /* NOTE(review): the matching wallcycle_start(ewcPPDURINGPME) above is
     * additionally guarded by DOMAINDECOMP(cr), and cr->dd is used here
     * without that guard - presumably nodes without PME duty only exist
     * with domain decomposition; confirm.
     */
    if (!(cr->duty & DUTY_PME)) {
        cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
        dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
    }

#ifdef GMX_MPI
    if (PAR(cr) && !(cr->duty & DUTY_PME)) {
        /* In case of node-splitting, the PP nodes receive the long-range
         * forces, virial and energy from the PME nodes here.
         */
        wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
        dvdl = 0;
        gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl,
                          &cycles_pme);
        if (bSepDVDL)
            fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl);
        enerd->term[F_COUL_RECIP] += e;
        enerd->term[F_DVDL] += dvdl;
        if (wcycle)
            dd_cycles_add(cr->dd,cycles_pme,ddCyclPME);
        wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
    }
#endif

    if (bDoForces && fr->bF_NoVirSum) {
        if (vsite) {
            /* Spread the mesh force on virtual sites to the other particles...
             * This is parallellized. MPI communication is performed
             * if the constructing atoms aren't local.
             */
            wallcycle_start(wcycle,ewcVSITESPREAD);
            spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,nrnb,
                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
            wallcycle_stop(wcycle,ewcVSITESPREAD);
        }
        /* Now add the forces, this is local */
        if (fr->bDomDec) {
            sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
        } else {
            sum_forces(start,start+homenr,f,fr->f_novirsum);
        }
        if (EEL_FULL(fr->eeltype)) {
            /* Add the mesh contribution to the virial */
            m_add(vir_force,fr->vir_el_recip,vir_force);
        }
        if (debug)
            pr_rvecs(debug,0,"vir_force",vir_force,DIM);
    }

    /* Sum the potential energy terms from group contributions */
    sum_epot(&(inputrec->opts),enerd);

    if (fr->print_force >= 0 && bDoForces)
        print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
}
void init_pull(FILE *fplog, t_inputrec *ir, int nfile, const t_filenm fnm[], gmx_mtop_t *mtop, t_commrec *cr, const output_env_t oenv, real lambda, gmx_bool bOutFile, unsigned long Flags) { t_pull *pull; t_pull_group *pgrp; int c, g, start = 0, end = 0, m; pull = ir->pull; pull->ePBC = ir->ePBC; switch (pull->ePBC) { case epbcNONE: pull->npbcdim = 0; break; case epbcXY: pull->npbcdim = 2; break; default: pull->npbcdim = 3; break; } if (fplog) { gmx_bool bAbs, bCos; bAbs = FALSE; for (c = 0; c < pull->ncoord; c++) { if (pull->group[pull->coord[c].group[0]].nat == 0 || pull->group[pull->coord[c].group[1]].nat == 0) { bAbs = TRUE; } } fprintf(fplog, "\nWill apply %s COM pulling in geometry '%s'\n", EPULLTYPE(ir->ePull), EPULLGEOM(pull->eGeom)); fprintf(fplog, "with %d pull coordinate%s and %d group%s\n", pull->ncoord, pull->ncoord == 1 ? "" : "s", pull->ngroup, pull->ngroup == 1 ? "" : "s"); if (bAbs) { fprintf(fplog, "with an absolute reference\n"); } bCos = FALSE; for (g = 0; g < pull->ngroup; g++) { if (pull->group[g].nat > 1 && pull->group[g].pbcatom < 0) { /* We are using cosine weighting */ fprintf(fplog, "Cosine weighting is used for group %d\n", g); bCos = TRUE; } } if (bCos) { please_cite(fplog, "Engin2010"); } } /* We always add the virial contribution, * except for geometry = direction_periodic where this is impossible. */ pull->bVirial = (pull->eGeom != epullgDIRPBC); if (getenv("GMX_NO_PULLVIR") != NULL) { if (fplog) { fprintf(fplog, "Found env. var., will not add the virial contribution of the COM pull forces\n"); } pull->bVirial = FALSE; } if (cr && PARTDECOMP(cr)) { pd_at_range(cr, &start, &end); } pull->rbuf = NULL; pull->dbuf = NULL; pull->dbuf_cyl = NULL; pull->bRefAt = FALSE; pull->cosdim = -1; for (g = 0; g < pull->ngroup; g++) { pgrp = &pull->group[g]; pgrp->epgrppbc = epgrppbcNONE; if (pgrp->nat > 0) { /* Determine if we need to take PBC into account for calculating * the COM's of the pull groups. 
*/ for (m = 0; m < pull->npbcdim; m++) { if (pull->dim[m] && pgrp->nat > 1) { if (pgrp->pbcatom >= 0) { pgrp->epgrppbc = epgrppbcREFAT; pull->bRefAt = TRUE; } else { if (pgrp->weight) { gmx_fatal(FARGS, "Pull groups can not have relative weights and cosine weighting at same time"); } pgrp->epgrppbc = epgrppbcCOS; if (pull->cosdim >= 0 && pull->cosdim != m) { gmx_fatal(FARGS, "Can only use cosine weighting with pulling in one dimension (use mdp option pull_dim)"); } pull->cosdim = m; } } } /* Set the indices */ init_pull_group_index(fplog, cr, start, end, g, pgrp, pull->dim, mtop, ir, lambda); if (PULL_CYL(pull) && pgrp->invtm == 0) { gmx_fatal(FARGS, "Can not have frozen atoms in a cylinder pull group"); } } else { /* Absolute reference, set the inverse mass to zero */ pgrp->invtm = 0; pgrp->wscale = 1; } } /* if we use dynamic reference groups, do some initialising for them */ if (PULL_CYL(pull)) { if (ir->ePull == epullCONSTRAINT && pull->ncoord > 1) { /* We can't easily update the single reference group with multiple * constraints. This would require recalculating COMs. */ gmx_fatal(FARGS, "Constraint COM pulling supports only one coordinate with geometry=cylinder, you can use umbrella pulling with multiple coordinates"); } for (c = 0; c < pull->ncoord; c++) { if (pull->group[pull->coord[c].group[0]].nat == 0) { gmx_fatal(FARGS, "Dynamic reference groups are not supported when using absolute reference!\n"); } } snew(pull->dyna, pull->ncoord); } /* Only do I/O when we are doing dynamics and if we are the MASTER */ pull->out_x = NULL; pull->out_f = NULL; if (bOutFile) { if (pull->nstxout > 0) { pull->out_x = open_pull_out(opt2fn("-px", nfile, fnm), pull, oenv, TRUE, Flags); } if (pull->nstfout > 0) { pull->out_f = open_pull_out(opt2fn("-pf", nfile, fnm), pull, oenv, FALSE, Flags); } } }
gmx_bool constrain(FILE *fplog,gmx_bool bLog,gmx_bool bEner, struct gmx_constr *constr, t_idef *idef,t_inputrec *ir,gmx_ekindata_t *ekind, t_commrec *cr, gmx_large_int_t step,int delta_step, t_mdatoms *md, rvec *x,rvec *xprime,rvec *min_proj, gmx_bool bMolPBC,matrix box, real lambda,real *dvdlambda, rvec *v,tensor *vir, t_nrnb *nrnb,int econq,gmx_bool bPscal, real veta, real vetanew) { gmx_bool bOK,bDump; int start,homenr,nrend; int i,j,d; int ncons,settle_error; tensor vir_r_m_dr; rvec *vstor; real invdt,vir_fac,t; t_ilist *settle; int nsettle; t_pbc pbc,*pbc_null; char buf[22]; t_vetavars vetavar; int nth,th; if (econq == econqForceDispl && !EI_ENERGY_MINIMIZATION(ir->eI)) { gmx_incons("constrain called for forces displacements while not doing energy minimization, can not do this while the LINCS and SETTLE constraint connection matrices are mass weighted"); } bOK = TRUE; bDump = FALSE; start = md->start; homenr = md->homenr; nrend = start+homenr; /* set constants for pressure control integration */ init_vetavars(&vetavar,econq!=econqCoord, veta,vetanew,ir,ekind,bPscal); if (ir->delta_t == 0) { invdt = 0; } else { invdt = 1/ir->delta_t; } if (ir->efep != efepNO && EI_DYNAMICS(ir->eI)) { /* Set the constraint lengths for the step at which this configuration * is meant to be. The invmasses should not be changed. */ lambda += delta_step*ir->fepvals->delta_lambda; } if (vir != NULL) { clear_mat(vir_r_m_dr); } where(); settle = &idef->il[F_SETTLE]; nsettle = settle->nr/(1+NRAL(F_SETTLE)); if (nsettle > 0) { nth = gmx_omp_nthreads_get(emntSETTLE); } else { nth = 1; } if (nth > 1 && constr->vir_r_m_dr_th == NULL) { snew(constr->vir_r_m_dr_th,nth); snew(constr->settle_error,nth); } settle_error = -1; /* We do not need full pbc when constraints do not cross charge groups, * i.e. when dd->constraint_comm==NULL. * Note that PBC for constraints is different from PBC for bondeds. * For constraints there is both forward and backward communication. 
*/ if (ir->ePBC != epbcNONE && (cr->dd || bMolPBC) && !(cr->dd && cr->dd->constraint_comm==NULL)) { /* With pbc=screw the screw has been changed to a shift * by the constraint coordinate communication routine, * so that here we can use normal pbc. */ pbc_null = set_pbc_dd(&pbc,ir->ePBC,cr->dd,FALSE,box); } else { pbc_null = NULL; } /* Communicate the coordinates required for the non-local constraints * for LINCS and/or SETTLE. */ if (cr->dd) { dd_move_x_constraints(cr->dd,box,x,xprime); } else if (PARTDECOMP(cr)) { pd_move_x_constraints(cr,x,xprime); } if (constr->lincsd != NULL) { bOK = constrain_lincs(fplog,bLog,bEner,ir,step,constr->lincsd,md,cr, x,xprime,min_proj, box,pbc_null,lambda,dvdlambda, invdt,v,vir!=NULL,vir_r_m_dr, econq,nrnb, constr->maxwarn,&constr->warncount_lincs); if (!bOK && constr->maxwarn >= 0) { if (fplog != NULL) { fprintf(fplog,"Constraint error in algorithm %s at step %s\n", econstr_names[econtLINCS],gmx_step_str(step,buf)); } bDump = TRUE; } } if (constr->nblocks > 0) { switch (econq) { case (econqCoord): bOK = bshakef(fplog,constr->shaked, homenr,md->invmass,constr->nblocks,constr->sblock, idef,ir,x,xprime,nrnb, constr->lagr,lambda,dvdlambda, invdt,v,vir!=NULL,vir_r_m_dr, constr->maxwarn>=0,econq,&vetavar); break; case (econqVeloc): bOK = bshakef(fplog,constr->shaked, homenr,md->invmass,constr->nblocks,constr->sblock, idef,ir,x,min_proj,nrnb, constr->lagr,lambda,dvdlambda, invdt,NULL,vir!=NULL,vir_r_m_dr, constr->maxwarn>=0,econq,&vetavar); break; default: gmx_fatal(FARGS,"Internal error, SHAKE called for constraining something else than coordinates"); break; } if (!bOK && constr->maxwarn >= 0) { if (fplog != NULL) { fprintf(fplog,"Constraint error in algorithm %s at step %s\n", econstr_names[econtSHAKE],gmx_step_str(step,buf)); } bDump = TRUE; } } if (nsettle > 0) { int calcvir_atom_end; if (vir == NULL) { calcvir_atom_end = 0; } else { calcvir_atom_end = md->start + md->homenr; } switch (econq) { case econqCoord: #pragma omp parallel 
for num_threads(nth) schedule(static) for(th=0; th<nth; th++) { int start_th,end_th; if (th > 0) { clear_mat(constr->vir_r_m_dr_th[th]); } start_th = (nsettle* th )/nth; end_th = (nsettle*(th+1))/nth; if (start_th >= 0 && end_th - start_th > 0) { csettle(constr->settled, end_th-start_th, settle->iatoms+start_th*(1+NRAL(F_SETTLE)), pbc_null, x[0],xprime[0], invdt,v?v[0]:NULL,calcvir_atom_end, th == 0 ? vir_r_m_dr : constr->vir_r_m_dr_th[th], th == 0 ? &settle_error : &constr->settle_error[th], &vetavar); } } inc_nrnb(nrnb,eNR_SETTLE,nsettle); if (v != NULL) { inc_nrnb(nrnb,eNR_CONSTR_V,nsettle*3); } if (vir != NULL) { inc_nrnb(nrnb,eNR_CONSTR_VIR,nsettle*3); } break; case econqVeloc: case econqDeriv: case econqForce: case econqForceDispl: #pragma omp parallel for num_threads(nth) schedule(static) for(th=0; th<nth; th++) { int start_th,end_th; if (th > 0) { clear_mat(constr->vir_r_m_dr_th[th]); } start_th = (nsettle* th )/nth; end_th = (nsettle*(th+1))/nth; if (start_th >= 0 && end_th - start_th > 0) { settle_proj(fplog,constr->settled,econq, end_th-start_th, settle->iatoms+start_th*(1+NRAL(F_SETTLE)), pbc_null, x, xprime,min_proj,calcvir_atom_end, th == 0 ? 
vir_r_m_dr : constr->vir_r_m_dr_th[th], &vetavar); } } /* This is an overestimate */ inc_nrnb(nrnb,eNR_SETTLE,nsettle); break; case econqDeriv_FlexCon: /* Nothing to do, since the are no flexible constraints in settles */ break; default: gmx_incons("Unknown constraint quantity for settle"); } } if (settle->nr > 0) { /* Combine virial and error info of the other threads */ for(i=1; i<nth; i++) { m_add(vir_r_m_dr,constr->vir_r_m_dr_th[i],vir_r_m_dr); settle_error = constr->settle_error[i]; } if (econq == econqCoord && settle_error >= 0) { bOK = FALSE; if (constr->maxwarn >= 0) { char buf[256]; sprintf(buf, "\nstep " gmx_large_int_pfmt ": Water molecule starting at atom %d can not be " "settled.\nCheck for bad contacts and/or reduce the timestep if appropriate.\n", step,ddglatnr(cr->dd,settle->iatoms[settle_error*(1+NRAL(F_SETTLE))+1])); if (fplog) { fprintf(fplog,"%s",buf); } fprintf(stderr,"%s",buf); constr->warncount_settle++; if (constr->warncount_settle > constr->maxwarn) { too_many_constraint_warnings(-1,constr->warncount_settle); } bDump = TRUE; } } } free_vetavars(&vetavar); if (vir != NULL) { switch (econq) { case econqCoord: vir_fac = 0.5/(ir->delta_t*ir->delta_t); break; case econqVeloc: vir_fac = 0.5/ir->delta_t; break; case econqForce: case econqForceDispl: vir_fac = 0.5; break; default: vir_fac = 0; gmx_incons("Unsupported constraint quantity for virial"); } if (EI_VV(ir->eI)) { vir_fac *= 2; /* only constraining over half the distance here */ } for(i=0; i<DIM; i++) { for(j=0; j<DIM; j++) { (*vir)[i][j] = vir_fac*vir_r_m_dr[i][j]; } } } if (bDump) { dump_confs(fplog,step,constr->warn_mtop,start,homenr,cr,x,xprime,box); } if (econq == econqCoord) { if (ir->ePull == epullCONSTRAINT) { if (EI_DYNAMICS(ir->eI)) { t = ir->init_t + (step + delta_step)*ir->delta_t; } else { t = ir->init_t; } set_pbc(&pbc,ir->ePBC,box); pull_constraint(ir->pull,md,&pbc,cr,ir->delta_t,t,x,xprime,v,*vir); } if (constr->ed && delta_step > 0) { /* apply the essential dynamcs 
constraints here */ do_edsam(ir,step,md,cr,xprime,v,box,constr->ed); } } return bOK; }
/* SSE2 double-precision kernel that fills born->bRad and fr->invsqrta from
 * the STILL_* parameterised pair sums.
 *
 * For every i atom of neighbour list nl the j atoms are processed two at a
 * time, followed by a scalar tail for an odd last j atom. Per pair the
 * kernel accumulates
 *   - into work[j]  : prod_ai*icf4  (contribution of i to j)
 *   - into gpi      : prod*icf4     (contribution of j to i)
 * and appends two packed values to fr->dadx: prod*icf6 and prod_ai*icf6.
 * The tail iteration also writes and skips two full packed values, so the
 * dadx stream always advances by 4 doubles per inner-loop pass.
 *
 * natoms is the number of entries of work that are cleared and, with
 * particle decomposition, summed over nodes. top and atype are not used.
 * Always returns 0.
 *
 * NOTE(review): _mm_store_pd is an aligned store, so fr->dadx presumably is
 * 16-byte aligned - confirm at the allocation site.
 */
int calc_gb_rad_still_sse2_double(t_commrec *cr, t_forcerec *fr,
                                  int natoms, gmx_localtop_t *top,
                                  const t_atomtypes *atype, double *x, t_nblist *nl, gmx_genborn_t *born)
{
    int i,k,n,ii,is3,ii3,nj0,nj1,offset;
    int jnrA,jnrB,j3A,j3B;
    int *mdtype;
    double shX,shY,shZ;
    int *jjnr;
    double *shiftvec;

    double gpi_ai,gpi2;
    double factor;
    double *gb_radius;
    double *vsolv;
    double *work;
    double *dadx;

    __m128d ix,iy,iz;
    __m128d jx,jy,jz;
    __m128d dx,dy,dz;
    __m128d tx,ty,tz;
    __m128d rsq,rinv,rinv2,rinv4,rinv6;
    __m128d ratio,gpi,rai,raj,vai,vaj,rvdw;
    __m128d ccf,dccf,theta,cosq,term,sinq,res,prod,prod_ai,tmp;
    __m128d mask,icf4,icf6,mask_cmp;

    const __m128d half = _mm_set1_pd(0.5);
    const __m128d three = _mm_set1_pd(3.0);
    const __m128d one = _mm_set1_pd(1.0);
    const __m128d two = _mm_set1_pd(2.0);
    const __m128d zero = _mm_set1_pd(0.0);
    const __m128d four = _mm_set1_pd(4.0);

    const __m128d still_p5inv = _mm_set1_pd(STILL_P5INV);
    const __m128d still_pip5 = _mm_set1_pd(STILL_PIP5);
    const __m128d still_p4 = _mm_set1_pd(STILL_P4);

    factor = 0.5 * ONE_4PI_EPS0;

    gb_radius = born->gb_radius;
    vsolv = born->vsolv;
    work = born->gpol_still_work;
    jjnr = nl->jjnr;
    shiftvec = fr->shift_vec[0];
    dadx = fr->dadx;

    jnrA = jnrB = 0;
    jx = _mm_setzero_pd();
    jy = _mm_setzero_pd();
    jz = _mm_setzero_pd();

    n = 0;

    /* Start from an empty accumulation buffer */
    for(i=0;i<natoms;i++)
    {
        work[i]=0;
    }

    for(i=0;i<nl->nri;i++)
    {
        ii = nl->iinr[i];
        ii3 = ii*3;
        is3 = 3*nl->shift[i];
        shX = shiftvec[is3];
        shY = shiftvec[is3+1];
        shZ = shiftvec[is3+2];
        nj0 = nl->jindex[i];
        nj1 = nl->jindex[i+1];

        /* Shifted i coordinates, broadcast to both lanes */
        ix = _mm_set1_pd(shX+x[ii3+0]);
        iy = _mm_set1_pd(shY+x[ii3+1]);
        iz = _mm_set1_pd(shZ+x[ii3+2]);

        /* Polarization energy for atom ai */
        gpi = _mm_setzero_pd();

        rai = _mm_load1_pd(gb_radius+ii);
        prod_ai = _mm_set1_pd(STILL_P4*vsolv[ii]);

        /* Main loop: two j atoms per iteration */
        for(k=nj0;k<nj1-1;k+=2)
        {
            jnrA = jjnr[k];
            jnrB = jjnr[k+1];

            j3A = 3*jnrA;
            j3B = 3*jnrB;

            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);

            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA,gb_radius+jnrB,raj);
            GMX_MM_LOAD_2VALUES_PD(vsolv+jnrA,vsolv+jnrB,vaj);

            dx = _mm_sub_pd(ix,jx);
            dy = _mm_sub_pd(iy,jy);
            dz = _mm_sub_pd(iz,jz);

            rsq = gmx_mm_calc_rsq_pd(dx,dy,dz);
            rinv = gmx_mm_invsqrt_pd(rsq);
            rinv2 = _mm_mul_pd(rinv,rinv);
            rinv4 = _mm_mul_pd(rinv2,rinv2);
            rinv6 = _mm_mul_pd(rinv4,rinv2);

            /* ratio = rsq/(rai+raj)^2 selects the close-contact branch */
            rvdw = _mm_add_pd(rai,raj);
            ratio = _mm_mul_pd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw,rvdw)));

            mask_cmp = _mm_cmple_pd(ratio,still_p5inv);

            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
            if( 0 == _mm_movemask_pd(mask_cmp) )
            {
                /* if ratio>still_p5inv for ALL elements */
                ccf = one;
                dccf = _mm_setzero_pd();
            }
            else
            {
                /* Clamping ratio to still_p5inv makes lanes above the
                 * threshold evaluate the formula at the threshold itself.
                 */
                ratio = _mm_min_pd(ratio,still_p5inv);
                theta = _mm_mul_pd(ratio,still_pip5);
                gmx_mm_sincos_pd(theta,&sinq,&cosq);
                term = _mm_mul_pd(half,_mm_sub_pd(one,cosq));
                ccf = _mm_mul_pd(term,term);
                dccf = _mm_mul_pd(_mm_mul_pd(two,term),
                                  _mm_mul_pd(sinq,theta));
            }

            prod = _mm_mul_pd(still_p4,vaj);
            icf4 = _mm_mul_pd(ccf,rinv4);
            icf6 = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four,ccf),dccf), rinv6);

            /* Contribution of i to both j atoms, and of both j atoms to i */
            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA,work+jnrB,_mm_mul_pd(prod_ai,icf4));

            gpi = _mm_add_pd(gpi, _mm_mul_pd(prod,icf4) );

            /* Two packed derivative terms per pass */
            _mm_store_pd(dadx,_mm_mul_pd(prod,icf6));
            dadx+=2;
            _mm_store_pd(dadx,_mm_mul_pd(prod_ai,icf6));
            dadx+=2;
        }

        /* Tail: a single remaining j atom, computed in the low lane with
         * scalar (_sd) intrinsics.
         */
        if(k<nj1)
        {
            jnrA = jjnr[k];

            j3A = 3*jnrA;

            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);

            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA,raj);
            GMX_MM_LOAD_1VALUE_PD(vsolv+jnrA,vaj);

            dx = _mm_sub_sd(ix,jx);
            dy = _mm_sub_sd(iy,jy);
            dz = _mm_sub_sd(iz,jz);

            rsq = gmx_mm_calc_rsq_pd(dx,dy,dz);
            rinv = gmx_mm_invsqrt_pd(rsq);
            rinv2 = _mm_mul_sd(rinv,rinv);
            rinv4 = _mm_mul_sd(rinv2,rinv2);
            rinv6 = _mm_mul_sd(rinv4,rinv2);

            rvdw = _mm_add_sd(rai,raj);
            ratio = _mm_mul_sd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw,rvdw)));

            mask_cmp = _mm_cmple_sd(ratio,still_p5inv);

            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
            if( 0 == _mm_movemask_pd(mask_cmp) )
            {
                /* if ratio>still_p5inv for ALL elements */
                ccf = one;
                dccf = _mm_setzero_pd();
            }
            else
            {
                ratio = _mm_min_sd(ratio,still_p5inv);
                theta = _mm_mul_sd(ratio,still_pip5);
                gmx_mm_sincos_pd(theta,&sinq,&cosq);
                term = _mm_mul_sd(half,_mm_sub_sd(one,cosq));
                ccf = _mm_mul_sd(term,term);
                dccf = _mm_mul_sd(_mm_mul_sd(two,term),
                                  _mm_mul_sd(sinq,theta));
            }

            prod = _mm_mul_sd(still_p4,vaj);
            icf4 = _mm_mul_sd(ccf,rinv4);
            icf6 = _mm_mul_sd( _mm_sub_sd( _mm_mul_sd(four,ccf),dccf), rinv6);

            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA,_mm_mul_sd(prod_ai,icf4));

            gpi = _mm_add_sd(gpi, _mm_mul_sd(prod,icf4) );

            /* Full packed stores keep the dadx stride identical to the main
             * loop; only the low lane belongs to the single j atom.
             */
            _mm_store_pd(dadx,_mm_mul_pd(prod,icf6));
            dadx+=2;
            _mm_store_pd(dadx,_mm_mul_pd(prod_ai,icf6));
            dadx+=2;
        }
        /* Add the accumulated gpi for atom ii to work[ii]
         * (presumably a horizontal sum of both lanes - see the helper).
         */
        gmx_mm_update_1pot_pd(gpi,work+ii);
    }

    /* Sum up the polarization energy from other nodes */
    if(PARTDECOMP(cr))
    {
        gmx_sum(natoms, work, cr);
    }
    else if(DOMAINDECOMP(cr))
    {
        dd_atom_sum_real(cr->dd, work);
    }

    /* Compute the radii */
    for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
    {
        if(born->use[i] != 0)
        {
            gpi_ai = born->gpol[i] + work[i]; /* add gpi to the initial pol energy gpi_ai*/
            gpi2 = gpi_ai * gpi_ai;
            /* Squaring before the inverse square root removes the sign of gpi_ai */
            born->bRad[i] = factor*gmx_invsqrt(gpi2);
            fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
        }
    }

    /* Extra (local) communication required for DD */
    if(DOMAINDECOMP(cr))
    {
        dd_atom_spread_real(cr->dd, born->bRad);
        dd_atom_spread_real(cr->dd, fr->invsqrta);
    }

    return 0;
}
/* SSE2 double-precision kernel that fills born->bRad and fr->invsqrta (and,
 * in the non-HCT branch, born->drobc) from pairwise descreening sums.
 *
 * For every i atom of neighbour list nl the j atoms are processed two at a
 * time, followed by a scalar tail for an odd last j atom. Each pair is
 * evaluated in both directions:
 *   - "aj -> ai" uses the j parameter sk_aj against radius rai and is
 *     accumulated in sum_ai (added to work[ii] after the j loop);
 *   - "ai -> aj" uses sk_ai against radius raj and is added to work[j].
 * Both directions also produce a derivative term; these are appended to
 * fr->dadx as two packed values (dadx1, then dadx2) per inner-loop pass,
 * including the tail pass.
 *
 * gb_algorithm selects the final radius formula: egbHCT for the first
 * branch, anything else for the branch marked OBC.
 * natoms is only used as the element count for gmx_sum() with particle
 * decomposition; atype is not used. Always returns 0.
 *
 * NOTE(review): _mm_store_pd is an aligned store, so fr->dadx presumably is
 * 16-byte aligned - confirm at the allocation site.
 */
int calc_gb_rad_hct_obc_sse2_double(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
                                    const t_atomtypes *atype, double *x, t_nblist *nl, gmx_genborn_t *born,t_mdatoms *md,int gb_algorithm)
{
    int i,ai,k,n,ii,ii3,is3,nj0,nj1,at0,at1,offset;
    int jnrA,jnrB;
    int j3A,j3B;
    double shX,shY,shZ;
    double rr,rr_inv,rr_inv2,sum_tmp,sum,sum2,sum3,gbr;
    double sum_ai2, sum_ai3,tsum,tchain,doffset;
    double *obc_param;
    double *gb_radius;
    double *work;
    int * jjnr;
    double *dadx;
    double *shiftvec;
    double min_rad,rad;

    __m128d ix,iy,iz,jx,jy,jz;
    __m128d dx,dy,dz,t1,t2,t3,t4;
    __m128d rsq,rinv,r;
    __m128d rai,rai_inv,raj, raj_inv,rai_inv2,sk,sk2,lij,dlij,duij;
    __m128d uij,lij2,uij2,lij3,uij3,diff2;
    __m128d lij_inv,sk2_inv,prod,log_term,tmp,tmp_sum;
    __m128d sum_ai, tmp_ai,sk_ai,sk_aj,sk2_ai,sk2_aj,sk2_rinv;
    __m128d dadx1,dadx2;
    __m128d logterm;
    __m128d mask;
    __m128d obc_mask1,obc_mask2,obc_mask3;

    __m128d oneeighth = _mm_set1_pd(0.125);
    __m128d onefourth = _mm_set1_pd(0.25);

    const __m128d half = _mm_set1_pd(0.5);
    const __m128d three = _mm_set1_pd(3.0);
    const __m128d one = _mm_set1_pd(1.0);
    const __m128d two = _mm_set1_pd(2.0);
    const __m128d zero = _mm_set1_pd(0.0);
    const __m128d neg = _mm_set1_pd(-1.0);

    /* Set the dielectric offset */
    doffset = born->gb_doffset;
    gb_radius = born->gb_radius;
    obc_param = born->param;
    work = born->gpol_hct_work;
    jjnr = nl->jjnr;
    dadx = fr->dadx;
    shiftvec = fr->shift_vec[0];

    jx = _mm_setzero_pd();
    jy = _mm_setzero_pd();
    jz = _mm_setzero_pd();

    jnrA = jnrB = 0;

    /* Start from an empty accumulation buffer */
    for(i=0;i<born->nr;i++)
    {
        work[i] = 0;
    }

    for(i=0;i<nl->nri;i++)
    {
        ii = nl->iinr[i];
        ii3 = ii*3;
        is3 = 3*nl->shift[i];
        shX = shiftvec[is3];
        shY = shiftvec[is3+1];
        shZ = shiftvec[is3+2];
        nj0 = nl->jindex[i];
        nj1 = nl->jindex[i+1];

        /* Shifted i coordinates, broadcast to both lanes */
        ix = _mm_set1_pd(shX+x[ii3+0]);
        iy = _mm_set1_pd(shY+x[ii3+1]);
        iz = _mm_set1_pd(shZ+x[ii3+2]);

        rai = _mm_load1_pd(gb_radius+ii);
        rai_inv= gmx_mm_inv_pd(rai);

        sum_ai = _mm_setzero_pd();

        sk_ai = _mm_load1_pd(born->param+ii);
        sk2_ai = _mm_mul_pd(sk_ai,sk_ai);

        /* Main loop: two j atoms per iteration */
        for(k=nj0;k<nj1-1;k+=2)
        {
            jnrA = jjnr[k];
            jnrB = jjnr[k+1];

            j3A = 3*jnrA;
            j3B = 3*jnrB;

            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);
            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA,gb_radius+jnrB,raj);
            GMX_MM_LOAD_2VALUES_PD(obc_param+jnrA,obc_param+jnrB,sk_aj);

            dx = _mm_sub_pd(ix, jx);
            dy = _mm_sub_pd(iy, jy);
            dz = _mm_sub_pd(iz, jz);

            rsq = gmx_mm_calc_rsq_pd(dx,dy,dz);

            rinv = gmx_mm_invsqrt_pd(rsq);
            r = _mm_mul_pd(rsq,rinv);

            /* Inverse radii of the j atoms */
            raj_inv = gmx_mm_inv_pd(raj);

            /* Evaluate influence of atom aj -> ai */
            t1 = _mm_add_pd(r,sk_aj);
            t2 = _mm_sub_pd(r,sk_aj);
            t3 = _mm_sub_pd(sk_aj,r);
            /* mask1: rai < r+sk, the pair contributes at all
             * mask2: rai < r-sk, lower bound is 1/(r-sk) instead of 1/rai
             * mask3: rai < sk-r, the extra t4 term is added
             */
            obc_mask1 = _mm_cmplt_pd(rai, t1);
            obc_mask2 = _mm_cmplt_pd(rai, t2);
            obc_mask3 = _mm_cmplt_pd(rai, t3);

            uij = gmx_mm_inv_pd(t1);
            /* Branch-free select between 1/(r-sk) and 1/rai */
            lij = _mm_or_pd( _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                             _mm_andnot_pd(obc_mask2,rai_inv));
            dlij = _mm_and_pd(one,obc_mask2);
            uij2 = _mm_mul_pd(uij, uij);
            uij3 = _mm_mul_pd(uij2,uij);
            lij2 = _mm_mul_pd(lij, lij);
            lij3 = _mm_mul_pd(lij2,lij);

            diff2 = _mm_sub_pd(uij2,lij2);
            lij_inv = gmx_mm_invsqrt_pd(lij2);
            sk2_aj = _mm_mul_pd(sk_aj,sk_aj);
            sk2_rinv = _mm_mul_pd(sk2_aj,rinv);
            prod = _mm_mul_pd(onefourth,sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_pd(uij,lij_inv));

            t1 = _mm_sub_pd(lij,uij);
            t2 = _mm_mul_pd(diff2,
                            _mm_sub_pd(_mm_mul_pd(onefourth,r),
                                       prod));
            t3 = _mm_mul_pd(half,_mm_mul_pd(rinv,logterm));
            t1 = _mm_add_pd(t1,_mm_add_pd(t2,t3));
            t4 = _mm_mul_pd(two,_mm_sub_pd(rai_inv,lij));
            t4 = _mm_and_pd(t4,obc_mask3);
            t1 = _mm_mul_pd(half,_mm_add_pd(t1,t4));

            /* Masked-out lanes add zero */
            sum_ai = _mm_add_pd(sum_ai, _mm_and_pd(t1,obc_mask1) );

            /* Derivative term for aj -> ai */
            t1 = _mm_add_pd(_mm_mul_pd(half,lij2),
                            _mm_mul_pd(prod,lij3));
            t1 = _mm_sub_pd(t1,
                            _mm_mul_pd(onefourth,
                                       _mm_add_pd(_mm_mul_pd(lij,rinv),
                                                  _mm_mul_pd(lij3,r))));
            t2 = _mm_mul_pd(onefourth,
                            _mm_add_pd(_mm_mul_pd(uij,rinv),
                                       _mm_mul_pd(uij3,r)));
            t2 = _mm_sub_pd(t2,
                            _mm_add_pd(_mm_mul_pd(half,uij2),
                                       _mm_mul_pd(prod,uij3)));
            t3 = _mm_mul_pd(_mm_mul_pd(onefourth,logterm),
                            _mm_mul_pd(rinv,rinv));
            t3 = _mm_sub_pd(t3,
                            _mm_mul_pd(_mm_mul_pd(diff2,oneeighth),
                                       _mm_add_pd(one,
                                                  _mm_mul_pd(sk2_rinv,rinv))));
            t1 = _mm_mul_pd(rinv,
                            _mm_add_pd(_mm_mul_pd(dlij,t1),
                                       _mm_add_pd(t2,t3)));

            dadx1 = _mm_and_pd(t1,obc_mask1);

            /* Evaluate influence of atom ai -> aj */
            t1 = _mm_add_pd(r,sk_ai);
            t2 = _mm_sub_pd(r,sk_ai);
            t3 = _mm_sub_pd(sk_ai,r);
            obc_mask1 = _mm_cmplt_pd(raj, t1);
            obc_mask2 = _mm_cmplt_pd(raj, t2);
            obc_mask3 = _mm_cmplt_pd(raj, t3);

            uij = gmx_mm_inv_pd(t1);
            lij = _mm_or_pd( _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                             _mm_andnot_pd(obc_mask2,raj_inv));
            dlij = _mm_and_pd(one,obc_mask2);
            uij2 = _mm_mul_pd(uij, uij);
            uij3 = _mm_mul_pd(uij2,uij);
            lij2 = _mm_mul_pd(lij, lij);
            lij3 = _mm_mul_pd(lij2,lij);

            diff2 = _mm_sub_pd(uij2,lij2);
            lij_inv = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv = _mm_mul_pd(sk2_ai,rinv);
            prod = _mm_mul_pd(onefourth,sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_pd(uij,lij_inv));

            t1 = _mm_sub_pd(lij,uij);
            t2 = _mm_mul_pd(diff2,
                            _mm_sub_pd(_mm_mul_pd(onefourth,r),
                                       prod));
            t3 = _mm_mul_pd(half,_mm_mul_pd(rinv,logterm));
            t1 = _mm_add_pd(t1,_mm_add_pd(t2,t3));
            t4 = _mm_mul_pd(two,_mm_sub_pd(raj_inv,lij));
            t4 = _mm_and_pd(t4,obc_mask3);
            t1 = _mm_mul_pd(half,_mm_add_pd(t1,t4));

            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA,work+jnrB,_mm_and_pd(t1,obc_mask1));

            /* Derivative term for ai -> aj */
            t1 = _mm_add_pd(_mm_mul_pd(half,lij2),
                            _mm_mul_pd(prod,lij3));
            t1 = _mm_sub_pd(t1,
                            _mm_mul_pd(onefourth,
                                       _mm_add_pd(_mm_mul_pd(lij,rinv),
                                                  _mm_mul_pd(lij3,r))));
            t2 = _mm_mul_pd(onefourth,
                            _mm_add_pd(_mm_mul_pd(uij,rinv),
                                       _mm_mul_pd(uij3,r)));
            t2 = _mm_sub_pd(t2,
                            _mm_add_pd(_mm_mul_pd(half,uij2),
                                       _mm_mul_pd(prod,uij3)));
            t3 = _mm_mul_pd(_mm_mul_pd(onefourth,logterm),
                            _mm_mul_pd(rinv,rinv));
            t3 = _mm_sub_pd(t3,
                            _mm_mul_pd(_mm_mul_pd(diff2,oneeighth),
                                       _mm_add_pd(one,
                                                  _mm_mul_pd(sk2_rinv,rinv))));
            t1 = _mm_mul_pd(rinv,
                            _mm_add_pd(_mm_mul_pd(dlij,t1),
                                       _mm_add_pd(t2,t3)));

            dadx2 = _mm_and_pd(t1,obc_mask1);

            /* Two packed derivative terms per pass */
            _mm_store_pd(dadx,dadx1);
            dadx += 2;
            _mm_store_pd(dadx,dadx2);
            dadx += 2;
        } /* end normal inner loop */

        /* Tail: a single remaining j atom, computed in the low lane with
         * scalar (_sd) intrinsics; a few packed operations are mixed in,
         * which yield the same low-lane value.
         */
        if(k<nj1)
        {
            jnrA = jjnr[k];

            j3A = 3*jnrA;

            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);
            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA,raj);
            GMX_MM_LOAD_1VALUE_PD(obc_param+jnrA,sk_aj);

            dx = _mm_sub_sd(ix, jx);
            dy = _mm_sub_sd(iy, jy);
            dz = _mm_sub_sd(iz, jz);

            rsq = gmx_mm_calc_rsq_pd(dx,dy,dz);

            rinv = gmx_mm_invsqrt_pd(rsq);
            r = _mm_mul_sd(rsq,rinv);

            /* Inverse radius of the j atom */
            raj_inv = gmx_mm_inv_pd(raj);

            /* Evaluate influence of atom aj -> ai */
            t1 = _mm_add_sd(r,sk_aj);
            t2 = _mm_sub_sd(r,sk_aj);
            t3 = _mm_sub_sd(sk_aj,r);
            obc_mask1 = _mm_cmplt_sd(rai, t1);
            obc_mask2 = _mm_cmplt_sd(rai, t2);
            obc_mask3 = _mm_cmplt_sd(rai, t3);

            uij = gmx_mm_inv_pd(t1);
            lij = _mm_or_pd(_mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                            _mm_andnot_pd(obc_mask2,rai_inv));
            dlij = _mm_and_pd(one,obc_mask2);
            uij2 = _mm_mul_sd(uij, uij);
            uij3 = _mm_mul_sd(uij2,uij);
            lij2 = _mm_mul_sd(lij, lij);
            lij3 = _mm_mul_sd(lij2,lij);

            diff2 = _mm_sub_sd(uij2,lij2);
            lij_inv = gmx_mm_invsqrt_pd(lij2);
            sk2_aj = _mm_mul_sd(sk_aj,sk_aj);
            sk2_rinv = _mm_mul_sd(sk2_aj,rinv);
            prod = _mm_mul_sd(onefourth,sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_sd(uij,lij_inv));

            t1 = _mm_sub_sd(lij,uij);
            t2 = _mm_mul_sd(diff2,
                            _mm_sub_sd(_mm_mul_pd(onefourth,r),
                                       prod));
            t3 = _mm_mul_sd(half,_mm_mul_sd(rinv,logterm));
            t1 = _mm_add_sd(t1,_mm_add_sd(t2,t3));
            t4 = _mm_mul_sd(two,_mm_sub_sd(rai_inv,lij));
            t4 = _mm_and_pd(t4,obc_mask3);
            t1 = _mm_mul_sd(half,_mm_add_sd(t1,t4));

            sum_ai = _mm_add_sd(sum_ai, _mm_and_pd(t1,obc_mask1) );

            /* Derivative term for aj -> ai */
            t1 = _mm_add_sd(_mm_mul_sd(half,lij2),
                            _mm_mul_sd(prod,lij3));
            t1 = _mm_sub_sd(t1,
                            _mm_mul_sd(onefourth,
                                       _mm_add_sd(_mm_mul_sd(lij,rinv),
                                                  _mm_mul_sd(lij3,r))));
            t2 = _mm_mul_sd(onefourth,
                            _mm_add_sd(_mm_mul_sd(uij,rinv),
                                       _mm_mul_sd(uij3,r)));
            t2 = _mm_sub_sd(t2,
                            _mm_add_sd(_mm_mul_sd(half,uij2),
                                       _mm_mul_sd(prod,uij3)));
            t3 = _mm_mul_sd(_mm_mul_sd(onefourth,logterm),
                            _mm_mul_sd(rinv,rinv));
            t3 = _mm_sub_sd(t3,
                            _mm_mul_sd(_mm_mul_sd(diff2,oneeighth),
                                       _mm_add_sd(one,
                                                  _mm_mul_sd(sk2_rinv,rinv))));
            t1 = _mm_mul_sd(rinv,
                            _mm_add_sd(_mm_mul_sd(dlij,t1),
                                       _mm_add_pd(t2,t3)));

            dadx1 = _mm_and_pd(t1,obc_mask1);

            /* Evaluate influence of atom ai -> aj */
            t1 = _mm_add_sd(r,sk_ai);
            t2 = _mm_sub_sd(r,sk_ai);
            t3 = _mm_sub_sd(sk_ai,r);
            obc_mask1 = _mm_cmplt_sd(raj, t1);
            obc_mask2 = _mm_cmplt_sd(raj, t2);
            obc_mask3 = _mm_cmplt_sd(raj, t3);

            uij = gmx_mm_inv_pd(t1);
            lij = _mm_or_pd( _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                             _mm_andnot_pd(obc_mask2,raj_inv));
            dlij = _mm_and_pd(one,obc_mask2);
            uij2 = _mm_mul_sd(uij, uij);
            uij3 = _mm_mul_sd(uij2,uij);
            lij2 = _mm_mul_sd(lij, lij);
            lij3 = _mm_mul_sd(lij2,lij);

            diff2 = _mm_sub_sd(uij2,lij2);
            lij_inv = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv = _mm_mul_sd(sk2_ai,rinv);
            prod = _mm_mul_sd(onefourth,sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_sd(uij,lij_inv));

            t1 = _mm_sub_sd(lij,uij);
            t2 = _mm_mul_sd(diff2,
                            _mm_sub_sd(_mm_mul_sd(onefourth,r),
                                       prod));
            t3 = _mm_mul_sd(half,_mm_mul_sd(rinv,logterm));
            t1 = _mm_add_sd(t1,_mm_add_sd(t2,t3));
            t4 = _mm_mul_sd(two,_mm_sub_sd(raj_inv,lij));
            t4 = _mm_and_pd(t4,obc_mask3);
            t1 = _mm_mul_sd(half,_mm_add_sd(t1,t4));

            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA,_mm_and_pd(t1,obc_mask1));

            /* Derivative term for ai -> aj */
            t1 = _mm_add_sd(_mm_mul_sd(half,lij2),
                            _mm_mul_sd(prod,lij3));
            t1 = _mm_sub_sd(t1,
                            _mm_mul_sd(onefourth,
                                       _mm_add_sd(_mm_mul_sd(lij,rinv),
                                                  _mm_mul_sd(lij3,r))));
            t2 = _mm_mul_sd(onefourth,
                            _mm_add_sd(_mm_mul_sd(uij,rinv),
                                       _mm_mul_sd(uij3,r)));
            t2 = _mm_sub_sd(t2,
                            _mm_add_sd(_mm_mul_sd(half,uij2),
                                       _mm_mul_sd(prod,uij3)));
            t3 = _mm_mul_sd(_mm_mul_sd(onefourth,logterm),
                            _mm_mul_sd(rinv,rinv));
            t3 = _mm_sub_sd(t3,
                            _mm_mul_sd(_mm_mul_sd(diff2,oneeighth),
                                       _mm_add_sd(one,
                                                  _mm_mul_sd(sk2_rinv,rinv))));
            t1 = _mm_mul_sd(rinv,
                            _mm_add_sd(_mm_mul_sd(dlij,t1),
                                       _mm_add_sd(t2,t3)));

            dadx2 = _mm_and_pd(t1,obc_mask1);

            /* Full packed stores keep the dadx stride identical to the main
             * loop; only the low lane belongs to the single j atom.
             */
            _mm_store_pd(dadx,dadx1);
            dadx += 2;
            _mm_store_pd(dadx,dadx2);
            dadx += 2;
        }
        /* Add the accumulated sum_ai for atom ii to work[ii]
         * (presumably a horizontal sum of both lanes - see the helper).
         */
        gmx_mm_update_1pot_pd(sum_ai,work+ii);

    }

    /* Parallel summations */
    if(PARTDECOMP(cr))
    {
        gmx_sum(natoms, work, cr);
    }
    else if(DOMAINDECOMP(cr))
    {
        dd_atom_sum_real(cr->dd, work);
    }

    if(gb_algorithm==egbHCT)
    {
        /* HCT */
        for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
        {
            if(born->use[i] != 0)
            {
                rr = top->atomtypes.gb_radius[md->typeA[i]]-doffset;
                sum = 1.0/rr - work[i];
                min_rad = rr + doffset;
                rad = 1.0/sum;

                /* The radius is never allowed below the type radius */
                born->bRad[i] = rad > min_rad ? rad : min_rad;
                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
            }
        }

        /* Extra communication required for DD */
        if(DOMAINDECOMP(cr))
        {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
        }
    }
    else
    {
        /* OBC */
        for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
        {
            if(born->use[i] != 0)
            {
                /* rr_inv2 is the inverse of the full type radius,
                 * rr_inv the inverse of the offset-reduced radius.
                 */
                rr = top->atomtypes.gb_radius[md->typeA[i]];
                rr_inv2 = 1.0/rr;
                rr = rr-doffset;
                rr_inv = 1.0/rr;
                sum = rr * work[i];
                sum2 = sum * sum;
                sum3 = sum2 * sum;

                tsum = tanh(born->obc_alpha*sum-born->obc_beta*sum2+born->obc_gamma*sum3);
                born->bRad[i] = rr_inv - tsum*rr_inv2;
                born->bRad[i] = 1.0 / born->bRad[i];

                fr->invsqrta[i]=gmx_invsqrt(born->bRad[i]);

                /* (1 - tsum^2) is the derivative of tanh; tchain is the
                 * derivative of its polynomial argument times rr.
                 */
                tchain = rr * (born->obc_alpha-2*born->obc_beta*sum+3*born->obc_gamma*sum2);
                born->drobc[i] = (1.0-tsum*tsum)*tchain*rr_inv2;
            }
        }
        /* Extra (local) communication required for DD */
        if(DOMAINDECOMP(cr))
        {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
            dd_atom_spread_real(cr->dd, born->drobc);
        }
    }

    return 0;
}
void do_mmcg (int natoms, // number of atoms in simulation t_inputrec *inputrec, // input record and box stuff t_mdatoms *md, // the atoms t_state *state, // positions & velocities gmx_mtop_t *top, // global topology t_commrec *cr, // communicators rvec *cg_cm, // centre of mass of charge groups int *allcgid, // charge groups ids int allcgnr, // charge groups number int *allsolid, // solvent groups ids int allsolnr, // solvent groups number FILE *log) // logfile { int i, j, p, q, qmin; real dvmod, shell2w2, dmin, d; rvec vecdist, dv, *v; shell2w2 = pow (inputrec->mmcg.shell2wt, 2.0); // Threshold v = state->v; // Velocities t_block cgs; // charge groups cgs = gmx_mtop_global_cgs(top); rvec *all_cg_cm=NULL; snew(all_cg_cm,allcgnr); // for DD, we need to get cg_cm of all given charge groups (fr->cg_cm is local), // the nearest one from a monitored water // may be more than one DD cell distant. if (DOMAINDECOMP(cr)) { if(cr->nnodes!=1) gmx_barrier(cr); for(i=0; i<allcgnr; i++) { int sender = 0,senderf,k; for(j=0; j<cr->dd->ncg_home; j++) { // is the cg a home cg ? if(cr->dd->index_gl[j]==allcgid[i] ) { sender = cr->sim_nodeid; for(k=0;k<3;k++) all_cg_cm[i][k]=cg_cm[j][k];//FIXME improve ! compilation error when done with pointers } } MPI_Allreduce(&sender,&senderf,1,MPI_INT,MPI_SUM,cr->dd->mpi_comm_all); for(k=0;k<3;k++) MPI_Bcast(&all_cg_cm[i][k],sizeof(all_cg_cm[i][k]),MPI_BYTE,senderf,cr->dd->mpi_comm_all); //FIXME again ! 
} } for(i=0; i<allsolnr; i++) { // Loop over waters - START p = allsolid[i]; if (PARTDECOMP(cr)) { // water i is in the node if((cgs.index[p]>=md->start) && (cgs.index[p]<(md->start+md->homenr))) { for (j=0; j<allcgnr; j++) { // Looking for min dist (dmin) q = allcgid[j]; // and for the nearest charge group (qmin) d = distance2(cg_cm[p],cg_cm[q]); if(!j) { dmin = d; qmin = q; } else if (d < dmin) { dmin = d; qmin = q; } } if (dmin >= shell2w2) { // Modifing velocity rvec_sub(cg_cm[p], cg_cm[qmin], vecdist); unitv (vecdist, vecdist); for (j=cgs.index[p]; j<(3+cgs.index[p]); j++) { dvmod = iprod (v[j], vecdist); if (dvmod <= 0) continue; svmul (2.0*dvmod, vecdist, dv); rvec_dec (&v[j], dv); // Warning (=>?) } } } } if (DOMAINDECOMP(cr)) { int g_atnr; // global atom ID int l_atnr; // local atom ID in the DD cell int l_cgnr; // local charge group number for(g_atnr=cgs.index[p];g_atnr<=cgs.index[p]+2;g_atnr++) { if(ga2la_get_home(cr->dd->ga2la,g_atnr,&l_atnr)) {// search in global to local lookup table // if the atom (not water) is in home atoms // and get local atom number l_cgnr = cr->dd->la2lc[l_atnr]; // get local charge group number for (j=0; j<allcgnr; j++) { // Looking for min dist (dmin) // and for the nearest charge group (qmin) d = distance2(cg_cm[l_cgnr],all_cg_cm[j]); if(!j) { dmin = d; qmin = j; } else if (d < dmin) { dmin = d; qmin = j; } } if (dmin >= shell2w2) { // Modifing velocity rvec_sub(cg_cm[l_cgnr], all_cg_cm[qmin], vecdist); unitv (vecdist, vecdist); dvmod = iprod (v[l_atnr], vecdist); if (dvmod <= 0) continue; svmul (2.0*dvmod, vecdist, dv); rvec_dec (&v[l_atnr], dv); // Warning (=>?) } } } } } // Loop over waters - END return; }