static void pull_set_pbcatoms(t_commrec *cr, t_pull *pull, t_mdatoms *md,
                              rvec *x, rvec *x_pbc)
{
    int g, n, m;

    n = 0;
    for (g = 0; g < pull->ngroup; g++) {
        if ((g == 0 && PULL_CYL(pull)) || pull->group[g].pbcatom == -1) {
            clear_rvec(x_pbc[g]);
        } else {
            pull_set_pbcatom(cr, &pull->group[g], md, x, x_pbc[g]);
            for (m = 0; m < DIM; m++) {
                if (pull->dim[m] == 0) {
                    x_pbc[g][m] = 0.0;
                }
            }
            n++;
        }
    }

    if (cr && PAR(cr) && n > 0) {
        /* Sum over the nodes to get x_pbc from the home node of pbcatom */
        gmx_sum(pull->ngroup*DIM, x_pbc[0], cr);
    }
}
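/* Minimal sketch of what the gmx_sum() call above provides, assuming an MPI
 * build: an in-place, element-wise sum over all ranks, after which every rank
 * holds the same totals.  The helper name sum_over_ranks and the real_t
 * typedef are illustrative, not GROMACS API; MPI_FLOAT assumes
 * single-precision reals. */
#include <mpi.h>

typedef float real_t; /* assumption: single-precision build */

static void sum_over_ranks(int n, real_t *data, MPI_Comm comm)
{
    /* Each rank contributes its local values; all ranks receive the sums */
    MPI_Allreduce(MPI_IN_PLACE, data, n, MPI_FLOAT, MPI_SUM, comm);
}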
real Umbrella_Communicate(real Q_local, real *k_Q, real *Q_0)
{
    int i;
    int i_omp = gmx_omp_get_thread_num();
    /* gmx_omp_get_num_procs() would give the wrong number here */
    int n_omp = udata.n_omp;
    static real Q_semilocal[UMB_MAX_OMP];
    real Q_global;

    if (n_omp > UMB_MAX_OMP) {
        fprintf(stderr,
                "A segmentation fault is probably about to happen because Q_semilocal is not big enough to accommodate %d OpenMP threads. See line %d in %s.\n",
                n_omp, __LINE__, __FILE__);
    }
    Q_semilocal[i_omp] = Q_local;
#pragma omp barrier
    if (i_omp == 0) {
        Q_local = 0;
        for (i = 0; i < n_omp; i++) {
            Q_local += Q_semilocal[i];
        }
        /* gmx_sum is declared in src/gromacs/legacyheaders/network.h (via main.h) */
#ifdef GMX_MPI
        gmx_sum(1, &Q_local, udata.cr);
#endif
        for (i = 0; i < n_omp; i++) {
            Q_semilocal[i] = Q_local;
        }
    }
#pragma omp barrier
    Q_global = Q_semilocal[i_omp];

    *k_Q = udata.k_Q;
    *Q_0 = udata.Q_0 + (udata.Q_init - udata.Q_0)*exp(-udata.step/udata.Q_steps);

    if (i_omp == 0) {
        /* udata.fp == NULL on the other MPI processes */
        if ((udata.step % udata.freq) == 0 && udata.fp != NULL) {
            fprintf(udata.fp, "%d %g %g\n", udata.step, Q_global,
                    0.5*(*k_Q)*(Q_global - (*Q_0))*(Q_global - (*Q_0)));
        }
    }
    return Q_global;
}
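/* Stand-alone sketch of the two-level reduction pattern used above, assuming
 * the function is called from inside an OpenMP parallel region and MPI is
 * initialized (names are illustrative): each thread deposits its partial
 * value in a shared slot, thread 0 sums the slots and does the cross-rank
 * reduction, and a second barrier publishes the result to all threads. */
#include <mpi.h>
#include <omp.h>

#define MAX_THREADS 64 /* assumption: upper bound on the team size */

static double reduce_threads_then_ranks(double local, MPI_Comm comm)
{
    static double slot[MAX_THREADS]; /* shared between threads */
    int tid  = omp_get_thread_num();
    int nthr = omp_get_num_threads();

    slot[tid] = local;
#pragma omp barrier
    if (tid == 0) {
        double sum = 0;
        for (int t = 0; t < nthr; t++) {
            sum += slot[t]; /* intra-process (thread) sum */
        }
        /* inter-process (rank) sum */
        MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, comm);
        for (int t = 0; t < nthr; t++) {
            slot[t] = sum; /* publish to every thread's slot */
        }
    }
#pragma omp barrier
    return slot[tid];
}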
static void pull_set_pbcatoms(t_commrec *cr, t_pull *pull,
                              rvec *x, rvec *x_pbc)
{
    int g, n;

    n = 0;
    for (g = 0; g < pull->ngroup; g++) {
        if (!pull->group[g].bCalcCOM || pull->group[g].pbcatom == -1) {
            clear_rvec(x_pbc[g]);
        } else {
            pull_set_pbcatom(cr, &pull->group[g], x, x_pbc[g]);
            n++;
        }
    }

    if (cr && PAR(cr) && n > 0) {
        /* Sum over the nodes to get x_pbc from the home node of pbcatom */
        gmx_sum(pull->ngroup*DIM, x_pbc[0], cr);
    }
}
/* Assemble the positions of the group such that every node has all of them.
 * The atom indices are retrieved from anrs_loc[0..nr_loc].
 * Note that coll_ind[i] = i is needed in the serial case */
extern void communicate_group_positions(
        t_commrec *cr,
        rvec      *xcoll,        /* OUT: collective array of positions */
        ivec      *shifts,       /* IN+OUT: collective array of shifts for xcoll */
        ivec      *extra_shifts, /* BUF: extra shifts since last time step */
        const gmx_bool bNS,      /* IN: NS step, the shifts have changed */
        rvec      *x_loc,        /* IN: local positions on this node */
        const int  nr,           /* IN: total number of atoms in the group */
        const int  nr_loc,       /* IN: local number of atoms in the group */
        int       *anrs_loc,     /* IN: local atom numbers */
        int       *coll_ind,     /* IN: collective index */
        rvec      *xcoll_old,    /* IN+OUT: positions from the last time step,
                                  * used to make the group whole */
        matrix     box)
{
    int i;

    /* Zero out the group's global position array */
    clear_rvecs(nr, xcoll);

    /* Put the local positions that this node has into the right place of
     * the collective array. Note that in the serial case, coll_ind[i] = i */
    for (i = 0; i < nr_loc; i++)
        copy_rvec(x_loc[anrs_loc[i]], xcoll[coll_ind[i]]);

    if (PAR(cr)) {
        /* Add the arrays from all nodes together */
        gmx_sum(nr*3, xcoll[0], cr);
    }
    /* To make the group whole, start with a whole group and each step move
     * the assembled positions at closest distance to the positions from the
     * last step. First shift the positions with the saved shift vectors
     * (these are 0 when this routine is called for the first time!) */
    shift_positions_group(box, xcoll, shifts, nr);

    /* Now check if some shifts changed since the last step.
     * This only needs to be done when the shifts are expected to have changed,
     * i.e. after neighbour searching */
    if (bNS) {
        get_shifts_group(3, box, xcoll, nr, xcoll_old, extra_shifts);

        /* Shift with the additional shifts such that we get a whole group now */
        shift_positions_group(box, xcoll, extra_shifts, nr);

        /* Add the shift vectors together for the next time step */
        for (i = 0; i < nr; i++) {
            shifts[i][XX] += extra_shifts[i][XX];
            shifts[i][YY] += extra_shifts[i][YY];
            shifts[i][ZZ] += extra_shifts[i][ZZ];
        }
        /* Store current correctly-shifted positions for comparison in the
         * next NS time step */
        for (i = 0; i < nr; i++)
            copy_rvec(xcoll[i], xcoll_old[i]);
    }
}
static void accumulate_ekin(t_commrec *cr, t_grpopts *opts,
                            gmx_ekindata_t *ekind)
{
    int g;

    if (PAR(cr))
        for (g = 0; (g < opts->ngtc); g++)
            gmx_sum(DIM*DIM, ekind->tcstat[g].ekin[0], cr);
}
static void pull_reduce_real(t_commrec *cr, pull_comm_t *comm,
                             int n, real *data)
{
    if (cr != NULL && PAR(cr)) {
        if (comm->bParticipateAll) {
            /* Sum the contributions over all DD ranks */
            gmx_sum(n, data, cr);
        } else {
#if GMX_MPI
#if MPI_IN_PLACE_EXISTS
            MPI_Allreduce(MPI_IN_PLACE, data, n, GMX_MPI_REAL, MPI_SUM,
                          comm->mpi_comm_com);
#else
            real *buf;

            snew(buf, n);
            MPI_Allreduce(data, buf, n, GMX_MPI_REAL, MPI_SUM,
                          comm->mpi_comm_com);
            /* Copy the result from the buffer to the input/output data */
            for (int i = 0; i < n; i++) {
                data[i] = buf[i];
            }
            sfree(buf);
#endif
#else
            gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
#endif
        }
    }
}
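/* Sketch of how a restricted communicator such as comm->mpi_comm_com above
 * can be built, assuming plain MPI (the helper name is illustrative, not
 * from the source): participating ranks pass color 0 and join the new
 * communicator; the others pass MPI_UNDEFINED and get MPI_COMM_NULL, so
 * they simply skip the collective calls. */
#include <mpi.h>

static MPI_Comm make_participant_comm(int participates)
{
    MPI_Comm comm_sub;
    MPI_Comm_split(MPI_COMM_WORLD,
                   participates ? 0 : MPI_UNDEFINED, /* color selects members */
                   0,                                /* keep the old rank order */
                   &comm_sub);
    return comm_sub; /* MPI_COMM_NULL on non-participating ranks */
}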
/* Estimate the reciprocal space part error of the SPME Ewald sum. */
static real estimate_reciprocal(
        t_inputinfo *info,
        rvec         x[],      /* array of particles */
        real         q[],      /* array of charges */
        int          nr,       /* number of charges = size of the charge array */
        FILE        *fp_out,
        gmx_bool     bVerbose,
        unsigned int seed,     /* the seed for the random number generator */
        int         *nsamples, /* return the number of samples used if the Monte Carlo
                                * algorithm is used for the self energy error estimate */
        t_commrec   *cr)
{
    real     e_rec   = 0; /* reciprocal error estimate */
    real     e_rec1  = 0; /* error estimate term 1 */
    real     e_rec2  = 0; /* error estimate term 2 */
    real     e_rec3  = 0; /* error estimate term 3 */
    real     e_rec3x = 0; /* part of error estimate term 3 in x */
    real     e_rec3y = 0; /* part of error estimate term 3 in y */
    real     e_rec3z = 0; /* part of error estimate term 3 in z */
    int      i, ci;
    int      nx, ny, nz; /* grid coordinates */
    real     q2_all = 0; /* sum of squared charges */
    rvec     gridpx;     /* reciprocal grid point in x direction */
    rvec     gridpxy;    /* reciprocal grid point in x and y direction */
    rvec     gridp;      /* complete reciprocal grid point in 3 directions */
    rvec     tmpvec;     /* template to create points from basis vectors */
    rvec     tmpvec2;    /* template to create points from basis vectors */
    real     coeff  = 0; /* variable to compute coefficients of the error estimate */
    real     coeff2 = 0; /* variable to compute coefficients of the error estimate */
    real     tmp    = 0; /* variables to compute different factors from vectors */
    real     tmp1   = 0;
    real     tmp2   = 0;
    gmx_bool bFraction;

    /* Random number generator */
    gmx_rng_t rng     = NULL;
    int      *numbers = NULL;

    /* Index variables for parallel work distribution */
    int startglobal, stopglobal;
    int startlocal, stoplocal;
    int x_per_core;
    int xtot;

#ifdef TAKETIME
    double t0 = 0.0;
    double t1 = 0.0;
#endif

    rng = gmx_rng_init(seed);

    clear_rvec(gridpx);
    clear_rvec(gridpxy);
    clear_rvec(gridp);
    clear_rvec(tmpvec);
    clear_rvec(tmpvec2);

    for (i = 0; i < nr; i++) {
        q2_all += q[i]*q[i];
    }

    /* Calculate indices for work distribution */
    startglobal = -info->nkx[0]/2;
    stopglobal  =  info->nkx[0]/2;
    xtot        = stopglobal*2 + 1;
    if (PAR(cr)) {
        x_per_core = ceil((real)xtot / (real)cr->nnodes);
        startlocal = startglobal + x_per_core*cr->nodeid;
        stoplocal  = startlocal + x_per_core - 1;
        if (stoplocal > stopglobal)
            stoplocal = stopglobal;
    } else {
        startlocal = startglobal;
        stoplocal  = stopglobal;
        x_per_core = xtot;
    }

#ifdef GMX_LIB_MPI
#ifdef TAKETIME
    if (MASTER(cr))
        t0 = MPI_Wtime();
#endif
#endif

    if (MASTER(cr)) {
        fprintf(stderr, "Calculating reciprocal error part 1 ...");
    }

    for (nx = startlocal; nx <= stoplocal; nx++) {
        svmul(nx, info->recipbox[XX], gridpx);
        for (ny = -info->nky[0]/2; ny < info->nky[0]/2+1; ny++) {
            svmul(ny, info->recipbox[YY], tmpvec);
            rvec_add(gridpx, tmpvec, gridpxy);
            for (nz = -info->nkz[0]/2; nz < info->nkz[0]/2+1; nz++) {
                if (0 == nx && 0 == ny && 0 == nz)
                    continue;
                svmul(nz, info->recipbox[ZZ], tmpvec);
                rvec_add(gridpxy, tmpvec, gridp);
                tmp    = norm2(gridp);
                coeff  = exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0]);
                coeff /= 2.0 * M_PI * info->volume * tmp;
                coeff2 = tmp;

                tmp  = eps_poly2(nx, info->nkx[0], info->pme_order[0]);
                tmp += eps_poly2(ny, info->nkx[0], info->pme_order[0]);
                tmp += eps_poly2(nz, info->nkx[0], info->pme_order[0]);

                tmp1 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
                tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
                tmp += 2.0 * tmp1 * tmp2;

                tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
                tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
                tmp += 2.0 * tmp1 * tmp2;

                tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
                tmp2 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
                tmp += 2.0 * tmp1 * tmp2;

                tmp1  = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
                tmp1 += eps_poly1(ny, info->nky[0], info->pme_order[0]);
                tmp1 += eps_poly1(nz, info->nkz[0], info->pme_order[0]);
                tmp  += tmp1 * tmp1;

                e_rec1 += 32.0 * M_PI * M_PI * coeff * coeff * coeff2 * tmp * q2_all * q2_all / nr;

                tmp1  = eps_poly3(nx, info->nkx[0], info->pme_order[0]);
                tmp1 *= info->nkx[0];
                tmp2  = iprod(gridp, info->recipbox[XX]);
                tmp   = tmp1*tmp2;

                tmp1  = eps_poly3(ny, info->nky[0], info->pme_order[0]);
                tmp1 *= info->nky[0];
                tmp2  = iprod(gridp, info->recipbox[YY]);
                tmp  += tmp1*tmp2;

                tmp1  = eps_poly3(nz, info->nkz[0], info->pme_order[0]);
                tmp1 *= info->nkz[0];
                tmp2  = iprod(gridp, info->recipbox[ZZ]);
                tmp  += tmp1*tmp2;

                tmp *= 4.0 * M_PI;

                tmp1  = eps_poly4(nx, info->nkx[0], info->pme_order[0]);
                tmp1 *= norm2(info->recipbox[XX]);
                tmp1 *= info->nkx[0] * info->nkx[0];
                tmp  += tmp1;

                tmp1  = eps_poly4(ny, info->nky[0], info->pme_order[0]);
                tmp1 *= norm2(info->recipbox[YY]);
                tmp1 *= info->nky[0] * info->nky[0];
                tmp  += tmp1;

                tmp1  = eps_poly4(nz, info->nkz[0], info->pme_order[0]);
                tmp1 *= norm2(info->recipbox[ZZ]);
                tmp1 *= info->nkz[0] * info->nkz[0];
                tmp  += tmp1;

                e_rec2 += 4.0 * coeff * coeff * tmp * q2_all * q2_all / nr;
            }
        }
        if (MASTER(cr))
            fprintf(stderr, "\rCalculating reciprocal error part 1 ... %3.0f%%",
                    100.0*(nx - startlocal + 1)/(x_per_core));
    }
    if (MASTER(cr))
        fprintf(stderr, "\n");

    /* Use just a fraction of all charges to estimate the self energy error term? */
    bFraction = (info->fracself > 0.0) && (info->fracself < 1.0);

    if (bFraction) {
        /* Here xtot is the number of samples taken for the Monte Carlo calculation
         * of the average of term IV of equation 35 in Wang2010. Round up to a
         * number of samples that is divisible by the number of nodes */
        x_per_core = ceil(info->fracself * nr / (real)cr->nnodes);
        xtot       = x_per_core * cr->nnodes;
    } else {
        /* In this case we use all nr particle positions */
        xtot       = nr;
        x_per_core = ceil((real)xtot / (real)cr->nnodes);
    }

    startlocal = x_per_core * cr->nodeid;
    stoplocal  = min(startlocal + x_per_core, xtot); /* min needed if xtot == nr */

    if (bFraction) {
        /* Make sure we get identical results in serial and parallel. Therefore,
         * take the sample indices from a single, global random number array that
         * is constructed on the master node and that only depends on the seed */
        snew(numbers, xtot);
        if (MASTER(cr)) {
            for (i = 0; i < xtot; i++) {
                numbers[i] = floor(gmx_rng_uniform_real(rng) * nr);
            }
        }
        /* Broadcast the random number array to the other nodes */
        if (PAR(cr)) {
            nblock_bc(cr, xtot, numbers);
        }

        if (bVerbose && MASTER(cr)) {
            fprintf(stdout, "Using %d sample%s to approximate the self interaction error term",
                    xtot, xtot == 1 ? "" : "s");
            if (PAR(cr))
                fprintf(stdout, " (%d sample%s per node)",
                        x_per_core, x_per_core == 1 ? "" : "s");
            fprintf(stdout, ".\n");
        }
    }

    /* Return the number of positions used for the Monte Carlo algorithm */
    *nsamples = xtot;

    for (i = startlocal; i < stoplocal; i++) {
        e_rec3x = 0;
        e_rec3y = 0;
        e_rec3z = 0;

        if (bFraction) {
            /* Randomly pick a charge */
            ci = numbers[i];
        } else {
            /* Use all charges */
            ci = i;
        }

        for (nx = -info->nkx[0]/2; nx < info->nkx[0]/2+1; nx++) {
            svmul(nx, info->recipbox[XX], gridpx);
            for (ny = -info->nky[0]/2; ny < info->nky[0]/2+1; ny++) {
                svmul(ny, info->recipbox[YY], tmpvec);
                rvec_add(gridpx, tmpvec, gridpxy);
                for (nz = -info->nkz[0]/2; nz < info->nkz[0]/2+1; nz++) {
                    if (0 == nx && 0 == ny && 0 == nz)
                        continue;
                    svmul(nz, info->recipbox[ZZ], tmpvec);
                    rvec_add(gridpxy, tmpvec, gridp);
                    tmp    = norm2(gridp);
                    coeff  = exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0]);
                    coeff /= tmp;
                    e_rec3x += coeff*eps_self(nx, info->nkx[0], info->recipbox[XX], info->pme_order[0], x[ci]);
                    e_rec3y += coeff*eps_self(ny, info->nky[0], info->recipbox[YY], info->pme_order[0], x[ci]);
                    e_rec3z += coeff*eps_self(nz, info->nkz[0], info->recipbox[ZZ], info->pme_order[0], x[ci]);
                }
            }
        }

        clear_rvec(tmpvec2);

        svmul(e_rec3x, info->recipbox[XX], tmpvec);
        rvec_inc(tmpvec2, tmpvec);
        svmul(e_rec3y, info->recipbox[YY], tmpvec);
        rvec_inc(tmpvec2, tmpvec);
        svmul(e_rec3z, info->recipbox[ZZ], tmpvec);
        rvec_inc(tmpvec2, tmpvec);

        e_rec3 += q[ci]*q[ci]*q[ci]*q[ci]*norm2(tmpvec2)
                  / (xtot * M_PI * info->volume * M_PI * info->volume);
        if (MASTER(cr)) {
            fprintf(stderr, "\rCalculating reciprocal error part 2 ... %3.0f%%",
                    100.0*(i + 1)/stoplocal);
        }
    }
    if (MASTER(cr))
        fprintf(stderr, "\n");

#ifdef GMX_LIB_MPI
#ifdef TAKETIME
    if (MASTER(cr)) {
        t1 = MPI_Wtime() - t0;
        fprintf(fp_out, "Recip. err. est. took : %lf s\n", t1);
    }
#endif
#endif

#ifdef DEBUG
    if (PAR(cr)) {
        fprintf(stderr, "Node %3d: nx=[%3d...%3d]  e_rec3=%e\n",
                cr->nodeid, startlocal, stoplocal, e_rec3);
    }
#endif

    if (PAR(cr)) {
        gmx_sum(1, &e_rec1, cr);
        gmx_sum(1, &e_rec2, cr);
        gmx_sum(1, &e_rec3, cr);
    }

    e_rec = sqrt(e_rec1 + e_rec2 + e_rec3);

    return ONE_4PI_EPS0 * e_rec;
}
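/* Sketch of the static work distribution used above (illustrative helper
 * name): split xtot items into ceil(xtot/nnodes)-sized blocks, one per rank;
 * the last rank may get a shorter, possibly empty, block. */
static void my_block(int xtot, int nnodes, int nodeid, int *start, int *stop)
{
    int per_core = (xtot + nnodes - 1)/nnodes; /* integer ceiling */

    *start = per_core*nodeid;
    *stop  = *start + per_core;
    if (*stop > xtot)
        *stop = xtot;
}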
static void do_my_pme(FILE *fp, real tm, gmx_bool bVerbose, t_inputrec *ir,
                      rvec x[], rvec xbuf[], rvec f[],
                      real charge[], real qbuf[], real qqbuf[],
                      matrix box, gmx_bool bSort,
                      t_commrec *cr, t_nsborder *nsb, t_nrnb *nrnb,
                      t_block *excl, real qtot,
                      t_forcerec *fr, int index[], FILE *fp_xvg,
                      int ngroups, unsigned short cENER[])
{
    real   ener, vcorr, q, xx, dvdl = 0, vdip, vcharge;
    tensor vir, vir_corr, vir_tot;
    rvec   mu_tot[2];
    int    i, m, ii, ig, jg;
    real **epme, *qptr;

    /* Initialize local variables */
    fr->f_el_recip = f;
    clear_mat(vir);
    clear_mat(vir_corr);

    if (ngroups > 1) {
        fprintf(fp, "There are %d energy groups\n", ngroups);
        snew(epme, ngroups);
        for (i = 0; (i < ngroups); i++)
            snew(epme[i], ngroups);
    }

    /* Put x in the box; this part needs to be parallelized properly */
    /*put_atoms_in_box(box,nsb->natoms,x);*/

    /* Here sorting of X (and q) is done.
     * Alternatively, one could just put the atoms in one of the
     * cr->nnodes slabs. That is much cheaper than sorting. */
    for (i = 0; (i < nsb->natoms); i++)
        index[i] = i;
    if (bSort) {
        xptr = x;
        qsort(index, nsb->natoms, sizeof(index[0]), comp_xptr);
        xptr = NULL; /* to trap unintentional use of the pointer */
    }

    /* After sorting we only need the part that is to be computed on
     * this processor. We also compute mu_tot here (the system dipole). */
    clear_rvec(mu_tot[0]);
    for (i = START(nsb); (i < START(nsb)+HOMENR(nsb)); i++) {
        ii      = index[i];
        q       = charge[ii];
        qbuf[i] = q;
        for (m = 0; (m < DIM); m++) {
            xx            = x[ii][m];
            xbuf[i][m]    = xx;
            mu_tot[0][m] += q*xx;
        }
        clear_rvec(f[ii]);
    }
    copy_rvec(mu_tot[0], mu_tot[1]);
    if (debug) {
        pr_rvec(debug, 0, "qbuf", qbuf, nsb->natoms, TRUE);
        pr_rvecs(debug, 0, "xbuf", xbuf, nsb->natoms);
        pr_rvecs(debug, 0, "box", box, DIM);
    }
    for (ig = 0; (ig < ngroups); ig++) {
        for (jg = ig; (jg < ngroups); jg++) {
            if (ngroups > 1) {
                for (i = START(nsb); (i < START(nsb)+HOMENR(nsb)); i++) {
                    if ((cENER[i] == ig) || (cENER[i] == jg))
                        qqbuf[i] = qbuf[i];
                    else
                        qqbuf[i] = 0;
                }
                qptr = qqbuf;
            } else
                qptr = qbuf;
            ener  = do_pme(fp, bVerbose, ir, xbuf, f, qptr, qptr, box, cr,
                           nsb, nrnb, vir, fr->ewaldcoeff, FALSE, 0, &dvdl, FALSE);
            vcorr = ewald_LRcorrection(fp, nsb, cr, fr, qptr, qptr, excl, xbuf, box, mu_tot,
                                       ir->ewald_geometry, ir->epsilon_surface,
                                       0, &dvdl, &vdip, &vcharge);
            gmx_sum(1, &ener, cr);
            gmx_sum(1, &vcorr, cr);
            if (ngroups > 1)
                epme[ig][jg] = ener + vcorr;
        }
    }
    if (ngroups > 1) {
        if (fp_xvg)
            fprintf(fp_xvg, "%10.3f", tm);
        for (ig = 0; (ig < ngroups); ig++) {
            for (jg = ig; (jg < ngroups); jg++) {
                if (ig != jg)
                    epme[ig][jg] -= epme[ig][ig] + epme[jg][jg];
                if (fp_xvg)
                    fprintf(fp_xvg, " %12.5e", epme[ig][jg]);
            }
        }
        if (fp_xvg)
            fprintf(fp_xvg, "\n");
    } else {
        fprintf(fp, "Time: %10.3f Energy: %12.5e Correction: %12.5e Total: %12.5e\n",
                tm, ener, vcorr, ener + vcorr);
        if (fp_xvg)
            fprintf(fp_xvg, "%10.3f %12.5e %12.5e %12.5e\n",
                    tm, ener + vcorr, vdip, vcharge);
        if (bVerbose) {
            m_add(vir, vir_corr, vir_tot);
            gmx_sum(9, vir_tot[0], cr);
            pr_rvecs(fp, 0, "virial", vir_tot, DIM);
        }
        fflush(fp);
    }
}
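/* Why the epme subtraction in the function above recovers pair energies
 * (worked identity, not code from the source): running PME with only the
 * charges of energy groups i and j nonzero yields
 *
 *     E(i U j) = E(i,i) + E(j,j) + E(i,j),
 *
 * so after the diagonal terms E(i,i) and E(j,j) are known, the cross
 * interaction energy follows as
 *
 *     E(i,j) = E(i U j) - E(i,i) - E(j,j).
 */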
void do_force(FILE *log, t_commrec *cr, t_commrec *mcr,
              t_parm *parm, t_nsborder *nsb, tensor vir_part, tensor pme_vir,
              int step, t_nrnb *nrnb, t_topology *top, t_groups *grps,
              rvec x[], rvec v[], rvec f[], rvec buf[],
              t_mdatoms *mdatoms, real ener[], t_fcdata *fcd, bool bVerbose,
              real lambda, t_graph *graph,
              bool bNS, bool bNBFonly, t_forcerec *fr, rvec mu_tot,
              bool bGatherOnly)
{
    static rvec box_size;
    static real dvdl_lr = 0;
    int         cg0, cg1, i, j;
    int         start, homenr;
    static real mu_and_q[DIM+1];
    real        qsum;

    start  = START(nsb);
    homenr = HOMENR(nsb);
    cg0    = CG0(nsb);
    cg1    = CG1(nsb);

    update_forcerec(log, fr, parm->box);

    /* Calculate the total (local) dipole moment in a temporary common array.
     * This makes it possible to sum them over nodes faster. */
    calc_mu_and_q(nsb, x, mdatoms->chargeT, mu_and_q, mu_and_q+DIM);

    if (fr->ePBC != epbcNONE) {
        /* Compute shift vectors every step, because of pressure coupling! */
        if (parm->ir.epc != epcNO)
            calc_shifts(parm->box, box_size, fr->shift_vec);

        if (bNS) {
            put_charge_groups_in_box(log, cg0, cg1, parm->box, box_size,
                                     &(top->blocks[ebCGS]), x, fr->cg_cm);
            inc_nrnb(nrnb, eNR_RESETX, homenr);
        } else if (parm->ir.eI == eiSteep || parm->ir.eI == eiCG)
            unshift_self(graph, parm->box, x);
    } else if (bNS)
        calc_cgcm(log, cg0, cg1, &(top->blocks[ebCGS]), x, fr->cg_cm);

    if (bNS) {
        inc_nrnb(nrnb, eNR_CGCM, cg1-cg0);
        if (PAR(cr))
            move_cgcm(log, cr, fr->cg_cm, nsb->workload);
        if (debug)
            pr_rvecs(debug, 0, "cgcm", fr->cg_cm, nsb->cgtotal);
    }

    /* Communicate coordinates and sum dipole and net charge if necessary */
    if (PAR(cr)) {
        move_x(log, cr->left, cr->right, x, nsb, nrnb);
        gmx_sum(DIM+1, mu_and_q, cr);
    }
    for (i = 0; i < DIM; i++)
        mu_tot[i] = mu_and_q[i];
    qsum = mu_and_q[DIM];

    /* Reset energies */
    reset_energies(&(parm->ir.opts), grps, fr, bNS, ener);
    if (bNS) {
        if (fr->ePBC != epbcNONE)
            /* Calculate intramolecular shift vectors to make molecules whole */
            mk_mshift(log, graph, parm->box, x);

        /* Reset long range forces if necessary */
        if (fr->bTwinRange) {
            clear_rvecs(nsb->natoms, fr->f_twin);
            clear_rvecs(SHIFTS, fr->fshift_twin);
        }
        /* Do the actual neighbour searching and, for twin range electrostatics,
         * also the calculation of the long range forces and energies. */
        dvdl_lr = 0;
        ns(log, fr, x, f, parm->box, grps, &(parm->ir.opts), top, mdatoms,
           cr, nrnb, nsb, step, lambda, &dvdl_lr);
    }
    /* Reset PME/Ewald forces if necessary */
    if (EEL_LR(fr->eeltype))
        clear_rvecs(homenr, fr->f_pme+start);

    /* Copy long range forces into normal buffers */
    if (fr->bTwinRange) {
        for (i = 0; i < nsb->natoms; i++)
            copy_rvec(fr->f_twin[i], f[i]);
        for (i = 0; i < SHIFTS; i++)
            copy_rvec(fr->fshift_twin[i], fr->fshift[i]);
    } else {
        clear_rvecs(nsb->natoms, f);
        clear_rvecs(SHIFTS, fr->fshift);
    }

    /* Compute the forces */
    force(log, step, fr, &(parm->ir), &(top->idef), nsb, cr, mcr, nrnb, grps, mdatoms,
          top->atoms.grps[egcENER].nr, &(parm->ir.opts),
          x, f, ener, fcd, bVerbose, parm->box, lambda, graph, &(top->atoms.excl),
          bNBFonly, pme_vir, mu_tot, qsum, bGatherOnly);

    /* Take the long range contribution to the free energy into account */
    ener[F_DVDL] += dvdl_lr;

#ifdef DEBUG
    if (bNS)
        print_nrnb(log, nrnb);
#endif

    /* The short-range virial from surrounding boxes */
    clear_mat(vir_part);
    calc_vir(log, SHIFTS, fr->shift_vec, fr->fshift, vir_part);
    inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS);

    if (debug)
        pr_rvecs(debug, 0, "vir_shifts", vir_part, DIM);

    /* Compute forces due to the electric field */
    calc_f_el(start, homenr, mdatoms->chargeT, f, parm->ir.ex);

    /* When using PME/Ewald we compute the long range virial (pme_vir) there,
     * otherwise we do it based on the long range forces from the twin range
     * cut-off based calculation (or not at all). */

    /* Communicate the forces */
    if (PAR(cr))
        move_f(log, cr->left, cr->right, f, buf, nsb, nrnb);
}
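/* Stand-alone sketch of the packing trick used above (illustrative names,
 * plain MPI assumed): storing the DIM dipole components and the net charge
 * contiguously lets a single reduction sum all four scalars, paying one
 * collective latency instead of two. */
#include <mpi.h>

#define NDIM 3 /* assumption: 3 spatial dimensions */

static void sum_mu_and_q(double mu[NDIM], double *q, MPI_Comm comm)
{
    double packed[NDIM + 1] = { mu[0], mu[1], mu[2], *q };

    MPI_Allreduce(MPI_IN_PLACE, packed, NDIM + 1, MPI_DOUBLE, MPI_SUM, comm);

    mu[0] = packed[0];
    mu[1] = packed[1];
    mu[2] = packed[2];
    *q    = packed[NDIM];
}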
/* Estimate the reciprocal space part error of the SPME Ewald sum. */
static real estimate_reciprocal(
        t_inputinfo *info,
        rvec         x[], /* array of particles */
        real         q[], /* array of charges */
        int          nr,  /* number of charges = size of the charge array */
        FILE        *fp_out,
        t_commrec   *cr)
{
    real e_rec   = 0; /* reciprocal error estimate */
    real e_rec1  = 0; /* error estimate term 1 */
    real e_rec2  = 0; /* error estimate term 2 */
    real e_rec3  = 0; /* error estimate term 3 */
    real e_rec3x = 0; /* part of error estimate term 3 in x */
    real e_rec3y = 0; /* part of error estimate term 3 in y */
    real e_rec3z = 0; /* part of error estimate term 3 in z */
    int  i, ci;
    int  nx, ny, nz; /* grid coordinates */
    real q2_all = 0; /* sum of squared charges */
    rvec gridpx;     /* reciprocal grid point in x direction */
    rvec gridpxy;    /* reciprocal grid point in x and y direction */
    rvec gridp;      /* complete reciprocal grid point in 3 directions */
    rvec tmpvec;     /* template to create points from basis vectors */
    rvec tmpvec2;    /* template to create points from basis vectors */
    real coeff  = 0; /* variable to compute coefficients of the error estimate */
    real coeff2 = 0; /* variable to compute coefficients of the error estimate */
    real tmp    = 0; /* variables to compute different factors from vectors */
    real tmp1   = 0;
    real tmp2   = 0;
    real xtmp   = 0;
    real ytmp   = 0;
    real ztmp   = 0;
    double ewald_error;

    /* Random number generator */
    gmx_rng_t rng = NULL;

    /* Index variables for parallel work distribution */
    int  startglobal, stopglobal;
    int  startlocal, stoplocal;
    int  x_per_core;
    int  nrsamples;
    real xtot;

/* #define TAKETIME */
#ifdef TAKETIME
    double t0 = 0.0;
    double t1 = 0.0;
    double t2 = 0.0;
#endif

    rng = gmx_rng_init(cr->nodeid);

    clear_rvec(gridpx);
    clear_rvec(gridpxy);
    clear_rvec(gridp);
    clear_rvec(tmpvec);
    clear_rvec(tmpvec2);

    for (i = 0; i < nr; i++) {
        q2_all += q[i]*q[i];
    }

    /* Calculate indices for work distribution */
    startglobal = -info->nkx[0]/2;
    stopglobal  =  info->nkx[0]/2;
    xtot        = stopglobal*2 + 1;
    if (PAR(cr)) {
        x_per_core = ceil(xtot / cr->nnodes);
        startlocal = startglobal + x_per_core*cr->nodeid;
        stoplocal  = startlocal + x_per_core - 1;
        if (stoplocal > stopglobal)
            stoplocal = stopglobal;
    } else {
        startlocal = startglobal;
        stoplocal  = stopglobal;
        x_per_core = xtot;
    }

#ifdef TAKETIME
    if (MASTER(cr))
        t0 = MPI_Wtime();
#endif

    if (MASTER(cr)) {
        fprintf(stderr, "Calculating reciprocal error part 1 ...");
    }

    for (nx = startlocal; nx <= stoplocal; nx++) {
        svmul(nx, info->recipbox[XX], gridpx);
        for (ny = -info->nky[0]/2; ny < info->nky[0]/2+1; ny++) {
            svmul(ny, info->recipbox[YY], tmpvec);
            rvec_add(gridpx, tmpvec, gridpxy);
            for (nz = -info->nkz[0]/2; nz < info->nkz[0]/2+1; nz++) {
                if (0 == nx && 0 == ny && 0 == nz)
                    continue;
                svmul(nz, info->recipbox[ZZ], tmpvec);
                rvec_add(gridpxy, tmpvec, gridp);
                tmp    = norm2(gridp);
                coeff  = exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0]);
                coeff /= 2.0 * M_PI * info->volume * tmp;
                coeff2 = tmp;

                tmp  = eps_poly2(nx, info->nkx[0], info->pme_order[0]);
                tmp += eps_poly2(ny, info->nkx[0], info->pme_order[0]);
                tmp += eps_poly2(nz, info->nkx[0], info->pme_order[0]);

                tmp1 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
                tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
                tmp += 2.0 * tmp1 * tmp2;

                tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
                tmp2 = eps_poly1(ny, info->nky[0], info->pme_order[0]);
                tmp += 2.0 * tmp1 * tmp2;

                tmp1 = eps_poly1(nz, info->nkz[0], info->pme_order[0]);
                tmp2 = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
                tmp += 2.0 * tmp1 * tmp2;

                tmp1  = eps_poly1(nx, info->nkx[0], info->pme_order[0]);
                tmp1 += eps_poly1(ny, info->nky[0], info->pme_order[0]);
                tmp1 += eps_poly1(nz, info->nkz[0], info->pme_order[0]);
                tmp  += tmp1 * tmp1;

                e_rec1 += 32.0 * M_PI * M_PI * coeff * coeff * coeff2 * tmp * q2_all * q2_all / nr;

                tmp1  = eps_poly3(nx, info->nkx[0], info->pme_order[0]);
                tmp1 *= info->nkx[0];
                tmp2  = iprod(gridp, info->recipbox[XX]);
                tmp   = tmp1*tmp2;

                tmp1  = eps_poly3(ny, info->nky[0], info->pme_order[0]);
                tmp1 *= info->nky[0];
                tmp2  = iprod(gridp, info->recipbox[YY]);
                tmp  += tmp1*tmp2;

                tmp1  = eps_poly3(nz, info->nkz[0], info->pme_order[0]);
                tmp1 *= info->nkz[0];
                tmp2  = iprod(gridp, info->recipbox[ZZ]);
                tmp  += tmp1*tmp2;

                tmp *= 4.0 * M_PI;

                tmp1  = eps_poly4(nx, info->nkx[0], info->pme_order[0]);
                tmp1 *= norm2(info->recipbox[XX]);
                tmp1 *= info->nkx[0] * info->nkx[0];
                tmp  += tmp1;

                tmp1  = eps_poly4(ny, info->nky[0], info->pme_order[0]);
                tmp1 *= norm2(info->recipbox[YY]);
                tmp1 *= info->nky[0] * info->nky[0];
                tmp  += tmp1;

                tmp1  = eps_poly4(nz, info->nkz[0], info->pme_order[0]);
                tmp1 *= norm2(info->recipbox[ZZ]);
                tmp1 *= info->nkz[0] * info->nkz[0];
                tmp  += tmp1;

                e_rec2 += 4.0 * coeff * coeff * tmp * q2_all * q2_all / nr;
            }
        }
        if (MASTER(cr))
            fprintf(stderr, "\rCalculating reciprocal error part 1 ... %3.0f%%",
                    100.0*(nx - startlocal + 1)/(x_per_core));
    }

    if (MASTER(cr))
        fprintf(stderr, "\n");

    if (info->fracself > 0) {
        nrsamples = ceil(info->fracself*nr);
    } else {
        nrsamples = nr;
    }
    xtot = nrsamples;

    startglobal = 0;
    stopglobal  = nr;

    if (PAR(cr)) {
        x_per_core = ceil(xtot/cr->nnodes);
        startlocal = startglobal + x_per_core*cr->nodeid;
        stoplocal  = startglobal + x_per_core*(cr->nodeid + 1);
        if (stoplocal > stopglobal)
            stoplocal = stopglobal;
    } else {
        startlocal = startglobal;
        stoplocal  = stopglobal;
        x_per_core = xtot;
    }

    for (i = startlocal; i < stoplocal; i++) {
        e_rec3x = 0;
        e_rec3y = 0;
        e_rec3z = 0;

        if (info->fracself < 0) {
            ci = i;
        } else {
            ci = floor(gmx_rng_uniform_real(rng) * nr);
            if (ci == nr)
                ci = nr - 1;
        }

        for (nx = -info->nkx[0]/2; nx < info->nkx[0]/2+1; nx++) {
            svmul(nx, info->recipbox[XX], gridpx);
            for (ny = -info->nky[0]/2; ny < info->nky[0]/2+1; ny++) {
                svmul(ny, info->recipbox[YY], tmpvec);
                rvec_add(gridpx, tmpvec, gridpxy);
                for (nz = -info->nkz[0]/2; nz < info->nkz[0]/2+1; nz++) {
                    if (0 == nx && 0 == ny && 0 == nz)
                        continue;
                    svmul(nz, info->recipbox[ZZ], tmpvec);
                    rvec_add(gridpxy, tmpvec, gridp);
                    tmp    = norm2(gridp);
                    coeff  = exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0]);
                    coeff /= tmp;
                    e_rec3x += coeff*eps_self(nx, info->nkx[0], info->recipbox[XX], info->pme_order[0], x[ci]);
                    e_rec3y += coeff*eps_self(ny, info->nky[0], info->recipbox[YY], info->pme_order[0], x[ci]);
                    e_rec3z += coeff*eps_self(nz, info->nkz[0], info->recipbox[ZZ], info->pme_order[0], x[ci]);
                }
            }
        }

        clear_rvec(tmpvec2);

        svmul(e_rec3x, info->recipbox[XX], tmpvec);
        rvec_inc(tmpvec2, tmpvec);
        svmul(e_rec3y, info->recipbox[YY], tmpvec);
        rvec_inc(tmpvec2, tmpvec);
        svmul(e_rec3z, info->recipbox[ZZ], tmpvec);
        rvec_inc(tmpvec2, tmpvec);

        e_rec3 += q[ci]*q[ci]*q[ci]*q[ci]*norm2(tmpvec2)
                  / (nrsamples * M_PI * info->volume * M_PI * info->volume);
        if (MASTER(cr)) {
            fprintf(stderr, "\rCalculating reciprocal error part 2 ... %3.0f%%",
                    100.0*(i + 1)/stoplocal);
        }
    }
    if (MASTER(cr))
        fprintf(stderr, "\n");

#ifdef TAKETIME
    if (MASTER(cr)) {
        t1 = MPI_Wtime() - t0;
        fprintf(fp_out, "Recip. err. est. took : %lf s\n", t1);
    }
#endif

#ifdef DEBUG
    if (PAR(cr)) {
        fprintf(stderr, "Node %3d: nx=[%3d...%3d]  e_rec3=%e\n",
                cr->nodeid, startlocal, stoplocal, e_rec3);
    }
#endif

#ifdef TAKETIME
    if (MASTER(cr)) {
        t2 = MPI_Wtime() - t0;
        fprintf(fp_out, "barrier : %lf s\n", t2 - t1);
    }
#endif

    if (PAR(cr)) {
        gmx_sum(1, &e_rec1, cr);
        gmx_sum(1, &e_rec2, cr);
        gmx_sum(1, &e_rec3, cr);
    }

#ifdef TAKETIME
    if (MASTER(cr))
        fprintf(fp_out, "final reduce : %lf s\n", MPI_Wtime() - t0 - t2);
#endif

    e_rec = sqrt(e_rec1 + e_rec2 + e_rec3);

    return ONE_4PI_EPS0 * e_rec;
}
int calc_gb_rad_still_sse2_double(t_commrec *cr, t_forcerec *fr,
                                  int natoms, gmx_localtop_t *top,
                                  const t_atomtypes *atype, double *x,
                                  t_nblist *nl, gmx_genborn_t *born)
{
    int     i, k, n, ii, is3, ii3, nj0, nj1, offset;
    int     jnrA, jnrB, j3A, j3B;
    int    *mdtype;
    double  shX, shY, shZ;
    int    *jjnr;
    double *shiftvec;
    double  gpi_ai, gpi2;
    double  factor;
    double *gb_radius;
    double *vsolv;
    double *work;
    double *dadx;

    __m128d ix, iy, iz;
    __m128d jx, jy, jz;
    __m128d dx, dy, dz;
    __m128d tx, ty, tz;
    __m128d rsq, rinv, rinv2, rinv4, rinv6;
    __m128d ratio, gpi, rai, raj, vai, vaj, rvdw;
    __m128d ccf, dccf, theta, cosq, term, sinq, res, prod, prod_ai, tmp;
    __m128d mask, icf4, icf6, mask_cmp;

    const __m128d half        = _mm_set1_pd(0.5);
    const __m128d three       = _mm_set1_pd(3.0);
    const __m128d one         = _mm_set1_pd(1.0);
    const __m128d two         = _mm_set1_pd(2.0);
    const __m128d zero        = _mm_set1_pd(0.0);
    const __m128d four        = _mm_set1_pd(4.0);

    const __m128d still_p5inv = _mm_set1_pd(STILL_P5INV);
    const __m128d still_pip5  = _mm_set1_pd(STILL_PIP5);
    const __m128d still_p4    = _mm_set1_pd(STILL_P4);

    factor = 0.5 * ONE_4PI_EPS0;

    gb_radius = born->gb_radius;
    vsolv     = born->vsolv;
    work      = born->gpol_still_work;
    jjnr      = nl->jjnr;
    shiftvec  = fr->shift_vec[0];
    dadx      = fr->dadx;

    jnrA = jnrB = 0;
    jx   = _mm_setzero_pd();
    jy   = _mm_setzero_pd();
    jz   = _mm_setzero_pd();

    n = 0;

    for (i = 0; i < natoms; i++) {
        work[i] = 0;
    }

    for (i = 0; i < nl->nri; i++) {
        ii  = nl->iinr[i];
        ii3 = ii*3;
        is3 = 3*nl->shift[i];
        shX = shiftvec[is3];
        shY = shiftvec[is3+1];
        shZ = shiftvec[is3+2];
        nj0 = nl->jindex[i];
        nj1 = nl->jindex[i+1];

        ix = _mm_set1_pd(shX+x[ii3+0]);
        iy = _mm_set1_pd(shY+x[ii3+1]);
        iz = _mm_set1_pd(shZ+x[ii3+2]);

        /* Polarization energy for atom ai */
        gpi = _mm_setzero_pd();

        rai     = _mm_load1_pd(gb_radius+ii);
        prod_ai = _mm_set1_pd(STILL_P4*vsolv[ii]);

        for (k = nj0; k < nj1-1; k += 2) {
            jnrA = jjnr[k];
            jnrB = jjnr[k+1];

            j3A = 3*jnrA;
            j3B = 3*jnrB;

            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A, x+j3B, jx, jy, jz);
            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA, gb_radius+jnrB, raj);
            GMX_MM_LOAD_2VALUES_PD(vsolv+jnrA, vsolv+jnrB, vaj);

            dx = _mm_sub_pd(ix, jx);
            dy = _mm_sub_pd(iy, jy);
            dz = _mm_sub_pd(iz, jz);

            rsq   = gmx_mm_calc_rsq_pd(dx, dy, dz);
            rinv  = gmx_mm_invsqrt_pd(rsq);
            rinv2 = _mm_mul_pd(rinv, rinv);
            rinv4 = _mm_mul_pd(rinv2, rinv2);
            rinv6 = _mm_mul_pd(rinv4, rinv2);

            rvdw  = _mm_add_pd(rai, raj);
            ratio = _mm_mul_pd(rsq, gmx_mm_inv_pd(_mm_mul_pd(rvdw, rvdw)));

            mask_cmp = _mm_cmple_pd(ratio, still_p5inv);

            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
            if (0 == _mm_movemask_pd(mask_cmp)) {
                /* if ratio > still_p5inv for ALL elements */
                ccf  = one;
                dccf = _mm_setzero_pd();
            } else {
                ratio = _mm_min_pd(ratio, still_p5inv);
                theta = _mm_mul_pd(ratio, still_pip5);
                gmx_mm_sincos_pd(theta, &sinq, &cosq);
                term = _mm_mul_pd(half, _mm_sub_pd(one, cosq));
                ccf  = _mm_mul_pd(term, term);
                dccf = _mm_mul_pd(_mm_mul_pd(two, term), _mm_mul_pd(sinq, theta));
            }

            prod = _mm_mul_pd(still_p4, vaj);
            icf4 = _mm_mul_pd(ccf, rinv4);
            icf6 = _mm_mul_pd(_mm_sub_pd(_mm_mul_pd(four, ccf), dccf), rinv6);

            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA, work+jnrB, _mm_mul_pd(prod_ai, icf4));

            gpi = _mm_add_pd(gpi, _mm_mul_pd(prod, icf4));

            _mm_store_pd(dadx, _mm_mul_pd(prod, icf6));
            dadx += 2;
            _mm_store_pd(dadx, _mm_mul_pd(prod_ai, icf6));
            dadx += 2;
        }

        if (k < nj1) {
            jnrA = jjnr[k];
            j3A  = 3*jnrA;

            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A, jx, jy, jz);
            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA, raj);
            GMX_MM_LOAD_1VALUE_PD(vsolv+jnrA, vaj);

            dx = _mm_sub_sd(ix, jx);
            dy = _mm_sub_sd(iy, jy);
            dz = _mm_sub_sd(iz, jz);

            rsq   = gmx_mm_calc_rsq_pd(dx, dy, dz);
            rinv  = gmx_mm_invsqrt_pd(rsq);
            rinv2 = _mm_mul_sd(rinv, rinv);
            rinv4 = _mm_mul_sd(rinv2, rinv2);
            rinv6 = _mm_mul_sd(rinv4, rinv2);

            rvdw  = _mm_add_sd(rai, raj);
            ratio = _mm_mul_sd(rsq, gmx_mm_inv_pd(_mm_mul_pd(rvdw, rvdw)));

            mask_cmp = _mm_cmple_sd(ratio, still_p5inv);

            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
            if (0 == _mm_movemask_pd(mask_cmp)) {
                /* if ratio > still_p5inv for ALL elements */
                ccf  = one;
                dccf = _mm_setzero_pd();
            } else {
                ratio = _mm_min_sd(ratio, still_p5inv);
                theta = _mm_mul_sd(ratio, still_pip5);
                gmx_mm_sincos_pd(theta, &sinq, &cosq);
                term = _mm_mul_sd(half, _mm_sub_sd(one, cosq));
                ccf  = _mm_mul_sd(term, term);
                dccf = _mm_mul_sd(_mm_mul_sd(two, term), _mm_mul_sd(sinq, theta));
            }

            prod = _mm_mul_sd(still_p4, vaj);
            icf4 = _mm_mul_sd(ccf, rinv4);
            icf6 = _mm_mul_sd(_mm_sub_sd(_mm_mul_sd(four, ccf), dccf), rinv6);

            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA, _mm_mul_sd(prod_ai, icf4));

            gpi = _mm_add_sd(gpi, _mm_mul_sd(prod, icf4));

            _mm_store_pd(dadx, _mm_mul_pd(prod, icf6));
            dadx += 2;
            _mm_store_pd(dadx, _mm_mul_pd(prod_ai, icf6));
            dadx += 2;
        }
        gmx_mm_update_1pot_pd(gpi, work+ii);
    }

    /* Sum up the polarization energy from other nodes */
    if (PARTDECOMP(cr)) {
        gmx_sum(natoms, work, cr);
    } else if (DOMAINDECOMP(cr)) {
        dd_atom_sum_real(cr->dd, work);
    }

    /* Compute the radii */
    for (i = 0; i < fr->natoms_force; i++) { /* PELA born->nr */
        if (born->use[i] != 0) {
            gpi_ai = born->gpol[i] + work[i]; /* add gpi to the initial pol energy gpi_ai */
            gpi2   = gpi_ai * gpi_ai;

            born->bRad[i]   = factor*gmx_invsqrt(gpi2);
            fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
        }
    }

    /* Extra (local) communication required for DD */
    if (DOMAINDECOMP(cr)) {
        dd_atom_spread_real(cr->dd, born->bRad);
        dd_atom_spread_real(cr->dd, fr->invsqrta);
    }

    return 0;
}
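/* Sketch of the half-list accumulation pattern used above (illustrative
 * names and a hypothetical pair term, not the GB kernels themselves):
 * every pair (i,j) is visited once and updates both partners, so per-atom
 * partial sums for atoms owned by other ranks land in work[]; the
 * gmx_sum()/dd_atom_sum_real() step afterwards completes them. */
static void accumulate_half_list(int nri, const int *iinr, const int *jindex,
                                 const int *jjnr, const double *term_ij,
                                 const double *term_ji, double *work)
{
    int i, k;

    for (i = 0; i < nri; i++) {
        double gpi = 0.0;                  /* sum of j -> i contributions */

        for (k = jindex[i]; k < jindex[i+1]; k++) {
            gpi           += term_ji[k];   /* effect of atom j on atom i */
            work[jjnr[k]] += term_ij[k];   /* effect of atom i on atom j */
        }
        work[iinr[i]] += gpi;
    }
    /* ...followed by a global sum of work[] across ranks */
}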
int calc_gb_rad_hct_obc_sse2_double(t_commrec *cr, t_forcerec *fr,
                                    int natoms, gmx_localtop_t *top,
                                    const t_atomtypes *atype, double *x,
                                    t_nblist *nl, gmx_genborn_t *born,
                                    t_mdatoms *md, int gb_algorithm)
{
    int     i, ai, k, n, ii, ii3, is3, nj0, nj1, at0, at1, offset;
    int     jnrA, jnrB;
    int     j3A, j3B;
    double  shX, shY, shZ;
    double  rr, rr_inv, rr_inv2, sum_tmp, sum, sum2, sum3, gbr;
    double  sum_ai2, sum_ai3, tsum, tchain, doffset;
    double *obc_param;
    double *gb_radius;
    double *work;
    int    *jjnr;
    double *dadx;
    double *shiftvec;
    double  min_rad, rad;

    __m128d ix, iy, iz, jx, jy, jz;
    __m128d dx, dy, dz, t1, t2, t3, t4;
    __m128d rsq, rinv, r;
    __m128d rai, rai_inv, raj, raj_inv, rai_inv2, sk, sk2, lij, dlij, duij;
    __m128d uij, lij2, uij2, lij3, uij3, diff2;
    __m128d lij_inv, sk2_inv, prod, log_term, tmp, tmp_sum;
    __m128d sum_ai, tmp_ai, sk_ai, sk_aj, sk2_ai, sk2_aj, sk2_rinv;
    __m128d dadx1, dadx2;
    __m128d logterm;
    __m128d mask;
    __m128d obc_mask1, obc_mask2, obc_mask3;

    __m128d oneeighth = _mm_set1_pd(0.125);
    __m128d onefourth = _mm_set1_pd(0.25);

    const __m128d half  = _mm_set1_pd(0.5);
    const __m128d three = _mm_set1_pd(3.0);
    const __m128d one   = _mm_set1_pd(1.0);
    const __m128d two   = _mm_set1_pd(2.0);
    const __m128d zero  = _mm_set1_pd(0.0);
    const __m128d neg   = _mm_set1_pd(-1.0);

    /* Set the dielectric offset */
    doffset   = born->gb_doffset;
    gb_radius = born->gb_radius;
    obc_param = born->param;
    work      = born->gpol_hct_work;
    jjnr      = nl->jjnr;
    dadx      = fr->dadx;
    shiftvec  = fr->shift_vec[0];

    jx = _mm_setzero_pd();
    jy = _mm_setzero_pd();
    jz = _mm_setzero_pd();

    jnrA = jnrB = 0;

    for (i = 0; i < born->nr; i++) {
        work[i] = 0;
    }

    for (i = 0; i < nl->nri; i++) {
        ii  = nl->iinr[i];
        ii3 = ii*3;
        is3 = 3*nl->shift[i];
        shX = shiftvec[is3];
        shY = shiftvec[is3+1];
        shZ = shiftvec[is3+2];
        nj0 = nl->jindex[i];
        nj1 = nl->jindex[i+1];

        ix = _mm_set1_pd(shX+x[ii3+0]);
        iy = _mm_set1_pd(shY+x[ii3+1]);
        iz = _mm_set1_pd(shZ+x[ii3+2]);

        rai     = _mm_load1_pd(gb_radius+ii);
        rai_inv = gmx_mm_inv_pd(rai);

        sum_ai = _mm_setzero_pd();

        sk_ai  = _mm_load1_pd(born->param+ii);
        sk2_ai = _mm_mul_pd(sk_ai, sk_ai);

        for (k = nj0; k < nj1-1; k += 2) {
            jnrA = jjnr[k];
            jnrB = jjnr[k+1];

            j3A = 3*jnrA;
            j3B = 3*jnrB;

            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A, x+j3B, jx, jy, jz);
            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA, gb_radius+jnrB, raj);
            GMX_MM_LOAD_2VALUES_PD(obc_param+jnrA, obc_param+jnrB, sk_aj);

            dx = _mm_sub_pd(ix, jx);
            dy = _mm_sub_pd(iy, jy);
            dz = _mm_sub_pd(iz, jz);

            rsq  = gmx_mm_calc_rsq_pd(dx, dy, dz);
            rinv = gmx_mm_invsqrt_pd(rsq);
            r    = _mm_mul_pd(rsq, rinv);

            /* Compute raj_inv aj1-4 */
            raj_inv = gmx_mm_inv_pd(raj);

            /* Evaluate influence of atom aj -> ai */
            t1 = _mm_add_pd(r, sk_aj);
            t2 = _mm_sub_pd(r, sk_aj);
            t3 = _mm_sub_pd(sk_aj, r);

            obc_mask1 = _mm_cmplt_pd(rai, t1);
            obc_mask2 = _mm_cmplt_pd(rai, t2);
            obc_mask3 = _mm_cmplt_pd(rai, t3);

            uij  = gmx_mm_inv_pd(t1);
            lij  = _mm_or_pd(_mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
                             _mm_andnot_pd(obc_mask2, rai_inv));
            dlij = _mm_and_pd(one, obc_mask2);

            uij2 = _mm_mul_pd(uij, uij);
            uij3 = _mm_mul_pd(uij2, uij);
            lij2 = _mm_mul_pd(lij, lij);
            lij3 = _mm_mul_pd(lij2, lij);

            diff2    = _mm_sub_pd(uij2, lij2);
            lij_inv  = gmx_mm_invsqrt_pd(lij2);
            sk2_aj   = _mm_mul_pd(sk_aj, sk_aj);
            sk2_rinv = _mm_mul_pd(sk2_aj, rinv);
            prod     = _mm_mul_pd(onefourth, sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_pd(uij, lij_inv));

            t1 = _mm_sub_pd(lij, uij);
            t2 = _mm_mul_pd(diff2, _mm_sub_pd(_mm_mul_pd(onefourth, r), prod));
            t3 = _mm_mul_pd(half, _mm_mul_pd(rinv, logterm));
            t1 = _mm_add_pd(t1, _mm_add_pd(t2, t3));
            t4 = _mm_mul_pd(two, _mm_sub_pd(rai_inv, lij));
            t4 = _mm_and_pd(t4, obc_mask3);
            t1 = _mm_mul_pd(half, _mm_add_pd(t1, t4));

            sum_ai = _mm_add_pd(sum_ai, _mm_and_pd(t1, obc_mask1));

            t1 = _mm_add_pd(_mm_mul_pd(half, lij2), _mm_mul_pd(prod, lij3));
            t1 = _mm_sub_pd(t1, _mm_mul_pd(onefourth,
                                           _mm_add_pd(_mm_mul_pd(lij, rinv),
                                                      _mm_mul_pd(lij3, r))));
            t2 = _mm_mul_pd(onefourth, _mm_add_pd(_mm_mul_pd(uij, rinv),
                                                  _mm_mul_pd(uij3, r)));
            t2 = _mm_sub_pd(t2, _mm_add_pd(_mm_mul_pd(half, uij2),
                                           _mm_mul_pd(prod, uij3)));
            t3 = _mm_mul_pd(_mm_mul_pd(onefourth, logterm),
                            _mm_mul_pd(rinv, rinv));
            t3 = _mm_sub_pd(t3, _mm_mul_pd(_mm_mul_pd(diff2, oneeighth),
                                           _mm_add_pd(one,
                                                      _mm_mul_pd(sk2_rinv, rinv))));
            t1 = _mm_mul_pd(rinv, _mm_add_pd(_mm_mul_pd(dlij, t1),
                                             _mm_add_pd(t2, t3)));

            dadx1 = _mm_and_pd(t1, obc_mask1);

            /* Evaluate influence of atom ai -> aj */
            t1 = _mm_add_pd(r, sk_ai);
            t2 = _mm_sub_pd(r, sk_ai);
            t3 = _mm_sub_pd(sk_ai, r);

            obc_mask1 = _mm_cmplt_pd(raj, t1);
            obc_mask2 = _mm_cmplt_pd(raj, t2);
            obc_mask3 = _mm_cmplt_pd(raj, t3);

            uij  = gmx_mm_inv_pd(t1);
            lij  = _mm_or_pd(_mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
                             _mm_andnot_pd(obc_mask2, raj_inv));
            dlij = _mm_and_pd(one, obc_mask2);

            uij2 = _mm_mul_pd(uij, uij);
            uij3 = _mm_mul_pd(uij2, uij);
            lij2 = _mm_mul_pd(lij, lij);
            lij3 = _mm_mul_pd(lij2, lij);

            diff2    = _mm_sub_pd(uij2, lij2);
            lij_inv  = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv = _mm_mul_pd(sk2_ai, rinv);
            prod     = _mm_mul_pd(onefourth, sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_pd(uij, lij_inv));

            t1 = _mm_sub_pd(lij, uij);
            t2 = _mm_mul_pd(diff2, _mm_sub_pd(_mm_mul_pd(onefourth, r), prod));
            t3 = _mm_mul_pd(half, _mm_mul_pd(rinv, logterm));
            t1 = _mm_add_pd(t1, _mm_add_pd(t2, t3));
            t4 = _mm_mul_pd(two, _mm_sub_pd(raj_inv, lij));
            t4 = _mm_and_pd(t4, obc_mask3);
            t1 = _mm_mul_pd(half, _mm_add_pd(t1, t4));

            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA, work+jnrB, _mm_and_pd(t1, obc_mask1));

            t1 = _mm_add_pd(_mm_mul_pd(half, lij2), _mm_mul_pd(prod, lij3));
            t1 = _mm_sub_pd(t1, _mm_mul_pd(onefourth,
                                           _mm_add_pd(_mm_mul_pd(lij, rinv),
                                                      _mm_mul_pd(lij3, r))));
            t2 = _mm_mul_pd(onefourth, _mm_add_pd(_mm_mul_pd(uij, rinv),
                                                  _mm_mul_pd(uij3, r)));
            t2 = _mm_sub_pd(t2, _mm_add_pd(_mm_mul_pd(half, uij2),
                                           _mm_mul_pd(prod, uij3)));
            t3 = _mm_mul_pd(_mm_mul_pd(onefourth, logterm),
                            _mm_mul_pd(rinv, rinv));
            t3 = _mm_sub_pd(t3, _mm_mul_pd(_mm_mul_pd(diff2, oneeighth),
                                           _mm_add_pd(one,
                                                      _mm_mul_pd(sk2_rinv, rinv))));
            t1 = _mm_mul_pd(rinv, _mm_add_pd(_mm_mul_pd(dlij, t1),
                                             _mm_add_pd(t2, t3)));

            dadx2 = _mm_and_pd(t1, obc_mask1);

            _mm_store_pd(dadx, dadx1);
            dadx += 2;
            _mm_store_pd(dadx, dadx2);
            dadx += 2;
        } /* end normal inner loop */

        if (k < nj1) {
            jnrA = jjnr[k];
            j3A  = 3*jnrA;

            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A, jx, jy, jz);
            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA, raj);
            GMX_MM_LOAD_1VALUE_PD(obc_param+jnrA, sk_aj);

            dx = _mm_sub_sd(ix, jx);
            dy = _mm_sub_sd(iy, jy);
            dz = _mm_sub_sd(iz, jz);

            rsq  = gmx_mm_calc_rsq_pd(dx, dy, dz);
            rinv = gmx_mm_invsqrt_pd(rsq);
            r    = _mm_mul_sd(rsq, rinv);

            /* Compute raj_inv aj1-4 */
            raj_inv = gmx_mm_inv_pd(raj);

            /* Evaluate influence of atom aj -> ai */
            t1 = _mm_add_sd(r, sk_aj);
            t2 = _mm_sub_sd(r, sk_aj);
            t3 = _mm_sub_sd(sk_aj, r);

            obc_mask1 = _mm_cmplt_sd(rai, t1);
            obc_mask2 = _mm_cmplt_sd(rai, t2);
            obc_mask3 = _mm_cmplt_sd(rai, t3);

            uij  = gmx_mm_inv_pd(t1);
            lij  = _mm_or_pd(_mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
                             _mm_andnot_pd(obc_mask2, rai_inv));
            dlij = _mm_and_pd(one, obc_mask2);

            uij2 = _mm_mul_sd(uij, uij);
            uij3 = _mm_mul_sd(uij2, uij);
            lij2 = _mm_mul_sd(lij, lij);
            lij3 = _mm_mul_sd(lij2, lij);

            diff2    = _mm_sub_sd(uij2, lij2);
            lij_inv  = gmx_mm_invsqrt_pd(lij2);
            sk2_aj   = _mm_mul_sd(sk_aj, sk_aj);
            sk2_rinv = _mm_mul_sd(sk2_aj, rinv);
            prod     = _mm_mul_sd(onefourth, sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_sd(uij, lij_inv));

            t1 = _mm_sub_sd(lij, uij);
            t2 = _mm_mul_sd(diff2, _mm_sub_sd(_mm_mul_pd(onefourth, r), prod));
            t3 = _mm_mul_sd(half, _mm_mul_sd(rinv, logterm));
            t1 = _mm_add_sd(t1, _mm_add_sd(t2, t3));
            t4 = _mm_mul_sd(two, _mm_sub_sd(rai_inv, lij));
            t4 = _mm_and_pd(t4, obc_mask3);
            t1 = _mm_mul_sd(half, _mm_add_sd(t1, t4));

            sum_ai = _mm_add_sd(sum_ai, _mm_and_pd(t1, obc_mask1));

            t1 = _mm_add_sd(_mm_mul_sd(half, lij2), _mm_mul_sd(prod, lij3));
            t1 = _mm_sub_sd(t1, _mm_mul_sd(onefourth,
                                           _mm_add_sd(_mm_mul_sd(lij, rinv),
                                                      _mm_mul_sd(lij3, r))));
            t2 = _mm_mul_sd(onefourth, _mm_add_sd(_mm_mul_sd(uij, rinv),
                                                  _mm_mul_sd(uij3, r)));
            t2 = _mm_sub_sd(t2, _mm_add_sd(_mm_mul_sd(half, uij2),
                                           _mm_mul_sd(prod, uij3)));
            t3 = _mm_mul_sd(_mm_mul_sd(onefourth, logterm),
                            _mm_mul_sd(rinv, rinv));
            t3 = _mm_sub_sd(t3, _mm_mul_sd(_mm_mul_sd(diff2, oneeighth),
                                           _mm_add_sd(one,
                                                      _mm_mul_sd(sk2_rinv, rinv))));
            t1 = _mm_mul_sd(rinv, _mm_add_sd(_mm_mul_sd(dlij, t1),
                                             _mm_add_pd(t2, t3)));

            dadx1 = _mm_and_pd(t1, obc_mask1);

            /* Evaluate influence of atom ai -> aj */
            t1 = _mm_add_sd(r, sk_ai);
            t2 = _mm_sub_sd(r, sk_ai);
            t3 = _mm_sub_sd(sk_ai, r);

            obc_mask1 = _mm_cmplt_sd(raj, t1);
            obc_mask2 = _mm_cmplt_sd(raj, t2);
            obc_mask3 = _mm_cmplt_sd(raj, t3);

            uij  = gmx_mm_inv_pd(t1);
            lij  = _mm_or_pd(_mm_and_pd(obc_mask2, gmx_mm_inv_pd(t2)),
                             _mm_andnot_pd(obc_mask2, raj_inv));
            dlij = _mm_and_pd(one, obc_mask2);

            uij2 = _mm_mul_sd(uij, uij);
            uij3 = _mm_mul_sd(uij2, uij);
            lij2 = _mm_mul_sd(lij, lij);
            lij3 = _mm_mul_sd(lij2, lij);

            diff2    = _mm_sub_sd(uij2, lij2);
            lij_inv  = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv = _mm_mul_sd(sk2_ai, rinv);
            prod     = _mm_mul_sd(onefourth, sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_sd(uij, lij_inv));

            t1 = _mm_sub_sd(lij, uij);
            t2 = _mm_mul_sd(diff2, _mm_sub_sd(_mm_mul_sd(onefourth, r), prod));
            t3 = _mm_mul_sd(half, _mm_mul_sd(rinv, logterm));
            t1 = _mm_add_sd(t1, _mm_add_sd(t2, t3));
            t4 = _mm_mul_sd(two, _mm_sub_sd(raj_inv, lij));
            t4 = _mm_and_pd(t4, obc_mask3);
            t1 = _mm_mul_sd(half, _mm_add_sd(t1, t4));

            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA, _mm_and_pd(t1, obc_mask1));

            t1 = _mm_add_sd(_mm_mul_sd(half, lij2), _mm_mul_sd(prod, lij3));
            t1 = _mm_sub_sd(t1, _mm_mul_sd(onefourth,
                                           _mm_add_sd(_mm_mul_sd(lij, rinv),
                                                      _mm_mul_sd(lij3, r))));
            t2 = _mm_mul_sd(onefourth, _mm_add_sd(_mm_mul_sd(uij, rinv),
                                                  _mm_mul_sd(uij3, r)));
            t2 = _mm_sub_sd(t2, _mm_add_sd(_mm_mul_sd(half, uij2),
                                           _mm_mul_sd(prod, uij3)));
            t3 = _mm_mul_sd(_mm_mul_sd(onefourth, logterm),
                            _mm_mul_sd(rinv, rinv));
            t3 = _mm_sub_sd(t3, _mm_mul_sd(_mm_mul_sd(diff2, oneeighth),
                                           _mm_add_sd(one,
                                                      _mm_mul_sd(sk2_rinv, rinv))));
            t1 = _mm_mul_sd(rinv, _mm_add_sd(_mm_mul_sd(dlij, t1),
                                             _mm_add_sd(t2, t3)));

            dadx2 = _mm_and_pd(t1, obc_mask1);

            _mm_store_pd(dadx, dadx1);
            dadx += 2;
            _mm_store_pd(dadx, dadx2);
            dadx += 2;
        }
        gmx_mm_update_1pot_pd(sum_ai, work+ii);
    }

    /* Parallel summations */
    if (PARTDECOMP(cr)) {
        gmx_sum(natoms, work, cr);
    } else if (DOMAINDECOMP(cr)) {
        dd_atom_sum_real(cr->dd, work);
    }

    if (gb_algorithm == egbHCT) {
        /* HCT */
        for (i = 0; i < fr->natoms_force; i++) { /* PELA born->nr */
            if (born->use[i] != 0) {
                rr      = top->atomtypes.gb_radius[md->typeA[i]] - doffset;
                sum     = 1.0/rr - work[i];
                min_rad = rr + doffset;
                rad     = 1.0/sum;

                born->bRad[i]   = rad > min_rad ? rad : min_rad;
                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
            }
        }

        /* Extra communication required for DD */
        if (DOMAINDECOMP(cr)) {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
        }
    } else {
        /* OBC */
        for (i = 0; i < fr->natoms_force; i++) { /* PELA born->nr */
            if (born->use[i] != 0) {
                rr      = top->atomtypes.gb_radius[md->typeA[i]];
                rr_inv2 = 1.0/rr;
                rr      = rr - doffset;
                rr_inv  = 1.0/rr;
                sum     = rr * work[i];
                sum2    = sum * sum;
                sum3    = sum2 * sum;

                tsum = tanh(born->obc_alpha*sum - born->obc_beta*sum2 + born->obc_gamma*sum3);
                born->bRad[i] = rr_inv - tsum*rr_inv2;
                born->bRad[i] = 1.0 / born->bRad[i];

                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);

                tchain = rr * (born->obc_alpha - 2*born->obc_beta*sum + 3*born->obc_gamma*sum2);
                born->drobc[i] = (1.0 - tsum*tsum)*tchain*rr_inv2;
            }
        }

        /* Extra (local) communication required for DD */
        if (DOMAINDECOMP(cr)) {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
            dd_atom_spread_real(cr->dd, born->drobc);
        }
    }

    return 0;
}
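/* Scalar sketch of the OBC branch above, believed to follow Onufriev,
 * Bashford and Case (2004); the helper name and parameter names are
 * illustrative.  With psi = (rho - offset)*S, where S is the accumulated
 * descreening sum (work[i] above), the effective Born radius is
 *     1/R = 1/(rho - offset) - tanh(alpha*psi - beta*psi^2 + gamma*psi^3)/rho */
#include <math.h>

static double obc_born_radius(double rho, double offset, double descreen_sum,
                              double alpha, double beta, double gam)
{
    double rr   = rho - offset;      /* reduced intrinsic radius */
    double psi  = rr * descreen_sum; /* dimensionless descreening measure */
    double tsum = tanh(alpha*psi - beta*psi*psi + gam*psi*psi*psi);

    return 1.0 / (1.0/rr - tsum/rho); /* effective Born radius */
}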
/* Assemble the positions of the group such that every node has all of them.
 * The atom indices are retrieved from anrs_loc[0..nr_loc]
 * Note that coll_ind[i] = i is needed in the serial case */
extern void communicate_group_positions(
        const t_commrec *cr,           /* Pointer to MPI communication data */
        rvec            *xcoll,        /* Collective array of positions */
        ivec            *shifts,       /* Collective array of shifts for xcoll (can be NULL) */
        ivec            *extra_shifts, /* (optional) Extra shifts since last time step */
        const gmx_bool   bNS,          /* (optional) NS step, the shifts have changed */
        const rvec      *x_loc,        /* Local positions on this node */
        const int        nr,           /* Total number of atoms in the group */
        const int        nr_loc,       /* Local number of atoms in the group */
        const int       *anrs_loc,     /* Local atom numbers */
        const int       *coll_ind,     /* Collective index */
        rvec            *xcoll_old,    /* (optional) Positions from the last time step,
                                        * used to make the group whole */
        const matrix     box)          /* (optional) The box */
{
    int i;

    /* Zero out the groups' global position array */
    clear_rvecs(nr, xcoll);

    /* Put the local positions that this node has into the right place of
     * the collective array. Note that in the serial case, coll_ind[i] = i */
    for (i = 0; i < nr_loc; i++) {
        copy_rvec(x_loc[anrs_loc[i]], xcoll[coll_ind[i]]);
    }

    if (PAR(cr)) {
        /* Add the arrays from all nodes together */
        gmx_sum(nr*3, xcoll[0], cr);
    }
    /* Now we have all the positions of the group in the xcoll array present on all
     * nodes.
     *
     * The rest of the code is for making the group whole again in case atoms changed
     * their PBC representation / crossed a box boundary. We only do that if the
     * shifts array is allocated. */
    if (nullptr != shifts) {
        /* To make the group whole, start with a whole group and each
         * step move the assembled positions at closest distance to the positions
         * from the last step. First shift the positions with the saved shift
         * vectors (these are 0 when this routine is called for the first time!) */
        shift_positions_group(box, xcoll, shifts, nr);

        /* Now check if some shifts changed since the last step.
         * This only needs to be done when the shifts are expected to have changed,
         * i.e. after neighbor searching */
        if (bNS) {
            get_shifts_group(3, box, xcoll, nr, xcoll_old, extra_shifts);

            /* Shift with the additional shifts such that we get a whole group now */
            shift_positions_group(box, xcoll, extra_shifts, nr);

            /* Add the shift vectors together for the next time step */
            for (i = 0; i < nr; i++) {
                shifts[i][XX] += extra_shifts[i][XX];
                shifts[i][YY] += extra_shifts[i][YY];
                shifts[i][ZZ] += extra_shifts[i][ZZ];
            }

            /* Store current correctly-shifted positions for comparison in the next
             * NS time step */
            for (i = 0; i < nr; i++) {
                copy_rvec(xcoll[i], xcoll_old[i]);
            }
        }
    }
}
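/* Hypothetical usage sketch of the routine above (names prefixed pg_ are
 * illustrative, not from the source): assemble a group's positions on every
 * rank and keep the group whole across PBC.  The shift arrays must be
 * zero-initialized before the first call.
 *
 *   communicate_group_positions(cr, pg_xcoll, pg_shifts, pg_extra_shifts,
 *                               bNS, x_local, pg_nat, pg_nat_loc,
 *                               pg_ind_loc, pg_coll_ind, pg_xcoll_old, box);
 */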
void init_orires(FILE *log, int nfa, t_iatom forceatoms[], t_iparams ip[],
                 rvec *xref, t_mdatoms *md, t_inputrec *ir,
                 t_commrec *mcr, t_fcdata *fcd)
{
    int  i, j, d, ex, nr, *nr_ex;
    real mtot;
    rvec com;
    t_oriresdata *od;

    od = &(fcd->orires);

    od->fc  = ir->orires_fc;
    od->nex = 0;
    od->S   = NULL;

    if (ir->orires_tau > 0)
        od->edt = exp(-ir->delta_t/ir->orires_tau);
    else
        od->edt = 0;
    od->edt1 = 1 - od->edt;
    od->exp_min_t_tau = 1.0;
    od->nr = nfa/3;
    if (od->nr == 0)
        return;

    nr_ex = NULL;
    for (i = 0; i < nfa; i += 3) {
        ex = ip[forceatoms[i]].orires.ex;
        if (ex >= od->nex) {
            srenew(nr_ex, ex+1);
            for (j = od->nex; j < ex+1; j++)
                nr_ex[j] = 0;
            od->nex = ex+1;
        }
        nr_ex[ex]++;
    }
    snew(od->S, od->nex);

    /* When not doing time averaging, the instantaneous and time averaged data
     * are identical and the pointers can point to the same memory. */
    snew(od->Dinsl, od->nr);
    if (mcr)
        snew(od->Dins, od->nr);
    else
        od->Dins = od->Dinsl;

    if (fabs(ir->orires_tau) < GMX_REAL_MIN)
        od->Dtav = od->Dins;
    else
        snew(od->Dtav, od->nr);

    snew(od->oinsl, od->nr);
    if (mcr)
        snew(od->oins, od->nr);
    else
        od->oins = od->oinsl;
    if (fabs(ir->orires_tau) < GMX_REAL_MIN)
        od->otav = od->oins;
    else
        snew(od->otav, od->nr);
    snew(od->tmp, od->nex);
    snew(od->TMP, od->nex);
    for (ex = 0; ex < od->nex; ex++) {
        snew(od->TMP[ex], 5);
        for (i = 0; i < 5; i++)
            snew(od->TMP[ex][i], 5);
    }

    od->nref = 0;
    for (i = 0; i < md->nr; i++)
        if (md->cORF[i] == 0)
            od->nref++;
    snew(od->mref, od->nref);
    snew(od->xref, od->nref);
    snew(od->xtmp, od->nref);

    /* Determine the reference structure on the master node.
     * Copy it to the other nodes after checking multi compatibility,
     * so we are sure the subsystems match before copying. */
    clear_rvec(com);
    mtot = 0.0;
    j = 0;
    for (i = 0; i < md->nr; i++) {
        if (md->cORF[i] == 0) {
            od->mref[j] = md->massT[i];
            if (mcr == NULL || MASTER(mcr)) {
                copy_rvec(xref[i], od->xref[j]);
                for (d = 0; d < DIM; d++)
                    com[d] += od->mref[j]*xref[i][d];
            }
            mtot += od->mref[j];
            j++;
        }
    }
    od->invmref = 1.0/mtot;
    svmul(od->invmref, com, com);
    if (mcr == NULL || MASTER(mcr))
        for (j = 0; j < od->nref; j++)
            rvec_dec(od->xref[j], com);

    fprintf(log, "Found %d orientation experiments\n", od->nex);
    for (i = 0; i < od->nex; i++)
        fprintf(log, "  experiment %d has %d restraints\n", i+1, nr_ex[i]);
    sfree(nr_ex);

    fprintf(log, "  the fit group consists of %d atoms and has total mass %g\n",
            od->nref, mtot);

    if (mcr) {
        fprintf(log, "  the orientation restraints are ensemble averaged over %d systems\n",
                mcr->nnodes);

        check_multi_int(log, mcr, fcd->orires.nr,
                        "the number of orientation restraints");
        check_multi_int(log, mcr, fcd->orires.nref,
                        "the number of fit atoms for orientation restraining");

        /* Copy the reference coordinates from the master to the other nodes */
        gmx_sum(DIM*fcd->orires.nref, fcd->orires.xref[0], mcr);
    }
}
real calc_orires_dev(t_commrec *mcr,
                     int nfa, t_iatom forceatoms[], t_iparams ip[],
                     t_mdatoms *md, rvec x[], t_fcdata *fcd)
{
    int    fa, d, i, j, type, ex, nref;
    real   edt, edt1, invn, pfac, r2, invr, corrfac, weight, wsv2, sw, dev;
    tensor *S, R, TMP;
    rvec5  *Dinsl, *Dins, *Dtav, *rhs;
    real   *mref, ***T;
    rvec   *xref, *xtmp, com, r_unrot, r;
    t_oriresdata *od;
    bool   bTAV;
    static real two_thr = 2.0/3.0;

    od = &(fcd->orires);

    bTAV  = (fabs(od->edt) > GMX_REAL_MIN);
    edt   = od->edt;
    edt1  = od->edt1;
    S     = od->S;
    Dinsl = od->Dinsl;
    Dins  = od->Dins;
    Dtav  = od->Dtav;
    T     = od->TMP;
    rhs   = od->tmp;
    nref  = od->nref;
    mref  = od->mref;
    xref  = od->xref;
    xtmp  = od->xtmp;

    od->exp_min_t_tau *= edt;

    if (mcr)
        invn = 1.0/mcr->nnodes;
    else
        invn = 1.0;

    /* com must start from zero before the mass-weighted accumulation below */
    clear_rvec(com);
    j = 0;
    for (i = 0; i < md->nr; i++)
        if (md->cORF[i] == 0) {
            copy_rvec(x[i], xtmp[j]);
            for (d = 0; d < DIM; d++)
                com[d] += mref[j]*xref[j][d];
            j++;
        }
    svmul(od->invmref, com, com);
    for (j = 0; j < nref; j++)
        rvec_dec(xtmp[j], com);
    /* Calculate the rotation matrix to rotate x to the reference orientation */
    calc_fit_R(nref, mref, xref, xtmp, R);
    copy_mat(R, od->R);

    d = 0;
    for (fa = 0; fa < nfa; fa += 3) {
        type = forceatoms[fa];
        rvec_sub(x[forceatoms[fa+1]], x[forceatoms[fa+2]], r_unrot);
        mvmul(R, r_unrot, r);
        r2   = norm2(r);
        invr = invsqrt(r2);
        /* Calculate the prefactor for the D tensor; this includes the factor 3! */
        pfac = ip[type].orires.c*invr*invr*3;
        for (i = 0; i < ip[type].orires.pow; i++)
            pfac *= invr;
        Dinsl[d][0] = pfac*(2*r[0]*r[0] + r[1]*r[1] - r2);
        Dinsl[d][1] = pfac*(2*r[0]*r[1]);
        Dinsl[d][2] = pfac*(2*r[0]*r[2]);
        Dinsl[d][3] = pfac*(2*r[1]*r[1] + r[0]*r[0] - r2);
        Dinsl[d][4] = pfac*(2*r[1]*r[2]);
        if (mcr)
            for (i = 0; i < 5; i++)
                Dins[d][i] = Dinsl[d][i]*invn;
        d++;
    }
    if (mcr)
        gmx_sum(5*od->nr, Dins[0], mcr);

    /* Correction factor to correct for the lack of history at short times */
    corrfac = 1.0/(1.0 - od->exp_min_t_tau);

    /* Calculate the order tensor S for each experiment via optimization */
    for (ex = 0; ex < od->nex; ex++)
        for (i = 0; i < 5; i++) {
            rhs[ex][i] = 0;
            for (j = 0; j <= i; j++)
                T[ex][i][j] = 0;
        }
    d = 0;
    for (fa = 0; fa < nfa; fa += 3) {
        if (bTAV)
            for (i = 0; i < 5; i++)
                Dtav[d][i] = edt*Dtav[d][i] + edt1*Dins[d][i];

        type   = forceatoms[fa];
        ex     = ip[type].orires.ex;
        weight = ip[type].orires.kfac;
        /* Calculate the vector rhs and half the matrix T for the 5 equations */
        for (i = 0; i < 5; i++) {
            rhs[ex][i] += Dtav[d][i]*ip[type].orires.obs*weight;
            for (j = 0; j <= i; j++)
                T[ex][i][j] += Dtav[d][i]*Dtav[d][j]*weight;
        }
        d++;
    }
    /* Now we have all the data we can calculate S */
    for (ex = 0; ex < od->nex; ex++) {
        /* Correct corrfac and copy one half of T to the other half */
        for (i = 0; i < 5; i++) {
            rhs[ex][i]  *= corrfac;
            T[ex][i][i] *= sqr(corrfac);
            for (j = 0; j < i; j++) {
                T[ex][i][j] *= sqr(corrfac);
                T[ex][j][i]  = T[ex][i][j];
            }
        }
        m_inv_gen(T[ex], 5, T[ex]);
        /* Calculate the orientation tensor S for this experiment */
        S[ex][0][0] = 0;
        S[ex][0][1] = 0;
        S[ex][0][2] = 0;
        S[ex][1][1] = 0;
        S[ex][1][2] = 0;
        for (i = 0; i < 5; i++) {
            S[ex][0][0] += 1.5*T[ex][0][i]*rhs[ex][i];
            S[ex][0][1] += 1.5*T[ex][1][i]*rhs[ex][i];
            S[ex][0][2] += 1.5*T[ex][2][i]*rhs[ex][i];
            S[ex][1][1] += 1.5*T[ex][3][i]*rhs[ex][i];
            S[ex][1][2] += 1.5*T[ex][4][i]*rhs[ex][i];
        }
        S[ex][1][0] = S[ex][0][1];
        S[ex][2][0] = S[ex][0][2];
        S[ex][2][1] = S[ex][1][2];
        S[ex][2][2] = -S[ex][0][0] - S[ex][1][1];
    }

    wsv2 = 0;
    sw   = 0;
    d    = 0;
    for (fa = 0; fa < nfa; fa += 3) {
        type = forceatoms[fa];
        ex   = ip[type].orires.ex;

        od->otav[d] = two_thr*corrfac*(S[ex][0][0]*Dtav[d][0] +
                                       S[ex][0][1]*Dtav[d][1] +
                                       S[ex][0][2]*Dtav[d][2] +
                                       S[ex][1][1]*Dtav[d][3] +
                                       S[ex][1][2]*Dtav[d][4]);
        if (bTAV)
            od->oins[d] = two_thr*(S[ex][0][0]*Dins[d][0] +
                                   S[ex][0][1]*Dins[d][1] +
                                   S[ex][0][2]*Dins[d][2] +
                                   S[ex][1][1]*Dins[d][3] +
                                   S[ex][1][2]*Dins[d][4]);
        if (mcr)
            /* When ensemble averaging is used, recalculate the local orientation
             * for output to the energy file. */
            od->oinsl[d] = two_thr*(S[ex][0][0]*Dinsl[d][0] +
                                    S[ex][0][1]*Dinsl[d][1] +
                                    S[ex][0][2]*Dinsl[d][2] +
                                    S[ex][1][1]*Dinsl[d][3] +
                                    S[ex][1][2]*Dinsl[d][4]);

        dev = od->otav[d] - ip[type].orires.obs;

        wsv2 += ip[type].orires.kfac*sqr(dev);
        sw   += ip[type].orires.kfac;

        d++;
    }
    od->rmsdev = sqrt(wsv2/sw);

    /* Rotate the S matrices back, so we get the correct grad(tr(S D)) */
    for (ex = 0; ex < od->nex; ex++) {
        tmmul(R, S[ex], TMP);
        mmul(TMP, R, S[ex]);
    }

    return od->rmsdev;

    /* Approx. 120*nfa/3 flops */
}
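/* Sketch of the time averaging applied to the D tensors above (illustrative
 * helper names): with edt = exp(-delta_t/tau) this is an exponential moving
 * average, and the missing history at early times is compensated by
 * corrfac = 1/(1 - exp(-t/tau)), which approaches 1 once t >> tau. */
#include <math.h>

static double time_averaged(double d_tav, double d_ins, double edt)
{
    return edt*d_tav + (1.0 - edt)*d_ins; /* per-step update of the average */
}

static double history_corrfac(double exp_min_t_tau) /* = exp(-t/tau) */
{
    return 1.0/(1.0 - exp_min_t_tau); /* short-time correction factor */
}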