Example #1
0
/* Apply LINCS to the new coordinates xp (and optionally the velocities).
 *
 * x        reference coordinates; only used to build the normalized
 *          constraint direction vectors r[b]
 * xp       new coordinates, corrected in place
 * box      passed to dd_move_x_constraints between correction iterations
 * pbc      when non-NULL, distance vectors are computed with pbc_dx_aiuc,
 *          otherwise plain coordinate differences are used
 * lincsd   constraint lists, coefficients and work arrays
 * invmass  inverse masses, indexed by atom
 * cr       decides whether (and how) corrected coordinates are communicated
 *          and whether a per-constraint local atom count (nlocat) is used
 * wangle   angle in degrees; cos^2(wangle) is the threshold of the
 *          warning test in the correction iterations
 * warn     out: 0 when no constraint triggered the warning test, otherwise
 *          1 + the index of the last constraint that triggered it.
 *          The offset of 1 makes a violation of constraint 0 visible to
 *          callers that test (warn > 0).
 * invdt    factor applied to the velocity correction
 * v        when non-NULL, velocities are corrected with the accumulated
 *          multipliers lambda
 * bCalcVir when set, -bllen*lambda*r_i*r_j is accumulated into rmdr
 */
static void do_lincs(rvec *x,rvec *xp,matrix box,t_pbc *pbc,
                     struct gmx_lincsdata *lincsd,real *invmass,
                     t_commrec *cr,
                     real wangle,int *warn,
                     real invdt,rvec *v,
                     gmx_bool bCalcVir,tensor rmdr)
{
    int     b,i,j,k,n,iter;
    real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,len2,dlen2,wfac,lam;
    rvec    dx;
    int     ncons,*bla,*blnr,*blbnb;
    rvec    *r;
    real    *blc,*blmf,*bllen,*blcc,*rhs1,*rhs2,*sol,*lambda;
    int     *nlocat;

    /* Local aliases for the LINCS data and its work arrays */
    ncons  = lincsd->nc;
    bla    = lincsd->bla;
    r      = lincsd->tmpv;
    blnr   = lincsd->blnr;
    blbnb  = lincsd->blbnb;
    blc    = lincsd->blc;
    blmf   = lincsd->blmf;
    bllen  = lincsd->bllen;
    blcc   = lincsd->tmpncc;
    rhs1   = lincsd->tmp1;
    rhs2   = lincsd->tmp2;
    sol    = lincsd->tmp3;
    lambda = lincsd->lambda;

    /* Per-constraint count of local atoms, only available in parallel runs */
    if (DOMAINDECOMP(cr) && cr->dd->constraints)
    {
        nlocat = dd_constraints_nlocalatoms(cr->dd);
    }
    else if (PARTDECOMP(cr))
    {
        nlocat = pd_constraints_nlocalatoms(cr->pd);
    }
    else
    {
        nlocat = NULL;
    }

    *warn = 0;

    if (pbc)
    {
        /* Compute normalized i-j vectors */
        for(b=0; b<ncons; b++)
        {
            pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
            unitv(dx,r[b]);
        }
        /* Coupling coefficients and right-hand side: the projection of the
         * new distance vector on r[b] minus the reference length.
         */
        for(b=0; b<ncons; b++)
        {
            for(n=blnr[b]; n<blnr[b+1]; n++)
            {
                blcc[n] = blmf[n]*iprod(r[b],r[blbnb[n]]);
            }
            pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
            mvb = blc[b]*(iprod(r[b],dx) - bllen[b]);
            rhs1[b] = mvb;
            sol[b]  = mvb;
        }
    }
    else
    {
        /* Compute normalized i-j vectors */
        for(b=0; b<ncons; b++)
        {
            i = bla[2*b];
            j = bla[2*b+1];
            tmp0 = x[i][0] - x[j][0];
            tmp1 = x[i][1] - x[j][1];
            tmp2 = x[i][2] - x[j][2];
            rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
            r[b][0] = rlen*tmp0;
            r[b][1] = rlen*tmp1;
            r[b][2] = rlen*tmp2;
        } /* 16 ncons flops */

        /* Same as the pbc branch, with the inner products written out */
        for(b=0; b<ncons; b++)
        {
            tmp0 = r[b][0];
            tmp1 = r[b][1];
            tmp2 = r[b][2];
            len = bllen[b];
            i = bla[2*b];
            j = bla[2*b+1];
            for(n=blnr[b]; n<blnr[b+1]; n++)
            {
                k = blbnb[n];
                blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
            } /* 6 nr flops */
            mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
                          tmp1*(xp[i][1] - xp[j][1]) +
                          tmp2*(xp[i][2] - xp[j][2]) - len);
            rhs1[b] = mvb;
            sol[b]  = mvb;
            /* 10 flops */
        }
        /* Together: 26*ncons + 6*nrtot flops */
    }

    lincs_matrix_expand(lincsd,blcc,rhs1,rhs2,sol);
    /* nrec*(ncons+2*nrtot) flops */

    /* Store the multipliers and move both atoms of every constraint along
     * r[b], weighted with their inverse masses.
     */
    for(b=0; b<ncons; b++)
    {
        i = bla[2*b];
        j = bla[2*b+1];
        mvb = blc[b]*sol[b];
        lambda[b] = -mvb;
        im1 = invmass[i];
        im2 = invmass[j];
        tmp0 = r[b][0]*mvb;
        tmp1 = r[b][1]*mvb;
        tmp2 = r[b][2]*mvb;
        xp[i][0] -= tmp0*im1;
        xp[i][1] -= tmp1*im1;
        xp[i][2] -= tmp2*im1;
        xp[j][0] += tmp0*im2;
        xp[j][1] += tmp1*im2;
        xp[j][2] += tmp2*im2;
    } /* 16 ncons flops */


    /*
     ********  Correction for centripetal effects  ********
     */

    wfac = cos(DEG2RAD*wangle);
    wfac = wfac*wfac;

    for(iter=0; iter<lincsd->nIter; iter++)
    {
        if (DOMAINDECOMP(cr) && cr->dd->constraints)
        {
            /* Communicate the corrected non-local coordinates */
            dd_move_x_constraints(cr->dd,box,xp,NULL);
        }
        else if (PARTDECOMP(cr))
        {
            pd_move_x_constraints(cr,xp,NULL);
        }

        for(b=0; b<ncons; b++)
        {
            len = bllen[b];
            if (pbc)
            {
                pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
            }
            else
            {
                rvec_sub(xp[bla[2*b]],xp[bla[2*b+1]],dx);
            }
            len2 = len*len;
            dlen2 = 2*len2 - norm2(dx);
            if (dlen2 < wfac*len2 && (nlocat==NULL || nlocat[b]))
            {
                /* Report index+1: a plain index would make a violation of
                 * constraint 0 indistinguishable from "no warning".
                 */
                *warn = b + 1;
            }
            if (dlen2 > 0)
            {
                mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
            }
            else
            {
                /* No real square root available, use the full length */
                mvb = blc[b]*len;
            }
            rhs1[b] = mvb;
            sol[b]  = mvb;
        } /* 20*ncons flops */

        lincs_matrix_expand(lincsd,blcc,rhs1,rhs2,sol);
        /* nrec*(ncons+2*nrtot) flops */

        /* Accumulate the multipliers and apply the extra displacement */
        for(b=0; b<ncons; b++)
        {
            i = bla[2*b];
            j = bla[2*b+1];
            lam = lambda[b];
            mvb = blc[b]*sol[b];
            lambda[b] = lam - mvb;
            im1 = invmass[i];
            im2 = invmass[j];
            tmp0 = r[b][0]*mvb;
            tmp1 = r[b][1]*mvb;
            tmp2 = r[b][2]*mvb;
            xp[i][0] -= tmp0*im1;
            xp[i][1] -= tmp1*im1;
            xp[i][2] -= tmp2*im1;
            xp[j][0] += tmp0*im2;
            xp[j][1] += tmp1*im2;
            xp[j][2] += tmp2*im2;
        } /* 17 ncons flops */
    } /* nit*ncons*(37+9*nrec) flops */

    if (v)
    {
        /* Correct the velocities */
        for(b=0; b<ncons; b++)
        {
            i = bla[2*b];
            j = bla[2*b+1];
            im1 = invmass[i]*lambda[b]*invdt;
            im2 = invmass[j]*lambda[b]*invdt;
            v[i][0] += im1*r[b][0];
            v[i][1] += im1*r[b][1];
            v[i][2] += im1*r[b][2];
            v[j][0] -= im2*r[b][0];
            v[j][1] -= im2*r[b][1];
            v[j][2] -= im2*r[b][2];
        } /* 16 ncons flops */
    }

    if (nlocat)
    {
        /* Only account for local atoms */
        for(b=0; b<ncons; b++)
        {
            lambda[b] *= 0.5*nlocat[b];
        }
    }

    if (bCalcVir)
    {
        /* Constraint virial */
        for(b=0; b<ncons; b++)
        {
            tmp0 = bllen[b]*lambda[b];
            for(i=0; i<DIM; i++)
            {
                tmp1 = tmp0*r[b][i];
                for(j=0; j<DIM; j++)
                {
                    rmdr[i][j] -= tmp1*r[b][j];
                }
            }
        } /* 22 ncons flops */
    }

    /* Total:
     * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
     * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
     *
     * (26+nrec)*ncons + (6+2*nrec)*nrtot
     * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
     * if nit=1
     * (63+nrec)*ncons + (6+4*nrec)*nrtot
     */
}
Example #2
0
/* (Re)build the LINCS data in li for the F_CONSTR constraints of idef.
 *
 * Fills the per-constraint arrays (atom pairs, A-state lengths, B-A length
 * differences) and the list of coupled constraints per constraint
 * (blnr/blbnb), growing the arrays in li when needed, and finishes by
 * calling set_lincs_matrix. Constraints whose A and B lengths are both
 * zero are skipped unless bDynamics is set.
 */
void set_lincs(t_idef *idef,t_mdatoms *md,
               gmx_bool bDynamics,t_commrec *cr,
               struct gmx_lincsdata *li)
{
    int      start,natoms,nflexcon;
    t_blocka at2con;
    t_iatom  *ia;
    int      c,k,side,at,first,ntot;
    int      ncc_alloc,con,nconnect,other;
    int      type,atom[2];
    real     lenA,lenB;

    li->nc  = 0;
    li->ncc = 0;

    /* This is the local topology, so there are only F_CONSTR constraints.
     * Without constraints no data structure needs to be filled.
     */
    if (idef->il[F_CONSTR].nr == 0)
    {
        return;
    }

    if (debug)
    {
        fprintf(debug,"Building the LINCS connectivity\n");
    }

    /* Determine the atom range for the atom to constraint lookup */
    if (DOMAINDECOMP(cr))
    {
        if (cr->dd->constraints)
        {
            dd_get_constraint_range(cr->dd,&start,&natoms);
        }
        else
        {
            natoms = cr->dd->nat_home;
        }
        start = 0;
    }
    else if (PARTDECOMP(cr))
    {
        pd_get_constraint_range(cr->pd,&start,&natoms);
    }
    else
    {
        start  = md->start;
        natoms = md->homenr;
    }
    at2con = make_at2con(start,natoms,idef->il,idef->iparams,bDynamics,
                         &nflexcon);

    /* Number of constraints in the list: three entries per constraint */
    ntot = idef->il[F_CONSTR].nr/3;

    if (li->nc_alloc == 0 || ntot > li->nc_alloc)
    {
        li->nc_alloc = over_alloc_dd(ntot);
        srenew(li->bllen0,li->nc_alloc);
        srenew(li->ddist,li->nc_alloc);
        srenew(li->bla,2*li->nc_alloc);
        srenew(li->blc,li->nc_alloc);
        srenew(li->blc1,li->nc_alloc);
        srenew(li->blnr,li->nc_alloc+1);
        srenew(li->bllen,li->nc_alloc);
        srenew(li->tmpv,li->nc_alloc);
        srenew(li->tmp1,li->nc_alloc);
        srenew(li->tmp2,li->nc_alloc);
        srenew(li->tmp3,li->nc_alloc);
        srenew(li->lambda,li->nc_alloc);
        if (li->ncg_triangle > 0)
        {
            /* This is allocating too much, but it is difficult to improve */
            srenew(li->triangle,li->nc_alloc);
            srenew(li->tri_bits,li->nc_alloc);
        }
    }

    ia        = idef->il[F_CONSTR].iatoms;
    ncc_alloc = li->ncc_alloc;

    con         = 0;
    nconnect    = 0;
    li->blnr[0] = 0;
    for(c=0; c<ntot; c++)
    {
        type    = ia[3*c];
        atom[0] = ia[3*c+1];
        atom[1] = ia[3*c+2];
        lenA    = idef->iparams[type].constr.dA;
        lenB    = idef->iparams[type].constr.dB;

        /* Skip the flexible constraints when not doing dynamics */
        if (!bDynamics && lenA==0 && lenB==0)
        {
            continue;
        }

        li->bllen0[con]  = lenA;
        li->ddist[con]   = lenB - lenA;
        /* Set the length to the topology A length */
        li->bllen[con]   = li->bllen0[con];
        li->bla[2*con]   = atom[0];
        li->bla[2*con+1] = atom[1];

        /* Construct the constraint connection matrix blbnb: first the
         * constraints sharing the first atom, then those sharing the second.
         */
        first = nconnect;
        for(side=0; side<2; side++)
        {
            at = atom[side] - start;
            for(k=at2con.index[at]; k<at2con.index[at+1]; k++)
            {
                other = at2con.a[k];
                if (other == c)
                {
                    continue;
                }
                if (nconnect >= ncc_alloc)
                {
                    ncc_alloc = over_alloc_small(nconnect+1);
                    srenew(li->blbnb,ncc_alloc);
                }
                li->blbnb[nconnect++] = other;
            }
        }
        li->blnr[con+1] = nconnect;

        if (cr->dd == NULL)
        {
            /* Order the blbnb matrix to optimize memory access */
            qsort(&(li->blbnb[first]),nconnect-first,
                  sizeof(li->blbnb[0]),int_comp);
        }
        /* Increase the constraint count */
        con++;
    }

    done_blocka(&at2con);

    /* This is the real number of constraints,
     * without dynamics the flexible constraints are not present.
     */
    li->nc  = con;
    li->ncc = li->blnr[con];

    if (cr->dd == NULL)
    {
        /* Since the matrix is static, we can free some memory */
        ncc_alloc = li->ncc;
        srenew(li->blbnb,ncc_alloc);
    }

    /* Let the per-coupling arrays follow the size of blbnb */
    if (ncc_alloc > li->ncc_alloc)
    {
        li->ncc_alloc = ncc_alloc;
        srenew(li->blmf,li->ncc_alloc);
        srenew(li->blmf1,li->ncc_alloc);
        srenew(li->tmpncc,li->ncc_alloc);
    }

    if (debug)
    {
        fprintf(debug,"Number of constraints is %d, couplings %d\n",
                li->nc,li->ncc);
    }

    set_lincs_matrix(li,md->invmass,md->lambda);
}
Example #3
0
/* Iteratively relax shell positions and flexible constraints around
 * calls to do_force.
 *
 * The force is first evaluated at state->x. While the RMS force returned
 * by rms_force is not below inputrec->em_tol and the iteration counter is
 * below inputrec->niter, trial positions are generated (shell_pos_sd, plus
 * directional_sd when nflexcon is non-zero), the force is evaluated at the
 * trial positions, and the trial set replaces the current minimum when its
 * RMS force is lower; otherwise decrease_step_size is called.
 *
 * Two position/force buffers are kept in shfc->x[] and shfc->f[]; Min
 * indexes the best set so far and Try (defined as 1-Min) the other one.
 *
 * On exit the positions and forces of set Min are copied to state->x and f,
 * *bConverged tells whether the last tested RMS force was below the
 * tolerance, and the value of the iteration counter is returned (1 when the
 * loop body was never entered).
 */
int relax_shell_flexcon(FILE *fplog,t_commrec *cr,gmx_bool bVerbose,
			gmx_large_int_t mdstep,t_inputrec *inputrec,
			gmx_bool bDoNS,int force_flags,
			gmx_bool bStopCM,
			gmx_localtop_t *top,
			gmx_mtop_t* mtop,
			gmx_constr_t constr,
			gmx_enerdata_t *enerd,t_fcdata *fcd,
			t_state *state,rvec f[],
			tensor force_vir,
			t_mdatoms *md,
			t_nrnb *nrnb,gmx_wallcycle_t wcycle,
			t_graph *graph,
			gmx_groups_t *groups,
			struct gmx_shellfc *shfc,
			t_forcerec *fr,
			gmx_bool bBornRadii,
			double t,rvec mu_tot,
			int natoms,gmx_bool *bConverged,
			gmx_vsite_t *vsite,
			FILE *fp_field)
{
  int    nshell;
  t_shell *shell;
  t_idef *idef;
  rvec   *pos[2],*force[2],*acc_dir=NULL,*x_old=NULL;
  real   Epot[2],df[2];
  rvec   dx;
  real   sf_dir,invdt;
  real   ftol,xiH,xiS,dum=0;
  char   sbuf[22];
  gmx_bool   bCont,bInit;
  int    nat,dd_ac0,dd_ac1=0,i;
  int    start=md->start,homenr=md->homenr,end=start+homenr,cg0,cg1;
  int    nflexcon,g,number_steps,d,Min=0,count=0;
  /* Try always denotes the buffer that is not Min; assigning Min = Try
   * therefore swaps the roles of the two buffers.
   */
#define  Try (1-Min)             /* At start Try = 1 */

  /* bCont: first step of a continuation run; bInit: first step or an
   * initialization was requested through shfc->bRequireInit.
   */
  bCont        = (mdstep == inputrec->init_step) && inputrec->bContinuation;
  bInit        = (mdstep == inputrec->init_step) || shfc->bRequireInit;
  ftol         = inputrec->em_tol;
  number_steps = inputrec->niter;
  nshell       = shfc->nshell;
  shell        = shfc->shell;
  nflexcon     = shfc->nflexcon;

  idef = &top->idef;

  /* Number of atoms the local buffers have to hold */
  if (DOMAINDECOMP(cr)) {
    nat = dd_natoms_vsite(cr->dd);
    if (nflexcon > 0) {
      dd_get_constraint_range(cr->dd,&dd_ac0,&dd_ac1);
      nat = max(nat,dd_ac1);
    }
  } else {
    nat = state->natoms;
  }

  if (nat > shfc->x_nalloc) {
    /* Allocate local arrays */
    shfc->x_nalloc = over_alloc_dd(nat);
    for(i=0; (i<2); i++) {
      srenew(shfc->x[i],shfc->x_nalloc);
      srenew(shfc->f[i],shfc->x_nalloc);
    }
  }
  for(i=0; (i<2); i++) {
    pos[i]   = shfc->x[i];
    force[i] = shfc->f[i];
  }
     
  /* With particle decomposition this code only works
   * when all particles involved with each shell are in the same cg.
   */

  if (bDoNS && inputrec->ePBC != epbcNONE && !DOMAINDECOMP(cr)) {
    /* This is the only time where the coordinates are used
     * before do_force is called, which normally puts all
     * charge groups in the box.
     */
    if (PARTDECOMP(cr)) {
      pd_cg_range(cr,&cg0,&cg1);
    } else {
      cg0 = 0;
      cg1 = top->cgs.nr;
    }
    put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,state->box,
			     &(top->cgs),state->x,fr->cg_cm);
    if (graph)
      mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
  }

  /* After this all coordinate arrays will contain whole molecules */
  if (graph)
    shift_self(graph,state->box,state->x);

  if (nflexcon) {
    if (nat > shfc->flex_nalloc) {
      shfc->flex_nalloc = over_alloc_dd(nat);
      srenew(shfc->acc_dir,shfc->flex_nalloc);
      srenew(shfc->x_old,shfc->flex_nalloc);
    }
    acc_dir = shfc->acc_dir;
    x_old   = shfc->x_old;
    /* x_old holds x - v*dt for the home atoms; it is indexed from 0,
     * hence the "-start" offsets where it is passed on below.
     */
    for(i=0; i<homenr; i++) {
      for(d=0; d<DIM; d++)
        shfc->x_old[i][d] =
	  state->x[start+i][d] - state->v[start+i][d]*inputrec->delta_t;
    }
  }

  /* Do a prediction of the shell positions */
  if (shfc->bPredict && !bCont) {
    predict_shells(fplog,state->x,state->v,inputrec->delta_t,nshell,shell,
		   md->massT,NULL,bInit);
  }

  /* do_force expected the charge groups to be in the box */
  if (graph)
    unshift_self(graph,state->box,state->x);

  /* Calculate the forces first time around */
  if (gmx_debug_at) {
    pr_rvecs(debug,0,"x b4 do_force",state->x + start,homenr);
  }
  do_force(fplog,cr,inputrec,mdstep,nrnb,wcycle,top,mtop,groups,
	   state->box,state->x,&state->hist,
	   force[Min],force_vir,md,enerd,fcd,
	   state->lambda,graph,
	   fr,vsite,mu_tot,t,fp_field,NULL,bBornRadii,
	   (bDoNS ? GMX_FORCE_NS : 0) | force_flags);

  /* sf_dir accumulates mass * |acc_dir|^2 over the home atoms */
  sf_dir = 0;
  if (nflexcon) {
    init_adir(fplog,shfc,
	      constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
	      shfc->x_old-start,state->x,state->x,force[Min],
	      shfc->acc_dir-start,
	      fr->bMolPBC,state->box,state->lambda,&dum,nrnb);

    for(i=start; i<end; i++)
      sf_dir += md->massT[i]*norm2(shfc->acc_dir[i-start]);
  }

  Epot[Min] = enerd->term[F_EPOT];

  df[Min]=rms_force(cr,shfc->f[Min],nshell,shell,nflexcon,&sf_dir,&Epot[Min]);
  df[Try]=0;
  if (debug) {
    fprintf(debug,"df = %g  %g\n",df[Min],df[Try]);
  }

  if (gmx_debug_at) {
    pr_rvecs(debug,0,"force0",force[Min],md->nr);
  }

  if (nshell+nflexcon > 0) {
    /* Copy x to pos[Min] & pos[Try]: during minimization only the
     * shell positions are updated, therefore the other particles must
     * be set here.
     */
    /* NOTE(review): pos[Min] is copied back to state->x unconditionally at
     * the end of this function; this presumably relies on
     * nshell+nflexcon > 0 whenever this routine is called - confirm.
     */
    memcpy(pos[Min],state->x,nat*sizeof(state->x[0]));
    memcpy(pos[Try],state->x,nat*sizeof(state->x[0]));
  }
  
  if (bVerbose && MASTER(cr))
    print_epot(stdout,mdstep,0,Epot[Min],df[Min],nflexcon,sf_dir);

  if (debug) {
    fprintf(debug,"%17s: %14.10e\n",
	    interaction_function[F_EKIN].longname,enerd->term[F_EKIN]);
    fprintf(debug,"%17s: %14.10e\n",
	    interaction_function[F_EPOT].longname,enerd->term[F_EPOT]);
    fprintf(debug,"%17s: %14.10e\n",
	    interaction_function[F_ETOT].longname,enerd->term[F_ETOT]);
    fprintf(debug,"SHELLSTEP %s\n",gmx_step_str(mdstep,sbuf));
  }
  
  /* First check whether we should do shells, or whether the force is 
   * low enough even without minimization.
   */
  *bConverged = (df[Min] < ftol);
  
  /* Relaxation loop: at most number_steps-1 trial force evaluations */
  for(count=1; (!(*bConverged) && (count < number_steps)); count++) {
    if (vsite)
      construct_vsites(fplog,vsite,pos[Min],nrnb,inputrec->delta_t,state->v,
		       idef->iparams,idef->il,
		       fr->ePBC,fr->bMolPBC,graph,cr,state->box);
     
    if (nflexcon) {
      init_adir(fplog,shfc,
		constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
		x_old-start,state->x,pos[Min],force[Min],acc_dir-start,
		fr->bMolPBC,state->box,state->lambda,&dum,nrnb);
      
      directional_sd(fplog,pos[Min],pos[Try],acc_dir-start,start,end,
		     fr->fc_stepsize);
    }
    
    /* New positions, Steepest descent */
    shell_pos_sd(fplog,pos[Min],pos[Try],force[Min],nshell,shell,count); 

    /* do_force expected the charge groups to be in the box */
    if (graph)
      unshift_self(graph,state->box,pos[Try]);

    if (gmx_debug_at) {
      pr_rvecs(debug,0,"RELAX: pos[Min]  ",pos[Min] + start,homenr);
      pr_rvecs(debug,0,"RELAX: pos[Try]  ",pos[Try] + start,homenr);
    }
    /* Try the new positions */
    /* NOTE(review): the step argument is the literal 1 here, whereas the
     * first do_force call above passes mdstep - confirm this is intended.
     */
    do_force(fplog,cr,inputrec,1,nrnb,wcycle,
	     top,mtop,groups,state->box,pos[Try],&state->hist,
	     force[Try],force_vir,
	     md,enerd,fcd,state->lambda,graph,
	     fr,vsite,mu_tot,t,fp_field,NULL,bBornRadii,
	     force_flags);
    
    if (gmx_debug_at) {
      pr_rvecs(debug,0,"RELAX: force[Min]",force[Min] + start,homenr);
      pr_rvecs(debug,0,"RELAX: force[Try]",force[Try] + start,homenr);
    }
    sf_dir = 0;
    if (nflexcon) {
      init_adir(fplog,shfc,
		constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
		x_old-start,state->x,pos[Try],force[Try],acc_dir-start,
		fr->bMolPBC,state->box,state->lambda,&dum,nrnb);

      for(i=start; i<end; i++)
	sf_dir += md->massT[i]*norm2(acc_dir[i-start]);
    }

    Epot[Try] = enerd->term[F_EPOT]; 
    
    df[Try]=rms_force(cr,force[Try],nshell,shell,nflexcon,&sf_dir,&Epot[Try]);

    if (debug)
      fprintf(debug,"df = %g  %g\n",df[Min],df[Try]);

    if (debug) {
      if (gmx_debug_at)
	pr_rvecs(debug,0,"F na do_force",force[Try] + start,homenr);
      if (gmx_debug_at) {
	fprintf(debug,"SHELL ITER %d\n",count);
	dump_shells(debug,pos[Try],force[Try],ftol,nshell,shell);
      }
    }

    if (bVerbose && MASTER(cr))
      print_epot(stdout,mdstep,count,Epot[Try],df[Try],nflexcon,sf_dir);
      
    /* Convergence is judged on the trial set, also when it is rejected */
    *bConverged = (df[Try] < ftol);
    
    if ((df[Try] < df[Min])) {
      if (debug)
	fprintf(debug,"Swapping Min and Try\n");
      if (nflexcon) {
	/* Correct the velocities for the flexible constraints */
	invdt = 1/inputrec->delta_t;
	for(i=start; i<end; i++) {
	  for(d=0; d<DIM; d++)
	    state->v[i][d] += (pos[Try][i][d] - pos[Min][i][d])*invdt;
	}
      }
      /* Accept the trial: Try is (1-Min), so this swaps the two buffers */
      Min  = Try;
    } else {
      decrease_step_size(nshell,shell);
    }
  }
  if (MASTER(cr) && !(*bConverged)) {
    /* Note that the energies and virial are incorrect when not converged */
    if (fplog)
      fprintf(fplog,
	      "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
	      gmx_step_str(mdstep,sbuf),number_steps,df[Min]);
    fprintf(stderr,
	    "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
	    gmx_step_str(mdstep,sbuf),number_steps,df[Min]);
  }

  /* Copy back the coordinates and the forces */
  memcpy(state->x,pos[Min],nat*sizeof(state->x[0]));
  memcpy(f,force[Min],nat*sizeof(f[0]));

  return count; 
}
Example #4
0
/* Constrain xprime with LINCS (econq == econqCoord) or call do_lincsp for
 * the other econq values, and do the associated bookkeeping.
 *
 * For econqCoord this updates the constraint lengths for free-energy
 * perturbation and flexible constraints, calls do_lincs, adds the
 * constraint contribution to *dvdlambda, stores the deviation statistics
 * measured by cconerr in lincsd->rmsd_data when bLog or bEner is set,
 * writes them to fplog when bLog is set, and handles LINCS warnings.
 * In all cases the flop counters in nrnb are incremented.
 *
 * Returns TRUE, except when do_lincs raised a warning and the maximum
 * relative constraint deviation measured by cconerr is not below 0.5.
 *
 * maxwarn < 0 suppresses the warning message and the call to
 * lincs_warning; the deviation is still measured for the return value.
 */
gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
                     t_inputrec *ir,
                     gmx_large_int_t step,
                     struct gmx_lincsdata *lincsd,t_mdatoms *md,
                     t_commrec *cr, 
                     rvec *x,rvec *xprime,rvec *min_proj,matrix box,
                     real lambda,real *dvdlambda,
                     real invdt,rvec *v,
                     gmx_bool bCalcVir,tensor rmdr,
                     int econq,
                     t_nrnb *nrnb,
                     int maxwarn,int *warncount)
{
    char  buf[STRLEN],buf2[22],buf3[STRLEN];
    int   i,warn,p_imax;
    real  ncons_loc,p_ssd,p_max;
    t_pbc pbc,*pbc_null;
    rvec  dx;
    gmx_bool  bOK;
    
    bOK = TRUE;
    
    /* Nothing to constrain and no domain decomposition: only reset the
     * statistics that would have been reported this step.
     */
    if (lincsd->nc == 0 && cr->dd == NULL)
    {
        if (bLog || bEner)
        {
            lincsd->rmsd_data[0] = 0;
            if (ir->eI == eiSD2 && v == NULL)
            {
                i = 2;
            }
            else
            {
                i = 1;
            }
            lincsd->rmsd_data[i] = 0;
        }
        
        return bOK;
    }
    
    /* We do not need full pbc when constraints do not cross charge groups,
     * i.e. when dd->constraint_comm==NULL
     */
    if ((cr->dd || ir->bPeriodicMols) && !(cr->dd && cr->dd->constraint_comm==NULL))
    {
        /* With pbc=screw the screw has been changed to a shift
         * by the constraint coordinate communication routine,
         * so that here we can use normal pbc.
         */
        pbc_null = set_pbc_dd(&pbc,ir->ePBC,cr->dd,FALSE,box);
    }
    else
    {
        pbc_null = NULL;
    }
    if (cr->dd)
    {
        /* Communicate the coordinates required for the non-local constraints */
        dd_move_x_constraints(cr->dd,box,x,xprime);
        /* dump_conf(dd,lincsd,NULL,"con",TRUE,xprime,box); */
    }
    else if (PARTDECOMP(cr))
    {
        pd_move_x_constraints(cr,x,xprime);
    }
    
    if (econq == econqCoord)
    {
        if (ir->efep != efepNO)
        {
            /* Rebuild the matrix when masses are perturbed and it was set
             * up for a different lambda.
             */
            if (md->nMassPerturbed && lincsd->matlam != md->lambda)
            {
                set_lincs_matrix(lincsd,md->invmass,md->lambda);
            }
            
            /* Interpolate the constraint lengths between the A and B state */
            for(i=0; i<lincsd->nc; i++)
            {
                lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
            }
        }
        
        if (lincsd->ncg_flex)
        {
            /* Set the flexible constraint lengths to the old lengths */
            if (pbc_null)
            {
                for(i=0; i<lincsd->nc; i++)
                {
                    if (lincsd->bllen[i] == 0) {
                        pbc_dx_aiuc(pbc_null,x[lincsd->bla[2*i]],x[lincsd->bla[2*i+1]],dx);
                        lincsd->bllen[i] = norm(dx);
                    }
                }
            }
            else
            {
                for(i=0; i<lincsd->nc; i++)
                {
                    if (lincsd->bllen[i] == 0)
                    {
                        lincsd->bllen[i] =
                            sqrt(distance2(x[lincsd->bla[2*i]],
                                           x[lincsd->bla[2*i+1]]));
                    }
                }
            }
        }
        
        /* Deviation before constraining, only needed for the log file */
        if (bLog && fplog)
        {
            cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc_null,
                    &ncons_loc,&p_ssd,&p_max,&p_imax);
        }
        
        do_lincs(x,xprime,box,pbc_null,lincsd,md->invmass,cr,
                 ir->LincsWarnAngle,&warn,
                 invdt,v,bCalcVir,rmdr);
        
        if (ir->efep != efepNO)
        {
            real dt_2,dvdl=0;
            
            dt_2 = 1.0/(ir->delta_t*ir->delta_t);
            for(i=0; (i<lincsd->nc); i++)
            {
                dvdl += lincsd->lambda[i]*dt_2*lincsd->ddist[i];
            }
            *dvdlambda += dvdl;
        }
        
        if (bLog && fplog && lincsd->nc > 0)
        {
            fprintf(fplog,"   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
            fprintf(fplog,"       Before LINCS          %.6f    %.6f %6d %6d\n",
                    sqrt(p_ssd/ncons_loc),p_max,
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
        }
        if (bLog || bEner)
        {
            cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc_null,
                    &ncons_loc,&p_ssd,&p_max,&p_imax);
            /* Check if we are doing the second part of SD */
            if (ir->eI == eiSD2 && v == NULL)
            {
                i = 2;
            }
            else
            {
                i = 1;
            }
            lincsd->rmsd_data[0] = ncons_loc;
            lincsd->rmsd_data[i] = p_ssd;
        }
        else
        {
            lincsd->rmsd_data[0] = 0;
            lincsd->rmsd_data[1] = 0;
            lincsd->rmsd_data[2] = 0;
        }
        if (bLog && fplog && lincsd->nc > 0)
        {
            fprintf(fplog,
                    "        After LINCS          %.6f    %.6f %6d %6d\n\n",
                    sqrt(p_ssd/ncons_loc),p_max,
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
                    ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
        }
        
        if (warn > 0)
        {
            /* Always measure the deviation after LINCS here: p_max decides
             * the return value below and would otherwise be read
             * uninitialized when maxwarn < 0 and neither bLog nor bEner
             * is set.
             */
            cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc_null,
                    &ncons_loc,&p_ssd,&p_max,&p_imax);
            if (maxwarn >= 0)
            {
                if (MULTISIM(cr))
                {
                    sprintf(buf3," in simulation %d", cr->ms->sim);
                }
                else
                {
                    buf3[0] = 0;
                }
                sprintf(buf,"\nStep %s, time %g (ps)  LINCS WARNING%s\n"
                        "relative constraint deviation after LINCS:\n"
                        "rms %.6f, max %.6f (between atoms %d and %d)\n",
                        gmx_step_str(step,buf2),ir->init_t+step*ir->delta_t,
                        buf3,
                        sqrt(p_ssd/ncons_loc),p_max,
                        ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
                        ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
                if (fplog)
                {
                    fprintf(fplog,"%s",buf);
                }
                fprintf(stderr,"%s",buf);
                lincs_warning(fplog,cr->dd,x,xprime,pbc_null,
                              lincsd->nc,lincsd->bla,lincsd->bllen,
                              ir->LincsWarnAngle,maxwarn,warncount);
            }
            bOK = (p_max < 0.5);
        }
        
        /* Mark the flexible constraints again with a zero length, so their
         * length is taken from x on the next call.
         */
        if (lincsd->ncg_flex) {
            for(i=0; (i<lincsd->nc); i++)
                if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
                    lincsd->bllen[i] = 0;
        }
    } 
    else
    {
        do_lincsp(x,xprime,min_proj,pbc_null,lincsd,md->invmass,econq,dvdlambda,
                  bCalcVir,rmdr);
    }
  
    /* count assuming nit=1 */
    inc_nrnb(nrnb,eNR_LINCS,lincsd->nc);
    inc_nrnb(nrnb,eNR_LINCSMAT,(2+lincsd->nOrder)*lincsd->ncc);
    if (lincsd->ntriangle > 0)
    {
        inc_nrnb(nrnb,eNR_LINCSMAT,lincsd->nOrder*lincsd->ncc_triangle);
    }
    if (v)
    {
        inc_nrnb(nrnb,eNR_CONSTR_V,lincsd->nc*2);
    }
    if (bCalcVir)
    {
        inc_nrnb(nrnb,eNR_CONSTR_VIR,lincsd->nc);
    }

    return bOK;
}
/* Collect the flop counters of all nodes and print the end-of-run
 * statistics.
 *
 * With more than one node the counters in nrnb are gathered on the
 * simulation master (when compiled with GMX_MPI); on a single node nrnb
 * itself is used. The master then prints the flop accounting, the load
 * (particle decomposition only), the wall-cycle accounting and the
 * performance numbers: to fplog when it is non-NULL, and to stderr when
 * bWriteStat is set. Nodes doing PP work print the domain decomposition
 * statistics when domain decomposition is used.
 *
 * The caller keeps ownership of nrnb; only the gathered array allocated
 * in this function is freed here.
 */
void finish_run(FILE *fplog,t_commrec *cr,char *confout,
		t_inputrec *inputrec,
		t_nrnb nrnb[],gmx_wallcycle_t wcycle,
		double nodetime,double realtime,int nsteps_done,
		bool bWriteStat)
{
  int    i,j;
  t_nrnb *nrnb_all=NULL,ntot;
  real   delta_t;
  double nbfs,mflop;
  double cycles[ewcNR];

  wallcycle_sum(cr,wcycle,cycles);

  if (cr->nnodes > 1) {
    /* Only the master needs room for the counters of all nodes */
    if (SIMMASTER(cr))
      snew(nrnb_all,cr->nnodes);
#ifdef GMX_MPI
    MPI_Gather(nrnb,sizeof(t_nrnb),MPI_BYTE,
	       nrnb_all,sizeof(t_nrnb),MPI_BYTE,
	       0,cr->mpi_comm_mysim);
#endif  
  } else {
    /* Single node: use the caller's counters directly */
    nrnb_all = nrnb;
  }
    
  if (SIMMASTER(cr)) {
    /* Sum the counters over all nodes */
    for(i=0; (i<eNRNB); i++)
      ntot.n[i]=0;
    for(i=0; (i<cr->nnodes); i++)
      for(j=0; (j<eNRNB); j++)
	ntot.n[j] += nrnb_all[i].n[j];

    print_flop(fplog,&ntot,&nbfs,&mflop);
  }

  if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr)) {
    print_dd_statistics(cr,inputrec,fplog);
  }

  if (SIMMASTER(cr)) {
    if (PARTDECOMP(cr)) {
      pr_load(fplog,cr,nrnb_all);
    }

    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,realtime,wcycle,cycles);

    if (EI_DYNAMICS(inputrec->eI)) {
      delta_t = inputrec->delta_t;
    } else {
      delta_t = 0;
    }
    
    if (fplog) {
      print_perf(fplog,nodetime,realtime,cr->nnodes-cr->npmenodes,
		 nsteps_done,delta_t,nbfs,mflop);
    }
    if (bWriteStat) {
      print_perf(stderr,nodetime,realtime,cr->nnodes-cr->npmenodes,
		 nsteps_done,delta_t,nbfs,mflop);
    }

    /* Release the gathered counters only after their last use (pr_load
     * above), and only when they were allocated here: with a single node
     * nrnb_all aliases the caller's nrnb.
     */
    if (cr->nnodes > 1) {
      sfree(nrnb_all);
    }
  }
}
void do_force(FILE *fplog,t_commrec *cr,
	      t_inputrec *inputrec,
	      int step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
	      gmx_localtop_t *top,
	      gmx_groups_t *groups,
	      matrix box,rvec x[],history_t *hist,
	      rvec f[],rvec buf[],
	      tensor vir_force,
	      t_mdatoms *mdatoms,
	      gmx_enerdata_t *enerd,t_fcdata *fcd,
	      real lambda,t_graph *graph,
	      t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
	      real t,FILE *field,gmx_edsam_t ed,
	      int flags)
{
  static rvec box_size;
  int    cg0,cg1,i,j;
  int    start,homenr;
  static double mu[2*DIM]; 
  rvec   mu_tot_AB[2];
  bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS,bDoForces;
  matrix boxs;
  real   e,v,dvdl;
  t_pbc  pbc;
  float  cycles_ppdpme,cycles_pme,cycles_force;
  
  start  = mdatoms->start;
  homenr = mdatoms->homenr;

  bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));

  clear_mat(vir_force);
  
  if (PARTDECOMP(cr)) {
    pd_cg_range(cr,&cg0,&cg1);
  } else {
    cg0 = 0;
    if (DOMAINDECOMP(cr))
      cg1 = cr->dd->ncg_tot;
    else
      cg1 = top->cgs.nr;
    if (fr->n_tpi > 0)
      cg1--;
  }

  bStateChanged = (flags & GMX_FORCE_STATECHANGED);
  bNS           = (flags & GMX_FORCE_NS);
  bFillGrid     = (bNS && bStateChanged);
  bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
  bDoForces     = (flags & GMX_FORCE_FORCES);

  if (bStateChanged) {
    update_forcerec(fplog,fr,box);
    
    /* Calculate total (local) dipole moment in a temporary common array. 
     * This makes it possible to sum them over nodes faster.
     */
    calc_mu(start,homenr,
	    x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
	    mu,mu+DIM);
  }
  
  if (fr->ePBC != epbcNONE) { 
    /* Compute shift vectors every step,
     * because of pressure coupling or box deformation!
     */
    if (DYNAMIC_BOX(*inputrec) && bStateChanged)
      calc_shifts(box,fr->shift_vec);
    
    if (bCalcCGCM) { 
      put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
			       &(top->cgs),x,fr->cg_cm);
      inc_nrnb(nrnb,eNR_CGCM,homenr);
      inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
    } 
    else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
      unshift_self(graph,box,x);
    }
  } 
  else if (bCalcCGCM) {
    calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
    inc_nrnb(nrnb,eNR_CGCM,homenr);
  }
  
  if (bCalcCGCM) {
    if (PAR(cr)) {
      move_cgcm(fplog,cr,fr->cg_cm);
    }
    if (gmx_debug_at)
      pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
  }

#ifdef GMX_MPI
  if (!(cr->duty & DUTY_PME)) {
    /* Send particle coordinates to the pme nodes.
     * Since this is only implemented for domain decomposition
     * and domain decomposition does not use the graph,
     * we do not need to worry about shifting.
     */    

    wallcycle_start(wcycle,ewcPP_PMESENDX);
    GMX_MPE_LOG(ev_send_coordinates_start);

    bBS = (inputrec->nwall == 2);
    if (bBS) {
      copy_mat(box,boxs);
      svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
    }

    gmx_pme_send_x(cr,bBS ? boxs : box,x,mdatoms->nChargePerturbed,lambda);

    GMX_MPE_LOG(ev_send_coordinates_finish);
    wallcycle_stop(wcycle,ewcPP_PMESENDX);
  }
#endif /* GMX_MPI */

  /* Communicate coordinates and sum dipole if necessary */
  if (PAR(cr)) {
    wallcycle_start(wcycle,ewcMOVEX);
    if (DOMAINDECOMP(cr)) {
      dd_move_x(cr->dd,box,x,buf);
    } else {
      move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
    }
    /* When we don't need the total dipole we sum it in global_stat */
    if (NEED_MUTOT(*inputrec))
      gmx_sumd(2*DIM,mu,cr);
    wallcycle_stop(wcycle,ewcMOVEX);
  }
  for(i=0; i<2; i++)
    for(j=0;j<DIM;j++)
      mu_tot_AB[i][j] = mu[i*DIM + j];
  if (fr->efep == efepNO)
    copy_rvec(mu_tot_AB[0],mu_tot);
  else
    for(j=0; j<DIM; j++)
      mu_tot[j] = (1.0 - lambda)*mu_tot_AB[0][j] + lambda*mu_tot_AB[1][j];

  /* Reset energies */
  reset_energies(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));    
  if (bNS) {
    wallcycle_start(wcycle,ewcNS);
    
    if (graph && bStateChanged)
      /* Calculate intramolecular shift vectors to make molecules whole */
      mk_mshift(fplog,graph,fr->ePBC,box,x);

    /* Reset long range forces if necessary */
    if (fr->bTwinRange) {
      clear_rvecs(fr->f_twin_n,fr->f_twin);
      clear_rvecs(SHIFTS,fr->fshift_twin);
    }
    /* Do the actual neighbour searching and if twin range electrostatics
     * also do the calculation of long range forces and energies.
     */
    dvdl = 0; 
    ns(fplog,fr,x,f,box,groups,&(inputrec->opts),top,mdatoms,
       cr,nrnb,step,lambda,&dvdl,&enerd->grpp,bFillGrid,bDoForces);
    if (bSepDVDL)
      fprintf(fplog,sepdvdlformat,"LR non-bonded",0,dvdl);
    enerd->dvdl_lr       = dvdl;
    enerd->term[F_DVDL] += dvdl;

    wallcycle_stop(wcycle,ewcNS);
  }
  
  if (DOMAINDECOMP(cr)) {
    if (!(cr->duty & DUTY_PME)) {
      wallcycle_start(wcycle,ewcPPDURINGPME);
      dd_force_flop_start(cr->dd,nrnb);
    }
  }
  /* Start the force cycle counter.
   * This counter is stopped in do_forcelow_level.
   * No parallel communication should occur while this counter is running,
   * since that will interfere with the dynamic load balancing.
   */
  wallcycle_start(wcycle,ewcFORCE);

  if (bDoForces) {
      /* Reset PME/Ewald forces if necessary */
    if (fr->bF_NoVirSum) 
    {
      GMX_BARRIER(cr->mpi_comm_mygroup);
      if (fr->bDomDec)
	clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
      else
	clear_rvecs(homenr,fr->f_novirsum+start);
      GMX_BARRIER(cr->mpi_comm_mygroup);
    }
    /* Copy long range forces into normal buffers */
    if (fr->bTwinRange) {
      for(i=0; i<fr->f_twin_n; i++)
	copy_rvec(fr->f_twin[i],f[i]);
      for(i=0; i<SHIFTS; i++)
	copy_rvec(fr->fshift_twin[i],fr->fshift[i]);
    } 
    else {
      if (DOMAINDECOMP(cr))
	clear_rvecs(cr->dd->nat_tot,f);
      else
	clear_rvecs(mdatoms->nr,f);
      clear_rvecs(SHIFTS,fr->fshift);
    }
    clear_rvec(fr->vir_diag_posres);
    GMX_BARRIER(cr->mpi_comm_mygroup);
  }
  if (inputrec->ePull == epullCONSTRAINT)
    clear_pull_forces(inputrec->pull);

  /* update QMMMrec, if necessary */
  if(fr->bQMMM)
    update_QMMMrec(cr,fr,x,mdatoms,box,top);

  if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) {
    /* Position restraints always require full pbc */
    set_pbc(&pbc,inputrec->ePBC,box);
    v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
	       top->idef.iparams_posres,
	       (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
	       inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda,&dvdl,
	       fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
    if (bSepDVDL) {
      fprintf(fplog,sepdvdlformat,
	      interaction_function[F_POSRES].longname,v,dvdl);
    }
    enerd->term[F_POSRES] += v;
    enerd->term[F_DVDL]   += dvdl;
    inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
  }
  /* Compute the bonded and non-bonded forces */    
  do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
		    cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
		    x,hist,f,enerd,fcd,box,lambda,graph,&(top->excls),mu_tot_AB,
		    flags,&cycles_force);
  GMX_BARRIER(cr->mpi_comm_mygroup);

  if (ed) {
    do_flood(fplog,cr,x,f,ed,box,step);
  }
	
  if (DOMAINDECOMP(cr)) {
    dd_force_flop_stop(cr->dd,nrnb);
    if (wcycle)
      dd_cycles_add(cr->dd,cycles_force,ddCyclF);
  }
  
  if (bDoForces) {
    /* Compute forces due to electric field */
    calc_f_el(MASTER(cr) ? field : NULL,
	      start,homenr,mdatoms->chargeA,x,f,inputrec->ex,inputrec->et,t);
    
    /* When using PME/Ewald we compute the long range virial there.
     * otherwise we do it based on long range forces from twin range
     * cut-off based calculation (or not at all).
     */
    
    /* Communicate the forces */
    if (PAR(cr)) {
      wallcycle_start(wcycle,ewcMOVEF);
      if (DOMAINDECOMP(cr)) {
	dd_move_f(cr->dd,f,buf,fr->fshift);
	/* Position restraint do not introduce inter-cg forces */
	if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl)
	  dd_move_f(cr->dd,fr->f_novirsum,buf,NULL);
      } else {
	move_f(fplog,cr,GMX_LEFT,GMX_RIGHT,f,buf,nrnb);
      }
      wallcycle_stop(wcycle,ewcMOVEF);
    }
  }

  if (bDoForces) {
    if (vsite) {
      wallcycle_start(wcycle,ewcVSITESPREAD);
      spread_vsite_f(fplog,vsite,x,f,fr->fshift,nrnb,
		     &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
      wallcycle_stop(wcycle,ewcVSITESPREAD);
    }
    
    /* Calculation of the virial must be done after vsites! */
    calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
		vir_force,graph,box,nrnb,fr,inputrec->ePBC);
  }

  if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) {
    /* Calculate the center of mass forces, this requires communication,
     * which is why pull_potential is called close to other communication.
     * The virial contribution is calculated directly,
     * which is why we call pull_potential after calc_virial.
     */
    set_pbc(&pbc,inputrec->ePBC,box);
    dvdl = 0; 
    enerd->term[F_COM_PULL] =
      pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc,
		     cr,t,lambda,x,f,vir_force,&dvdl);
    if (bSepDVDL)
      fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl);
    enerd->term[F_DVDL] += dvdl;
  }

  if (!(cr->duty & DUTY_PME)) {
    cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
    dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
  }

#ifdef GMX_MPI
  if (PAR(cr) && !(cr->duty & DUTY_PME)) {
    /* In case of node-splitting, the PP nodes receive the long-range 
     * forces, virial and energy from the PME nodes here.
     */    
    wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
    dvdl = 0;
    gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl,
		      &cycles_pme);
    if (bSepDVDL)
      fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl);
    enerd->term[F_COUL_RECIP] += e;
    enerd->term[F_DVDL] += dvdl;
    if (wcycle)
      dd_cycles_add(cr->dd,cycles_pme,ddCyclPME);
    wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
  }
#endif

  if (bDoForces && fr->bF_NoVirSum) {
    if (vsite) {
      /* Spread the mesh force on virtual sites to the other particles... 
       * This is parallellized. MPI communication is performed
       * if the constructing atoms aren't local.
       */
      wallcycle_start(wcycle,ewcVSITESPREAD);
      spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,nrnb,
		     &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
      wallcycle_stop(wcycle,ewcVSITESPREAD);
    }
    /* Now add the forces, this is local */
    if (fr->bDomDec) {
      sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
    } else {
      sum_forces(start,start+homenr,f,fr->f_novirsum);
    }
    if (EEL_FULL(fr->eeltype)) {
      /* Add the mesh contribution to the virial */
      m_add(vir_force,fr->vir_el_recip,vir_force);
    }
    if (debug)
      pr_rvecs(debug,0,"vir_force",vir_force,DIM);
  }

  /* Sum the potential energy terms from group contributions */
  sum_epot(&(inputrec->opts),enerd);

  if (fr->print_force >= 0 && bDoForces)
    print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
}
Example #7
0
/* Set up the COM-pulling state stored in ir->pull before a run.
 *
 * The routine, in order:
 *  - copies the pbc type into the pull struct and derives the number of
 *    periodic dimensions from it;
 *  - when fplog is non-NULL, writes a short summary of the pull setup;
 *  - decides whether the pull virial is added (disabled for geometry
 *    direction_periodic and when GMX_NO_PULLVIR is set in the environment);
 *  - for every pull group selects the pbc treatment for its COM and calls
 *    init_pull_group_index(); groups without atoms act as an absolute
 *    reference and get inverse mass 0 and wscale 1;
 *  - for cylinder pulling, validates the setup and allocates pull->dyna;
 *  - when bOutFile is set, opens the -px / -pf output files as requested
 *    by nstxout / nstfout.
 *
 * Inconsistent input is reported through gmx_fatal().
 */
void init_pull(FILE *fplog, t_inputrec *ir, int nfile, const t_filenm fnm[],
               gmx_mtop_t *mtop, t_commrec *cr, const output_env_t oenv, real lambda,
               gmx_bool bOutFile, unsigned long Flags)
{
    t_pull       *pull = ir->pull;
    t_pull_group *grp;
    int           ic, ig, idim;
    int           at_start = 0, at_end = 0;

    /* The number of periodic dimensions follows from the pbc type */
    pull->ePBC = ir->ePBC;
    if (pull->ePBC == epbcNONE)
    {
        pull->npbcdim = 0;
    }
    else if (pull->ePBC == epbcXY)
    {
        pull->npbcdim = 2;
    }
    else
    {
        pull->npbcdim = 3;
    }

    if (fplog)
    {
        gmx_bool bAbsRef    = FALSE;
        gmx_bool bCosWeight = FALSE;

        /* A coordinate that involves an empty group uses an absolute reference */
        for (ic = 0; ic < pull->ncoord && !bAbsRef; ic++)
        {
            if (pull->group[pull->coord[ic].group[0]].nat == 0 ||
                pull->group[pull->coord[ic].group[1]].nat == 0)
            {
                bAbsRef = TRUE;
            }
        }

        fprintf(fplog, "\nWill apply %s COM pulling in geometry '%s'\n",
                EPULLTYPE(ir->ePull), EPULLGEOM(pull->eGeom));
        fprintf(fplog, "with %d pull coordinate%s and %d group%s\n",
                pull->ncoord, pull->ncoord == 1 ? "" : "s",
                pull->ngroup, pull->ngroup == 1 ? "" : "s");
        if (bAbsRef)
        {
            fprintf(fplog, "with an absolute reference\n");
        }

        /* Multi-atom groups without a pbc reference atom use cosine weighting */
        for (ig = 0; ig < pull->ngroup; ig++)
        {
            if (pull->group[ig].nat <= 1 || pull->group[ig].pbcatom >= 0)
            {
                continue;
            }
            fprintf(fplog, "Cosine weighting is used for group %d\n", ig);
            bCosWeight = TRUE;
        }
        if (bCosWeight)
        {
            please_cite(fplog, "Engin2010");
        }
    }

    /* The virial contribution is always added, except for
     * geometry = direction_periodic, where this is impossible,
     * or when the user switched it off through the environment.
     */
    pull->bVirial = (pull->eGeom != epullgDIRPBC);
    if (getenv("GMX_NO_PULLVIR") != NULL)
    {
        if (fplog)
        {
            fprintf(fplog, "Found env. var., will not add the virial contribution of the COM pull forces\n");
        }
        pull->bVirial = FALSE;
    }

    if (cr && PARTDECOMP(cr))
    {
        pd_at_range(cr, &at_start, &at_end);
    }

    pull->rbuf     = NULL;
    pull->dbuf     = NULL;
    pull->dbuf_cyl = NULL;
    pull->bRefAt   = FALSE;
    pull->cosdim   = -1;

    for (ig = 0; ig < pull->ngroup; ig++)
    {
        grp           = &pull->group[ig];
        grp->epgrppbc = epgrppbcNONE;

        if (!(grp->nat > 0))
        {
            /* Absolute reference, set the inverse mass to zero */
            grp->invtm  = 0;
            grp->wscale = 1;
            continue;
        }

        /* Work out whether PBC has to be taken into account
         * when computing the COM of this group.
         */
        for (idim = 0; idim < pull->npbcdim; idim++)
        {
            if (!pull->dim[idim] || !(grp->nat > 1))
            {
                continue;
            }

            if (grp->pbcatom >= 0)
            {
                grp->epgrppbc = epgrppbcREFAT;
                pull->bRefAt  = TRUE;
                continue;
            }

            if (grp->weight)
            {
                gmx_fatal(FARGS, "Pull groups can not have relative weights and cosine weighting at same time");
            }
            grp->epgrppbc = epgrppbcCOS;
            if (pull->cosdim >= 0 && pull->cosdim != idim)
            {
                gmx_fatal(FARGS, "Can only use cosine weighting with pulling in one dimension (use mdp option pull_dim)");
            }
            pull->cosdim = idim;
        }

        /* Set the indices */
        init_pull_group_index(fplog, cr, at_start, at_end, ig, grp, pull->dim, mtop, ir, lambda);
        if (PULL_CYL(pull) && grp->invtm == 0)
        {
            gmx_fatal(FARGS, "Can not have frozen atoms in a cylinder pull group");
        }
    }

    /* Dynamic (cylinder) reference groups need some extra initialisation */
    if (PULL_CYL(pull))
    {
        if (ir->ePull == epullCONSTRAINT && pull->ncoord > 1)
        {
            /* The single reference group can not easily be updated with
             * multiple constraints; that would require recalculating COMs.
             */
            gmx_fatal(FARGS, "Constraint COM pulling supports only one coordinate with geometry=cylinder, you can use umbrella pulling with multiple coordinates");
        }

        for (ic = 0; ic < pull->ncoord; ic++)
        {
            if (pull->group[pull->coord[ic].group[0]].nat == 0)
            {
                gmx_fatal(FARGS, "Dynamic reference groups are not supported when using absolute reference!\n");
            }
        }

        snew(pull->dyna, pull->ncoord);
    }

    /* Output files are only opened when the caller asks for them */
    pull->out_x = NULL;
    pull->out_f = NULL;
    if (bOutFile && pull->nstxout > 0)
    {
        pull->out_x = open_pull_out(opt2fn("-px", nfile, fnm), pull, oenv, TRUE, Flags);
    }
    if (bOutFile && pull->nstfout > 0)
    {
        pull->out_f = open_pull_out(opt2fn("-pf", nfile, fnm), pull, oenv,
                                    FALSE, Flags);
    }
}
Example #8
0
/* Apply the constraint algorithms configured in constr (LINCS, SHAKE and
 * SETTLE) and, for coordinate constraining, the COM pull constraint and
 * essential dynamics.
 *
 * fplog       log file; may be NULL, in which case log messages are skipped
 * bLog,bEner  forwarded to constrain_lincs()
 * constr      constraint data (LINCS/SHAKE/SETTLE state, warning counters,
 *             per-thread SETTLE buffers, which are allocated here on demand)
 * idef        interaction definitions; the SETTLE list idef->il[F_SETTLE]
 *             is read here
 * ir          input record (time step, pbc type, free-energy and pull setup)
 * step        current step, used for messages and for the pull time
 * delta_step  step offset; shifts lambda and the pull time
 * md          atom data; md->start and md->homenr give the local atom range
 * x           reference coordinates
 * xprime      coordinates that get constrained
 * min_proj    extra vector array handed to LINCS, to SHAKE for
 *             econq == econqVeloc and to settle_proj()
 * bMolPBC     together with cr->dd decides whether a pbc struct is passed on
 * lambda,
 * dvdlambda   free-energy coupling parameter and its derivative accumulator
 * v           velocities, may be NULL
 * vir         when non-NULL receives the scaled constraint virial
 * econq       which quantity is constrained (econqCoord, econqVeloc, ...)
 * bPscal,
 * veta,
 * vetanew     forwarded to init_vetavars()
 *
 * Returns FALSE when a constraint algorithm reported failure, TRUE otherwise.
 */
gmx_bool constrain(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
                   struct gmx_constr *constr,
                   t_idef *idef,t_inputrec *ir,gmx_ekindata_t *ekind,
                   t_commrec *cr,
                   gmx_large_int_t step,int delta_step,
                   t_mdatoms *md,
                   rvec *x,rvec *xprime,rvec *min_proj,
                   gmx_bool bMolPBC,matrix box,
                   real lambda,real *dvdlambda,
                   rvec *v,tensor *vir,
                   t_nrnb *nrnb,int econq,gmx_bool bPscal,
                   real veta, real vetanew)
{
    gmx_bool    bOK,bDump;
    int     start,homenr;
    int     i,j;
    int     settle_error;
    tensor  vir_r_m_dr;
    real    invdt,vir_fac,t;
    t_ilist *settle;
    int     nsettle;
    t_pbc   pbc,*pbc_null;
    char    buf[22];
    t_vetavars vetavar;
    int     nth,th;

    if (econq == econqForceDispl && !EI_ENERGY_MINIMIZATION(ir->eI))
    {
        gmx_incons("constrain called for forces displacements while not doing energy minimization, can not do this while the LINCS and SETTLE constraint connection matrices are mass weighted");
    }
    
    bOK   = TRUE;
    bDump = FALSE;
    
    start  = md->start;
    homenr = md->homenr;

    /* set constants for pressure control integration */ 
    init_vetavars(&vetavar,econq!=econqCoord,
                  veta,vetanew,ir,ekind,bPscal);

    /* A zero time step would give a division by zero; use invdt=0 instead */
    if (ir->delta_t == 0)
    {
        invdt = 0;
    }
    else
    {
        invdt  = 1/ir->delta_t;
    }

    if (ir->efep != efepNO && EI_DYNAMICS(ir->eI))
    {
        /* Set the constraint lengths for the step at which this configuration
         * is meant to be. The invmasses should not be changed.
         */
        lambda += delta_step*ir->fepvals->delta_lambda;
    }
    
    if (vir != NULL)
    {
        clear_mat(vir_r_m_dr);
    }
    
    where();

    settle  = &idef->il[F_SETTLE];
    nsettle = settle->nr/(1+NRAL(F_SETTLE));

    if (nsettle > 0)
    {
        nth = gmx_omp_nthreads_get(emntSETTLE);
    }
    else
    {
        nth = 1;
    }

    /* Per-thread virial and error buffers are allocated on first use */
    if (nth > 1 && constr->vir_r_m_dr_th == NULL)
    {
        snew(constr->vir_r_m_dr_th,nth);
        snew(constr->settle_error,nth);
    }
    
    /* -1 means: no SETTLE failure seen (yet) */
    settle_error = -1;

    /* We do not need full pbc when constraints do not cross charge groups,
     * i.e. when dd->constraint_comm==NULL.
     * Note that PBC for constraints is different from PBC for bondeds.
     * For constraints there is both forward and backward communication.
     */
    if (ir->ePBC != epbcNONE &&
        (cr->dd || bMolPBC) && !(cr->dd && cr->dd->constraint_comm==NULL))
    {
        /* With pbc=screw the screw has been changed to a shift
         * by the constraint coordinate communication routine,
         * so that here we can use normal pbc.
         */
        pbc_null = set_pbc_dd(&pbc,ir->ePBC,cr->dd,FALSE,box);
    }
    else
    {
        pbc_null = NULL;
    }

    /* Communicate the coordinates required for the non-local constraints
     * for LINCS and/or SETTLE.
     */
    if (cr->dd)
    {
        dd_move_x_constraints(cr->dd,box,x,xprime);
    }
    else if (PARTDECOMP(cr))
    {
        pd_move_x_constraints(cr,x,xprime);
    }

    if (constr->lincsd != NULL)
    {
        bOK = constrain_lincs(fplog,bLog,bEner,ir,step,constr->lincsd,md,cr,
                              x,xprime,min_proj,
                              box,pbc_null,lambda,dvdlambda,
                              invdt,v,vir!=NULL,vir_r_m_dr,
                              econq,nrnb,
                              constr->maxwarn,&constr->warncount_lincs);
        if (!bOK && constr->maxwarn >= 0)
        {
            if (fplog != NULL)
            {
                fprintf(fplog,"Constraint error in algorithm %s at step %s\n",
                        econstr_names[econtLINCS],gmx_step_str(step,buf));
            }
            bDump = TRUE;
        }
    }
    
    if (constr->nblocks > 0)
    {
        switch (econq) {
        case (econqCoord):
            bOK = bshakef(fplog,constr->shaked,
                          homenr,md->invmass,constr->nblocks,constr->sblock,
                          idef,ir,x,xprime,nrnb,
                          constr->lagr,lambda,dvdlambda,
                          invdt,v,vir!=NULL,vir_r_m_dr,
                          constr->maxwarn>=0,econq,&vetavar);
            break;
        case (econqVeloc):
            bOK = bshakef(fplog,constr->shaked,
                          homenr,md->invmass,constr->nblocks,constr->sblock,
                          idef,ir,x,min_proj,nrnb,
                          constr->lagr,lambda,dvdlambda,
                          invdt,NULL,vir!=NULL,vir_r_m_dr,
                          constr->maxwarn>=0,econq,&vetavar);
            break;
        default:
            gmx_fatal(FARGS,"Internal error, SHAKE called for constraining something else than coordinates");
            break;
        }
        
        if (!bOK && constr->maxwarn >= 0)
        {
            if (fplog != NULL)
            {
                fprintf(fplog,"Constraint error in algorithm %s at step %s\n",
                        econstr_names[econtSHAKE],gmx_step_str(step,buf));
            }
            bDump = TRUE;
        }
    }
    
    if (nsettle > 0)
    {
        int calcvir_atom_end;

        /* With calcvir_atom_end=0 no atom index is below the limit that is
         * handed to the SETTLE routines.
         */
        if (vir == NULL)
        {
            calcvir_atom_end = 0;
        }
        else
        {
            calcvir_atom_end = md->start + md->homenr;
        }

        switch (econq)
        {
        case econqCoord:
            /* Each thread handles the slice [start_th,end_th) of the settles.
             * Thread 0 writes directly into vir_r_m_dr and settle_error,
             * the other threads use their own slot in the constr buffers.
             */
#pragma omp parallel for num_threads(nth) schedule(static)
            for(th=0; th<nth; th++)
            {
                int start_th,end_th;

                if (th > 0)
                {
                    clear_mat(constr->vir_r_m_dr_th[th]);
                    /* Reset the error slot, so a thread with an empty slice
                     * (which never calls csettle) can not leave behind a
                     * stale or zero-initialised value that looks like a
                     * failure when the slots are combined below.
                     */
                    constr->settle_error[th] = -1;
                }

                start_th = (nsettle* th   )/nth;
                end_th   = (nsettle*(th+1))/nth;
                if (start_th >= 0 && end_th - start_th > 0)
                {
                    csettle(constr->settled,
                            end_th-start_th,
                            settle->iatoms+start_th*(1+NRAL(F_SETTLE)),
                            pbc_null,
                            x[0],xprime[0],
                            invdt,v?v[0]:NULL,calcvir_atom_end,
                            th == 0 ? vir_r_m_dr : constr->vir_r_m_dr_th[th],
                            th == 0 ? &settle_error : &constr->settle_error[th],
                            &vetavar);
                }
            }
            inc_nrnb(nrnb,eNR_SETTLE,nsettle);
            if (v != NULL)
            {
                inc_nrnb(nrnb,eNR_CONSTR_V,nsettle*3);
            }
            if (vir != NULL)
            {
                inc_nrnb(nrnb,eNR_CONSTR_VIR,nsettle*3);
            }
            break;
        case econqVeloc:
        case econqDeriv:
        case econqForce:
        case econqForceDispl:
#pragma omp parallel for num_threads(nth) schedule(static)
            for(th=0; th<nth; th++)
            {
                int start_th,end_th;

                if (th > 0)
                {
                    clear_mat(constr->vir_r_m_dr_th[th]);
                }
                
                start_th = (nsettle* th   )/nth;
                end_th   = (nsettle*(th+1))/nth;

                if (start_th >= 0 && end_th - start_th > 0)
                {
                    settle_proj(fplog,constr->settled,econq,
                                end_th-start_th,
                                settle->iatoms+start_th*(1+NRAL(F_SETTLE)),
                                pbc_null,
                                x,
                                xprime,min_proj,calcvir_atom_end,
                                th == 0 ? vir_r_m_dr : constr->vir_r_m_dr_th[th],
                                &vetavar);
                }
            }
            /* This is an overestimate */
            inc_nrnb(nrnb,eNR_SETTLE,nsettle);
            break;
        case econqDeriv_FlexCon:
            /* Nothing to do, since the are no flexible constraints in settles */
            break;
        default:
            gmx_incons("Unknown constraint quantity for settle");
        }
    }

    if (settle->nr > 0)
    {
        /* Combine virial and error info of the other threads */
        for(i=1; i<nth; i++)
        {
            /* vir_r_m_dr is only initialised and used when vir != NULL */
            if (vir != NULL)
            {
                m_add(vir_r_m_dr,constr->vir_r_m_dr_th[i],vir_r_m_dr);
            }
            /* Keep the first failure instead of letting a later thread's
             * "no error" overwrite it. The error slots are only written
             * in the econqCoord branch, so only look at them there.
             * Thread i was handed iatoms advanced by its slice start, so the
             * index it reports is taken to be relative to that slice and is
             * shifted back to a global settle index here.
             * NOTE(review): confirm against csettle() that the reported
             * index counts settles from the start of the passed array.
             */
            if (econq == econqCoord &&
                settle_error < 0 && constr->settle_error[i] >= 0)
            {
                settle_error = (nsettle*i)/nth + constr->settle_error[i];
            }
        } 

        if (econq == econqCoord && settle_error >= 0)
        {
            bOK = FALSE;
            if (constr->maxwarn >= 0)
            {
                char warn_buf[256];
                sprintf(warn_buf,
                        "\nstep " gmx_large_int_pfmt ": Water molecule starting at atom %d can not be "
                        "settled.\nCheck for bad contacts and/or reduce the timestep if appropriate.\n",
                        step,ddglatnr(cr->dd,settle->iatoms[settle_error*(1+NRAL(F_SETTLE))+1]));
                if (fplog)
                {
                    fprintf(fplog,"%s",warn_buf);
                }
                fprintf(stderr,"%s",warn_buf);
                constr->warncount_settle++;
                if (constr->warncount_settle > constr->maxwarn)
                {
                    too_many_constraint_warnings(-1,constr->warncount_settle);
                }
                bDump = TRUE;
            }
        }
    }
        
    free_vetavars(&vetavar);
    
    if (vir != NULL)
    {
        /* The prefactor depends on which quantity was constrained */
        switch (econq)
        {
        case econqCoord:
            vir_fac = 0.5/(ir->delta_t*ir->delta_t);
            break;
        case econqVeloc:
            vir_fac = 0.5/ir->delta_t;
            break;
        case econqForce:
        case econqForceDispl:
            vir_fac = 0.5;
            break;
        default:
            vir_fac = 0;
            gmx_incons("Unsupported constraint quantity for virial");
        }
        
        if (EI_VV(ir->eI))
        {
            vir_fac *= 2;  /* only constraining over half the distance here */
        }
        for(i=0; i<DIM; i++)
        {
            for(j=0; j<DIM; j++)
            {
                (*vir)[i][j] = vir_fac*vir_r_m_dr[i][j];
            }
        }
    }
    
    if (bDump)
    {
        dump_confs(fplog,step,constr->warn_mtop,start,homenr,cr,x,xprime,box);
    }
    
    if (econq == econqCoord)
    {
        if (ir->ePull == epullCONSTRAINT)
        {
            if (EI_DYNAMICS(ir->eI))
            {
                t = ir->init_t + (step + delta_step)*ir->delta_t;
            }
            else
            {
                t = ir->init_t;
            }
            set_pbc(&pbc,ir->ePBC,box);
            /* Do not apply '*' to a NULL vir; forward a NULL tensor instead */
            pull_constraint(ir->pull,md,&pbc,cr,ir->delta_t,t,x,xprime,v,
                            vir != NULL ? *vir : NULL);
        }
        if (constr->ed && delta_step > 0)
        {
            /* apply the essential dynamcs constraints here */
            do_edsam(ir,step,md,cr,xprime,v,box,constr->ed);
        }
    }
    
    return bOK;
}
/* SSE2 double-precision kernel that computes Born radii from the neighbour
 * list nl, using the STILL_* constants (presumably the Still GB model, as
 * the function name suggests).
 *
 * For every i-entry in nl the j-atoms are processed two at a time, with a
 * scalar tail for an odd j-count. Per pair the kernel
 *  - adds prod_ai*icf4 to work[j] and accumulates prod*icf4 for atom i,
 *  - appends prod*icf6 and prod_ai*icf6 to fr->dadx (two doubles each).
 * Afterwards work[] is summed over nodes (particle or domain decomposition)
 * and, for atoms with born->use[i] != 0, converted to born->bRad[i] and
 * fr->invsqrta[i].
 *
 * x is read as a flat array with 3 doubles per atom. top and atype are not
 * used in this routine. Always returns 0.
 */
int 
calc_gb_rad_still_sse2_double(t_commrec *cr, t_forcerec *fr,
                              int natoms, gmx_localtop_t *top,
                              const t_atomtypes *atype, double *x, t_nblist *nl,
                              gmx_genborn_t *born)
{
	int i,k,n,ii,is3,ii3,nj0,nj1,offset;
	int jnrA,jnrB,j3A,j3B;
    int *mdtype;
	double shX,shY,shZ;
    int *jjnr;
    double *shiftvec;
    
	double gpi_ai,gpi2;
	double factor;
	double *gb_radius;
    double *vsolv;
    double *work;
    double *dadx;
    
	__m128d ix,iy,iz;
	__m128d jx,jy,jz;
	__m128d dx,dy,dz;
	__m128d tx,ty,tz;
	__m128d rsq,rinv,rinv2,rinv4,rinv6;
	__m128d ratio,gpi,rai,raj,vai,vaj,rvdw;
	__m128d ccf,dccf,theta,cosq,term,sinq,res,prod,prod_ai,tmp;
	__m128d mask,icf4,icf6,mask_cmp;
	    
	/* Broadcast constants; three and zero are not referenced below */
	const __m128d half   = _mm_set1_pd(0.5);
	const __m128d three  = _mm_set1_pd(3.0);
	const __m128d one    = _mm_set1_pd(1.0);
	const __m128d two    = _mm_set1_pd(2.0);
	const __m128d zero   = _mm_set1_pd(0.0);
	const __m128d four   = _mm_set1_pd(4.0);
	
	const __m128d still_p5inv  = _mm_set1_pd(STILL_P5INV);
	const __m128d still_pip5   = _mm_set1_pd(STILL_PIP5);
	const __m128d still_p4     = _mm_set1_pd(STILL_P4);
    
	/* Prefactor used when converting the summed term into a radius */
	factor  = 0.5 * ONE_4PI_EPS0;
    
    /* Local aliases for the arrays used in the inner loops */
    gb_radius = born->gb_radius;
    vsolv     = born->vsolv;
    work      = born->gpol_still_work;
	jjnr      = nl->jjnr;
    shiftvec  = fr->shift_vec[0];
    dadx      = fr->dadx;
    
	/* Give the j-side variables defined values before the first load */
	jnrA = jnrB = 0;
    jx = _mm_setzero_pd();
    jy = _mm_setzero_pd();
    jz = _mm_setzero_pd();
    
	n = 0;
    
	/* Clear the per-atom accumulation buffer */
	for(i=0;i<natoms;i++)
	{
		work[i]=0;
	}
    
	/* Loop over the i-entries of the neighbour list */
	for(i=0;i<nl->nri;i++)
	{
        ii     = nl->iinr[i];
		ii3	   = ii*3;
        is3    = 3*nl->shift[i];     
        shX    = shiftvec[is3];  
        shY    = shiftvec[is3+1];
        shZ    = shiftvec[is3+2];
        nj0    = nl->jindex[i];      
        nj1    = nl->jindex[i+1];    
        
        /* Shifted i-atom coordinates, broadcast to both lanes */
        ix     = _mm_set1_pd(shX+x[ii3+0]);
		iy     = _mm_set1_pd(shY+x[ii3+1]);
		iz     = _mm_set1_pd(shZ+x[ii3+2]);
		

		/* Polarization energy for atom ai */
		gpi    = _mm_setzero_pd();
		
        rai     = _mm_load1_pd(gb_radius+ii);
        prod_ai = _mm_set1_pd(STILL_P4*vsolv[ii]);

		/* Main loop: two j-atoms per iteration, one in each lane */
		for(k=nj0;k<nj1-1;k+=2)
		{
			jnrA        = jjnr[k];   
			jnrB        = jjnr[k+1];
            
            j3A         = 3*jnrA;  
			j3B         = 3*jnrB;
            
            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);
            
            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA,gb_radius+jnrB,raj);
			GMX_MM_LOAD_2VALUES_PD(vsolv+jnrA,vsolv+jnrB,vaj);
            
			dx          = _mm_sub_pd(ix,jx);
			dy          = _mm_sub_pd(iy,jy);
			dz          = _mm_sub_pd(iz,jz);
            
            /* Inverse powers of the distance: r^-1, r^-2, r^-4 and r^-6 */
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            rinv        = gmx_mm_invsqrt_pd(rsq);
            rinv2       = _mm_mul_pd(rinv,rinv);
            rinv4       = _mm_mul_pd(rinv2,rinv2);
            rinv6       = _mm_mul_pd(rinv4,rinv2);
            
            /* ratio = r^2 / (rai+raj)^2 */
            rvdw        = _mm_add_pd(rai,raj);
            ratio       = _mm_mul_pd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw,rvdw)));
            
            mask_cmp    = _mm_cmple_pd(ratio,still_p5inv);

            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
            if( 0 == _mm_movemask_pd(mask_cmp) )
            {
                /* if ratio>still_p5inv for ALL elements */
                ccf         = one;
                dccf        = _mm_setzero_pd();
            }
            else 
            {
                /* At least one lane is below the threshold; the other lane
                 * is clamped to still_p5inv by the min.
                 */
                ratio       = _mm_min_pd(ratio,still_p5inv);
                theta       = _mm_mul_pd(ratio,still_pip5);
                gmx_mm_sincos_pd(theta,&sinq,&cosq);
                term        = _mm_mul_pd(half,_mm_sub_pd(one,cosq));
                ccf         = _mm_mul_pd(term,term);
                dccf        = _mm_mul_pd(_mm_mul_pd(two,term),
                                         _mm_mul_pd(sinq,theta));
            }

            prod        = _mm_mul_pd(still_p4,vaj);
            icf4        = _mm_mul_pd(ccf,rinv4);
            icf6        = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four,ccf),dccf), rinv6);
                        
            /* Contribution of atom i to the two j-atoms */
            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA,work+jnrB,_mm_mul_pd(prod_ai,icf4));
            
            /* Contribution of the two j-atoms to atom i */
            gpi           = _mm_add_pd(gpi, _mm_mul_pd(prod,icf4) );
            
            /* Four doubles are appended to dadx per j-pair: prod*icf6 for
             * both lanes, then prod_ai*icf6 for both lanes.
             * NOTE(review): _mm_store_pd is the aligned store; this assumes
             * fr->dadx is 16-byte aligned - confirm at the allocation site.
             */
            _mm_store_pd(dadx,_mm_mul_pd(prod,icf6));
            dadx+=2;
            _mm_store_pd(dadx,_mm_mul_pd(prod_ai,icf6));
            dadx+=2;
		} 
        
        /* Tail: one remaining j-atom when the j-count is odd. The arithmetic
         * uses the _sd (lower lane only) intrinsics; the upper lane just
         * carries along whatever the first operand held.
         */
        if(k<nj1)
		{
			jnrA        = jjnr[k];   
            
            j3A         = 3*jnrA;  
            
            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);
            
            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA,raj);
			GMX_MM_LOAD_1VALUE_PD(vsolv+jnrA,vaj);
            
			dx          = _mm_sub_sd(ix,jx);
			dy          = _mm_sub_sd(iy,jy);
			dz          = _mm_sub_sd(iz,jz);
            
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            rinv        = gmx_mm_invsqrt_pd(rsq);
            rinv2       = _mm_mul_sd(rinv,rinv);
            rinv4       = _mm_mul_sd(rinv2,rinv2);
            rinv6       = _mm_mul_sd(rinv4,rinv2);
            
            rvdw        = _mm_add_sd(rai,raj);
            ratio       = _mm_mul_sd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw,rvdw)));
            
            mask_cmp    = _mm_cmple_sd(ratio,still_p5inv);
            
            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
            /* NOTE(review): the movemask also looks at the upper lane of
             * mask_cmp, which _mm_cmple_sd copies from ratio rather than
             * computing a comparison - confirm this is intended.
             */
            if( 0 == _mm_movemask_pd(mask_cmp) )
            {
                /* if ratio>still_p5inv for ALL elements */
                ccf         = one;
                dccf        = _mm_setzero_pd();
            }
            else 
            {
                ratio       = _mm_min_sd(ratio,still_p5inv);
                theta       = _mm_mul_sd(ratio,still_pip5);
                gmx_mm_sincos_pd(theta,&sinq,&cosq);
                term        = _mm_mul_sd(half,_mm_sub_sd(one,cosq));
                ccf         = _mm_mul_sd(term,term);
                dccf        = _mm_mul_sd(_mm_mul_sd(two,term),
                                         _mm_mul_sd(sinq,theta));
            }
            
            prod        = _mm_mul_sd(still_p4,vaj);
            icf4        = _mm_mul_sd(ccf,rinv4);
            icf6        = _mm_mul_sd( _mm_sub_sd( _mm_mul_sd(four,ccf),dccf), rinv6);

            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA,_mm_mul_sd(prod_ai,icf4));
            
            gpi           = _mm_add_sd(gpi, _mm_mul_sd(prod,icf4) );
            
            /* Same dadx layout as the paired loop: two full-width stores,
             * i.e. four doubles are written for this single j-atom as well.
             * Presumably only the lower lane of each store is meaningful;
             * verify against the code that reads dadx back.
             */
            _mm_store_pd(dadx,_mm_mul_pd(prod,icf6));
            dadx+=2;
            _mm_store_pd(dadx,_mm_mul_pd(prod_ai,icf6));
            dadx+=2;
		} 
        /* Fold the accumulated i-atom term into work[ii]; presumably the
         * helper sums both lanes of gpi - verify in its definition.
         */
        gmx_mm_update_1pot_pd(gpi,work+ii);
	}
    
	/* Sum up the polarization energy from other nodes */
	if(PARTDECOMP(cr))
	{
		gmx_sum(natoms, work, cr);
	}
	else if(DOMAINDECOMP(cr))
	{
		dd_atom_sum_real(cr->dd, work);
	}
	
	/* Compute the radii */
	for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
	{		
		if(born->use[i] != 0)
		{
			gpi_ai           = born->gpol[i] + work[i]; /* add gpi to the initial pol energy gpi_ai*/
			gpi2             = gpi_ai * gpi_ai;
			/* bRad = factor/|gpi_ai|, written as factor/sqrt(gpi_ai^2) */
			born->bRad[i]   = factor*gmx_invsqrt(gpi2);
			fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
		}
	}
    
	/* Extra (local) communication required for DD */
	if(DOMAINDECOMP(cr))
	{
		dd_atom_spread_real(cr->dd, born->bRad);
		dd_atom_spread_real(cr->dd, fr->invsqrta);
	}
    
	return 0;	
}
/*
 * Compute Born radii for the HCT and OBC generalized-Born variants using
 * SSE2 double-precision intrinsics (two doubles per __m128d register).
 *
 * Outline of what the code below does:
 *   1. Zero born->gpol_hct_work[0 .. born->nr).
 *   2. For every i-entry of the neighbour list `nl`, loop over its j atoms
 *      two at a time (plus one scalar remainder iteration when the number
 *      of j atoms is odd). For each i-j pair two pairwise terms are
 *      evaluated: the term of aj acting on ai (accumulated in sum_ai and
 *      finally added to work[ii]) and the term of ai acting on aj (added
 *      directly to work[jnr]). Two derivative-related values per pair are
 *      written consecutively to fr->dadx.
 *   3. Sum `work` over nodes when particle or domain decomposition is on.
 *   4. Convert the accumulated sums to born->bRad[] / fr->invsqrta[]
 *      (and born->drobc[] in the non-HCT branch) for atoms with
 *      born->use[i] != 0, and spread the results for domain decomposition.
 *
 * Parameters as used by this function:
 *   cr           - communication record; selects PARTDECOMP/DOMAINDECOMP paths.
 *   fr           - provides dadx (output), shift_vec, natoms_force, invsqrta (output).
 *   natoms       - element count passed to gmx_sum() in the PARTDECOMP path.
 *   top          - top->atomtypes.gb_radius[] is read in the final radius loops.
 *   atype        - not referenced in this function.
 *   x            - coordinates, indexed as x[3*atom + {0,1,2}].
 *   nl           - neighbour list (nri, iinr, shift, jindex, jjnr).
 *   born         - GB data: gb_doffset, gb_radius, param, gpol_hct_work, use,
 *                  obc_alpha/beta/gamma are read; bRad and drobc are written.
 *   md           - md->typeA[] maps an atom to its atom type.
 *   gb_algorithm - egbHCT selects the HCT formula; any other value takes the
 *                  branch labelled OBC.
 *
 * Returns 0 unconditionally.
 */
int 
calc_gb_rad_hct_obc_sse2_double(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
                                const t_atomtypes *atype, double *x, t_nblist *nl, gmx_genborn_t *born,t_mdatoms *md,int gb_algorithm)
{
	/* NOTE(review): several locals declared below (e.g. ai, n, at0, at1,
	 * offset, sum_tmp, gbr, sum_ai2, sum_ai3, rai_inv2, sk, sk2, duij,
	 * sk2_inv, log_term, tmp, tmp_sum, tmp_ai, mask, three, zero, neg)
	 * are never referenced in this function. */
	int i,ai,k,n,ii,ii3,is3,nj0,nj1,at0,at1,offset;
    int jnrA,jnrB;
    int j3A,j3B;
	double shX,shY,shZ;
	double rr,rr_inv,rr_inv2,sum_tmp,sum,sum2,sum3,gbr;
	double sum_ai2, sum_ai3,tsum,tchain,doffset;
	double *obc_param;
    double *gb_radius;
    double *work;
    int *  jjnr;
    double *dadx;
    double *shiftvec;
    double min_rad,rad;
    
	__m128d ix,iy,iz,jx,jy,jz;
	__m128d dx,dy,dz,t1,t2,t3,t4;
	__m128d rsq,rinv,r;
	__m128d rai,rai_inv,raj, raj_inv,rai_inv2,sk,sk2,lij,dlij,duij;
	__m128d uij,lij2,uij2,lij3,uij3,diff2;
	__m128d lij_inv,sk2_inv,prod,log_term,tmp,tmp_sum;
	__m128d sum_ai, tmp_ai,sk_ai,sk_aj,sk2_ai,sk2_aj,sk2_rinv;
	__m128d dadx1,dadx2;
    __m128d logterm;
	__m128d mask;
	__m128d obc_mask1,obc_mask2,obc_mask3;    
    
    /* Constants broadcast to both lanes */
    __m128d oneeighth   = _mm_set1_pd(0.125);
    __m128d onefourth   = _mm_set1_pd(0.25);
    
	const __m128d half  = _mm_set1_pd(0.5);
	const __m128d three = _mm_set1_pd(3.0);
	const __m128d one   = _mm_set1_pd(1.0);
	const __m128d two   = _mm_set1_pd(2.0);
	const __m128d zero  = _mm_set1_pd(0.0);
	const __m128d neg   = _mm_set1_pd(-1.0);
	
	/* Set the dielectric offset and cache the arrays used in the pair loop */
	doffset   = born->gb_doffset;
	gb_radius = born->gb_radius;
    obc_param = born->param;
    work      = born->gpol_hct_work;
    jjnr      = nl->jjnr;
    dadx      = fr->dadx;
    shiftvec  = fr->shift_vec[0];
    
    /* Give the j-coordinate registers and j indices defined values before
     * the load macros touch them (presumably to avoid uninitialized-use
     * warnings; the load macros are defined elsewhere). */
    jx        = _mm_setzero_pd();
    jy        = _mm_setzero_pd();
    jz        = _mm_setzero_pd();
    
    jnrA = jnrB = 0;
    
	/* Clear the per-atom accumulator. Note this loop runs to born->nr while
	 * the final radius loops run to fr->natoms_force. */
	for(i=0;i<born->nr;i++)
	{
		work[i] = 0;
	}
	
	/* Outer loop over neighbour-list i-entries */
	for(i=0;i<nl->nri;i++)
	{
        ii     = nl->iinr[i];
		ii3	   = ii*3;
        is3    = 3*nl->shift[i];     
        shX    = shiftvec[is3];  
        shY    = shiftvec[is3+1];
        shZ    = shiftvec[is3+2];
        nj0    = nl->jindex[i];      
        nj1    = nl->jindex[i+1];    
        
        /* Shifted i-atom coordinates, broadcast to both lanes */
        ix     = _mm_set1_pd(shX+x[ii3+0]);
		iy     = _mm_set1_pd(shY+x[ii3+1]);
		iz     = _mm_set1_pd(shZ+x[ii3+2]);
		        
		/* Radius of atom ai and its inverse, in both lanes */
		rai    = _mm_load1_pd(gb_radius+ii);
		rai_inv= gmx_mm_inv_pd(rai);
        
		/* Accumulates the aj -> ai terms over all j atoms of this entry */
		sum_ai = _mm_setzero_pd();
		
		/* Per-atom parameter of ai (born->param, same array as obc_param) */
		sk_ai  = _mm_load1_pd(born->param+ii);
		sk2_ai = _mm_mul_pd(sk_ai,sk_ai);
        
		/* Packed loop: two j atoms (A in one lane, B in the other) per pass.
		 * The bound nj1-1 leaves an odd final j atom to the scalar block below. */
		for(k=nj0;k<nj1-1;k+=2)
		{
			jnrA        = jjnr[k];   
			jnrB        = jjnr[k+1];
			
            j3A         = 3*jnrA;  
			j3B         = 3*jnrB;
            
            /* Load coordinates, radii and parameters of both j atoms
             * (macros defined elsewhere) */
            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);
            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA,gb_radius+jnrB,raj);
            GMX_MM_LOAD_2VALUES_PD(obc_param+jnrA,obc_param+jnrB,sk_aj);
			
            dx    = _mm_sub_pd(ix, jx);
			dy    = _mm_sub_pd(iy, jy);
			dz    = _mm_sub_pd(iz, jz);
			
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            
            /* r is obtained as rsq * (1/sqrt(rsq)) */
            rinv        = gmx_mm_invsqrt_pd(rsq);
            r           = _mm_mul_pd(rsq,rinv);
            
			/* Compute raj_inv for the two j atoms */
            raj_inv     = gmx_mm_inv_pd(raj);
            
            /* Evaluate influence of atom aj -> ai */
            t1            = _mm_add_pd(r,sk_aj);
            t2            = _mm_sub_pd(r,sk_aj);
            t3            = _mm_sub_pd(sk_aj,r);
            /* mask1: rai < r+sk_aj  -> pair contributes at all
             * mask2: rai < r-sk_aj  -> lower bound lij is 1/(r-sk_aj), else 1/rai
             * mask3: rai < sk_aj-r  -> extra 2*(1/rai - lij) term is added */
            obc_mask1     = _mm_cmplt_pd(rai, t1);
            obc_mask2     = _mm_cmplt_pd(rai, t2);
            obc_mask3     = _mm_cmplt_pd(rai, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            /* Branch-free select between 1/(r-sk_aj) and rai_inv using mask2 */
            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,rai_inv));
            /* dlij is 1.0 where mask2 is set and 0.0 elsewhere */
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_pd(uij, uij);
            uij3          = _mm_mul_pd(uij2,uij);
            lij2          = _mm_mul_pd(lij, lij);
            lij3          = _mm_mul_pd(lij2,lij);
                        
            diff2         = _mm_sub_pd(uij2,lij2);
            /* 1/sqrt(lij^2), used as the reciprocal of lij in the log argument */
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_aj        = _mm_mul_pd(sk_aj,sk_aj);
            sk2_rinv      = _mm_mul_pd(sk2_aj,rinv);
            prod          = _mm_mul_pd(onefourth,sk2_rinv);
                        
            logterm       = gmx_mm_log_pd(_mm_mul_pd(uij,lij_inv));
            
            /* t1 = 0.5*[ lij-uij + diff2*(r/4 - sk^2/(4r)) + log/(2r) + masked t4 ] */
            t1            = _mm_sub_pd(lij,uij);
            t2            = _mm_mul_pd(diff2,
                                       _mm_sub_pd(_mm_mul_pd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_pd(half,_mm_mul_pd(rinv,logterm));
            t1            = _mm_add_pd(t1,_mm_add_pd(t2,t3));
            t4            = _mm_mul_pd(two,_mm_sub_pd(rai_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_pd(half,_mm_add_pd(t1,t4));
                        
            /* Lanes failing mask1 contribute zero */
            sum_ai        = _mm_add_pd(sum_ai, _mm_and_pd(t1,obc_mask1) );
            
            /* Derivative-related term for aj -> ai, stored to dadx below */
            t1            = _mm_add_pd(_mm_mul_pd(half,lij2),
                                       _mm_mul_pd(prod,lij3));
            t1            = _mm_sub_pd(t1,
                                       _mm_mul_pd(onefourth,
                                                  _mm_add_pd(_mm_mul_pd(lij,rinv),
                                                             _mm_mul_pd(lij3,r))));
            t2            = _mm_mul_pd(onefourth,
                                       _mm_add_pd(_mm_mul_pd(uij,rinv),
                                                  _mm_mul_pd(uij3,r)));
            t2            = _mm_sub_pd(t2,
                                       _mm_add_pd(_mm_mul_pd(half,uij2),
                                                  _mm_mul_pd(prod,uij3)));
            t3            = _mm_mul_pd(_mm_mul_pd(onefourth,logterm),
                                       _mm_mul_pd(rinv,rinv));
            t3            = _mm_sub_pd(t3,
                                       _mm_mul_pd(_mm_mul_pd(diff2,oneeighth),
                                                  _mm_add_pd(one,
                                                             _mm_mul_pd(sk2_rinv,rinv))));
            t1            = _mm_mul_pd(rinv,
                                       _mm_add_pd(_mm_mul_pd(dlij,t1),
                                                  _mm_add_pd(t2,t3)));
            
            dadx1         = _mm_and_pd(t1,obc_mask1);
            
            /* Evaluate influence of atom ai -> aj
             * (same expressions with the roles of ai and aj swapped:
             * sk_ai/sk2_ai replace sk_aj/sk2_aj and raj/raj_inv replace rai/rai_inv) */
            t1            = _mm_add_pd(r,sk_ai);
            t2            = _mm_sub_pd(r,sk_ai);
            t3            = _mm_sub_pd(sk_ai,r);
            obc_mask1     = _mm_cmplt_pd(raj, t1);
            obc_mask2     = _mm_cmplt_pd(raj, t2);
            obc_mask3     = _mm_cmplt_pd(raj, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,raj_inv));
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_pd(uij, uij);
            uij3          = _mm_mul_pd(uij2,uij);
            lij2          = _mm_mul_pd(lij, lij);
            lij3          = _mm_mul_pd(lij2,lij);
                        
            diff2         = _mm_sub_pd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv      = _mm_mul_pd(sk2_ai,rinv);
            prod          = _mm_mul_pd(onefourth,sk2_rinv);
                        
            logterm       = gmx_mm_log_pd(_mm_mul_pd(uij,lij_inv));
            
            t1            = _mm_sub_pd(lij,uij);
            t2            = _mm_mul_pd(diff2,
                                       _mm_sub_pd(_mm_mul_pd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_pd(half,_mm_mul_pd(rinv,logterm));
            t1            = _mm_add_pd(t1,_mm_add_pd(t2,t3));
            t4            = _mm_mul_pd(two,_mm_sub_pd(raj_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_pd(half,_mm_add_pd(t1,t4));
                        
            /* The ai -> aj term goes straight into the accumulators of the two j atoms */
            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA,work+jnrB,_mm_and_pd(t1,obc_mask1));
            
            t1            = _mm_add_pd(_mm_mul_pd(half,lij2),
                                       _mm_mul_pd(prod,lij3));
            t1            = _mm_sub_pd(t1,
                                       _mm_mul_pd(onefourth,
                                                  _mm_add_pd(_mm_mul_pd(lij,rinv),
                                                             _mm_mul_pd(lij3,r))));
            t2            = _mm_mul_pd(onefourth,
                                       _mm_add_pd(_mm_mul_pd(uij,rinv),
                                                  _mm_mul_pd(uij3,r)));
            t2            = _mm_sub_pd(t2,
                                       _mm_add_pd(_mm_mul_pd(half,uij2),
                                                  _mm_mul_pd(prod,uij3)));
            t3            = _mm_mul_pd(_mm_mul_pd(onefourth,logterm),
                                       _mm_mul_pd(rinv,rinv));
            t3            = _mm_sub_pd(t3,
                                       _mm_mul_pd(_mm_mul_pd(diff2,oneeighth),
                                                  _mm_add_pd(one,
                                                             _mm_mul_pd(sk2_rinv,rinv))));
            t1            = _mm_mul_pd(rinv,
                                       _mm_add_pd(_mm_mul_pd(dlij,t1),
                                                  _mm_add_pd(t2,t3)));
            
            dadx2         = _mm_and_pd(t1,obc_mask1);
            
            /* dadx layout per pass: two doubles of dadx1 (aj -> ai) followed by
             * two doubles of dadx2 (ai -> aj). _mm_store_pd is an aligned store,
             * so this relies on fr->dadx being 16-byte aligned (not visible here). */
            _mm_store_pd(dadx,dadx1);
            dadx += 2;
            _mm_store_pd(dadx,dadx2);
            dadx += 2;
        } /* end normal inner loop */
        
		/* Scalar remainder: one leftover j atom when the j count is odd.
		 * Same arithmetic as above using mostly _sd (low-lane) intrinsics;
		 * a few _pd operations remain (e.g. the onefourth*r product and one
		 * t2+t3 add), but only the low lane of each result is meaningful. */
		if(k<nj1)
		{
			jnrA        = jjnr[k];   
			
            j3A         = 3*jnrA;  
            
            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);
            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA,raj);
            GMX_MM_LOAD_1VALUE_PD(obc_param+jnrA,sk_aj);
			
            dx    = _mm_sub_sd(ix, jx);
			dy    = _mm_sub_sd(iy, jy);
			dz    = _mm_sub_sd(iz, jz);
			
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            
            rinv        = gmx_mm_invsqrt_pd(rsq);
            r           = _mm_mul_sd(rsq,rinv);
            
			/* Compute raj_inv for the single j atom */
            raj_inv     = gmx_mm_inv_pd(raj);
            
            /* Evaluate influence of atom aj -> ai */
            t1            = _mm_add_sd(r,sk_aj);
            t2            = _mm_sub_sd(r,sk_aj);
            t3            = _mm_sub_sd(sk_aj,r);
            obc_mask1     = _mm_cmplt_sd(rai, t1);
            obc_mask2     = _mm_cmplt_sd(rai, t2);
            obc_mask3     = _mm_cmplt_sd(rai, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            lij           = _mm_or_pd(_mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,rai_inv));
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_sd(uij, uij);
            uij3          = _mm_mul_sd(uij2,uij);
            lij2          = _mm_mul_sd(lij, lij);
            lij3          = _mm_mul_sd(lij2,lij);
            
            diff2         = _mm_sub_sd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_aj        = _mm_mul_sd(sk_aj,sk_aj);
            sk2_rinv      = _mm_mul_sd(sk2_aj,rinv);
            prod          = _mm_mul_sd(onefourth,sk2_rinv);
            
            logterm       = gmx_mm_log_pd(_mm_mul_sd(uij,lij_inv));
            
            t1            = _mm_sub_sd(lij,uij);
            t2            = _mm_mul_sd(diff2,
                                       _mm_sub_sd(_mm_mul_pd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_sd(half,_mm_mul_sd(rinv,logterm));
            t1            = _mm_add_sd(t1,_mm_add_sd(t2,t3));
            t4            = _mm_mul_sd(two,_mm_sub_sd(rai_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_sd(half,_mm_add_sd(t1,t4));
            
            /* _mm_add_sd keeps the upper lane of sum_ai from the packed loop intact */
            sum_ai        = _mm_add_sd(sum_ai, _mm_and_pd(t1,obc_mask1) );
            
            t1            = _mm_add_sd(_mm_mul_sd(half,lij2),
                                       _mm_mul_sd(prod,lij3));
            t1            = _mm_sub_sd(t1,
                                       _mm_mul_sd(onefourth,
                                                  _mm_add_sd(_mm_mul_sd(lij,rinv),
                                                             _mm_mul_sd(lij3,r))));
            t2            = _mm_mul_sd(onefourth,
                                       _mm_add_sd(_mm_mul_sd(uij,rinv),
                                                  _mm_mul_sd(uij3,r)));
            t2            = _mm_sub_sd(t2,
                                       _mm_add_sd(_mm_mul_sd(half,uij2),
                                                  _mm_mul_sd(prod,uij3)));
            t3            = _mm_mul_sd(_mm_mul_sd(onefourth,logterm),
                                       _mm_mul_sd(rinv,rinv));
            t3            = _mm_sub_sd(t3,
                                       _mm_mul_sd(_mm_mul_sd(diff2,oneeighth),
                                                  _mm_add_sd(one,
                                                             _mm_mul_sd(sk2_rinv,rinv))));
            t1            = _mm_mul_sd(rinv,
                                       _mm_add_sd(_mm_mul_sd(dlij,t1),
                                                  _mm_add_pd(t2,t3)));
            
            dadx1         = _mm_and_pd(t1,obc_mask1);
            
            /* Evaluate influence of atom ai -> aj */
            t1            = _mm_add_sd(r,sk_ai);
            t2            = _mm_sub_sd(r,sk_ai);
            t3            = _mm_sub_sd(sk_ai,r);
            obc_mask1     = _mm_cmplt_sd(raj, t1);
            obc_mask2     = _mm_cmplt_sd(raj, t2);
            obc_mask3     = _mm_cmplt_sd(raj, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,raj_inv));
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_sd(uij, uij);
            uij3          = _mm_mul_sd(uij2,uij);
            lij2          = _mm_mul_sd(lij, lij);
            lij3          = _mm_mul_sd(lij2,lij);
            
            diff2         = _mm_sub_sd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv      = _mm_mul_sd(sk2_ai,rinv);
            prod          = _mm_mul_sd(onefourth,sk2_rinv);
            
            logterm       = gmx_mm_log_pd(_mm_mul_sd(uij,lij_inv));
            
            t1            = _mm_sub_sd(lij,uij);
            t2            = _mm_mul_sd(diff2,
                                       _mm_sub_sd(_mm_mul_sd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_sd(half,_mm_mul_sd(rinv,logterm));
            t1            = _mm_add_sd(t1,_mm_add_sd(t2,t3));
            t4            = _mm_mul_sd(two,_mm_sub_sd(raj_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_sd(half,_mm_add_sd(t1,t4));
            
            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA,_mm_and_pd(t1,obc_mask1));
            
            t1            = _mm_add_sd(_mm_mul_sd(half,lij2),
                                       _mm_mul_sd(prod,lij3));
            t1            = _mm_sub_sd(t1,
                                       _mm_mul_sd(onefourth,
                                                  _mm_add_sd(_mm_mul_sd(lij,rinv),
                                                             _mm_mul_sd(lij3,r))));
            t2            = _mm_mul_sd(onefourth,
                                       _mm_add_sd(_mm_mul_sd(uij,rinv),
                                                  _mm_mul_sd(uij3,r)));
            t2            = _mm_sub_sd(t2,
                                       _mm_add_sd(_mm_mul_sd(half,uij2),
                                                  _mm_mul_sd(prod,uij3)));
            t3            = _mm_mul_sd(_mm_mul_sd(onefourth,logterm),
                                       _mm_mul_sd(rinv,rinv));
            t3            = _mm_sub_sd(t3,
                                       _mm_mul_sd(_mm_mul_sd(diff2,oneeighth),
                                                  _mm_add_sd(one,
                                                             _mm_mul_sd(sk2_rinv,rinv))));
            t1            = _mm_mul_sd(rinv,
                                       _mm_add_sd(_mm_mul_sd(dlij,t1),
                                                  _mm_add_sd(t2,t3)));
            
            dadx2         = _mm_and_pd(t1,obc_mask1);
            
            /* The remainder also writes 2+2 doubles, so dadx advances by four
             * doubles per j-iteration whether packed or scalar.
             * NOTE(review): the upper lanes stored here are not meaningful;
             * presumably the consumer of dadx skips them - confirm. */
            _mm_store_pd(dadx,dadx1);
            dadx += 2;
            _mm_store_pd(dadx,dadx2);
            dadx += 2;
        } 
        /* Fold the accumulated aj -> ai sum into work[ii]
         * (helper defined elsewhere; presumably adds both lanes) */
        gmx_mm_update_1pot_pd(sum_ai,work+ii);
        
	}
	
	/* Parallel summations */
	if(PARTDECOMP(cr))
	{
		gmx_sum(natoms, work, cr);
	}
	else if(DOMAINDECOMP(cr))
	{
		dd_atom_sum_real(cr->dd, work);
	}
	
    if(gb_algorithm==egbHCT)
    {
        /* HCT: bRad = max( 1/(1/rr - work[i]), rr + doffset ) with
         * rr = gb_radius[type] - doffset */
        for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
        {
			if(born->use[i] != 0)
            {
                rr      = top->atomtypes.gb_radius[md->typeA[i]]-doffset; 
                sum     = 1.0/rr - work[i];
                min_rad = rr + doffset;
                rad     = 1.0/sum; 
                
                /* Clamp from below at the un-offset atom-type radius */
                born->bRad[i]   = rad > min_rad ? rad : min_rad;
                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
            }
        }
        
        /* Extra communication required for DD */
        if(DOMAINDECOMP(cr))
        {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
        }
    }
    else
    {
        /* OBC: bRad = 1 / ( 1/(rho-doffset) - tanh(a*s - b*s^2 + g*s^3)/rho )
         * with rho = gb_radius[type] and s = (rho-doffset)*work[i] */
        for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
        {
			if(born->use[i] != 0)
            {
                rr      = top->atomtypes.gb_radius[md->typeA[i]];
                /* rr_inv2 is the inverse of the un-offset radius (not a square) */
                rr_inv2 = 1.0/rr;
                rr      = rr-doffset; 
                rr_inv  = 1.0/rr;
                sum     = rr * work[i];
                sum2    = sum  * sum;
                sum3    = sum2 * sum;
                
                tsum    = tanh(born->obc_alpha*sum-born->obc_beta*sum2+born->obc_gamma*sum3);
                born->bRad[i] = rr_inv - tsum*rr_inv2;
                born->bRad[i] = 1.0 / born->bRad[i];
                
                fr->invsqrta[i]=gmx_invsqrt(born->bRad[i]);
                
                /* drobc = (1 - tanh^2) * d(argument)/d(work[i]) / rho */
                tchain  = rr * (born->obc_alpha-2*born->obc_beta*sum+3*born->obc_gamma*sum2);
                born->drobc[i] = (1.0-tsum*tsum)*tchain*rr_inv2;
            }
        }
        /* Extra (local) communication required for DD */
        if(DOMAINDECOMP(cr))
        {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
            dd_atom_spread_real(cr->dd, born->drobc);
        }
    }
    
	
	
	return 0;
}
Example #11
0
/*
 * do_mmcg: reflect the velocities of monitored solvent (water) atoms that
 * have moved too far from the nearest reference charge group.
 *
 * For every monitored solvent charge group allsolid[i], the squared distance
 * to each reference charge group in allcgid[] is computed and the nearest one
 * is selected. If that squared distance is >= shell2wt^2, each of the three
 * atoms of the solvent group whose velocity has a positive component along
 * the unit vector (reference -> solvent) gets that component reversed:
 *     v -= 2 * (v . n) * n
 *
 * Two parallel layouts are handled:
 *   - particle decomposition: cg_cm is indexed by global charge-group id and
 *     only solvent groups whose first atom is in [md->start,
 *     md->start+md->homenr) are processed;
 *   - domain decomposition: cg_cm is local, so the centres of all reference
 *     charge groups are first gathered into a temporary array via MPI.
 * When neither PARTDECOMP(cr) nor DOMAINDECOMP(cr) holds, no velocity is
 * modified.
 *
 * natoms and log are accepted for interface compatibility and are not used.
 */
void do_mmcg (int natoms,		// number of atoms in simulation
 	    t_inputrec   *inputrec, 	// input record and box stuff
	    t_mdatoms    *md,		// the atoms
	    t_state      *state,	// positions & velocities
	    gmx_mtop_t   *top,		// global topology
	    t_commrec    *cr,   	// communicators
	    rvec         *cg_cm,	// centre of mass of charge groups
	    int *allcgid, 		// charge groups ids
	    int allcgnr,		// charge groups number
	    int *allsolid,		// solvent groups ids
	    int allsolnr,		// solvent groups number
    	    FILE *log)			// logfile
{
    int     i, j, k, p, q, qmin;
    int     nsol;
    real    dvmod, shell2w2, dmin, d;
    rvec    vecdist, dv;
    rvec   *v         = state->v;   // velocities
    rvec   *all_cg_cm = NULL;       // gathered reference centres (DD only)
    t_block cgs;                    // global charge-group index

    shell2w2 = pow (inputrec->mmcg.shell2wt, 2.0);  // squared distance threshold

    // NOTE(review): gmx_mtop_global_cgs() presumably allocates cgs.index and
    // nothing here releases it - confirm against its definition.
    cgs = gmx_mtop_global_cgs(top);
    snew(all_cg_cm, allcgnr);

    // For DD, cg_cm only holds local charge groups, and the reference group
    // nearest to a monitored water may live more than one DD cell away, so
    // every rank needs the centres of all reference charge groups.
    if (DOMAINDECOMP(cr))
    {
        if (cr->nnodes != 1)
        {
            gmx_barrier(cr);
        }
        for (i = 0; i < allcgnr; i++)
        {
            int sender = 0, senderf;

            for (j = 0; j < cr->dd->ncg_home; j++)   // is the cg a home cg ?
            {
                if (cr->dd->index_gl[j] == allcgid[i])
                {
                    sender = cr->sim_nodeid;
                    for (k = 0; k < 3; k++)
                    {
                        all_cg_cm[i][k] = cg_cm[j][k];
                    }
                }
            }
            // Only the owner contributes a non-zero value, so the sum is the
            // owner's id.
            // NOTE(review): this assumes exactly one owner and that
            // cr->sim_nodeid equals the rank within cr->dd->mpi_comm_all.
            MPI_Allreduce(&sender, &senderf, 1, MPI_INT, MPI_SUM, cr->dd->mpi_comm_all);
            // Broadcast the whole 3-vector in one collective call.
            MPI_Bcast(all_cg_cm[i], (int)sizeof(all_cg_cm[i]), MPI_BYTE,
                      senderf, cr->dd->mpi_comm_all);
        }
    }

    // Without reference charge groups there is no "nearest group", so the
    // solvent loop is skipped entirely rather than reading an undefined
    // minimum distance.
    nsol = (allcgnr > 0) ? allsolnr : 0;

    for (i = 0; i < nsol; i++)   // Loop over waters - START
    {
        p = allsolid[i];

        if (PARTDECOMP(cr)
            && (cgs.index[p] >= md->start)
            && (cgs.index[p] < (md->start + md->homenr)))   // water i is in the node
        {
            // Nearest reference charge group (qmin) and its squared distance (dmin)
            qmin = allcgid[0];
            dmin = distance2(cg_cm[p], cg_cm[qmin]);
            for (j = 1; j < allcgnr; j++)
            {
                q = allcgid[j];
                d = distance2(cg_cm[p], cg_cm[q]);
                if (d < dmin)
                {
                    dmin = d;
                    qmin = q;
                }
            }

            if (dmin >= shell2w2)   // Modifying velocity
            {
                rvec_sub(cg_cm[p], cg_cm[qmin], vecdist);
                unitv (vecdist, vecdist);
                // The solvent group is taken to have exactly three atoms
                for (j = cgs.index[p]; j < (3 + cgs.index[p]); j++)
                {
                    dvmod = iprod (v[j], vecdist);
                    if (dvmod <= 0)
                    {
                        continue;   // already moving towards the reference
                    }
                    svmul (2.0*dvmod, vecdist, dv);
                    rvec_dec (v[j], dv);   // pass the rvec itself, not its address
                }
            }
        }

        if (DOMAINDECOMP(cr))
        {
            int g_atnr; // global atom ID
            int l_atnr; // local atom ID in the DD cell
            int l_cgnr; // local charge group number

            for (g_atnr = cgs.index[p]; g_atnr <= cgs.index[p] + 2; g_atnr++)
            {
                // Look the atom up in the global-to-local table; atoms that
                // are not home atoms of this rank are skipped.
                if (!ga2la_get_home(cr->dd->ga2la, g_atnr, &l_atnr))
                {
                    continue;
                }
                l_cgnr = cr->dd->la2lc[l_atnr];   // local charge group number

                // Nearest reference charge group; qmin indexes all_cg_cm here
                qmin = 0;
                dmin = distance2(cg_cm[l_cgnr], all_cg_cm[0]);
                for (j = 1; j < allcgnr; j++)
                {
                    d = distance2(cg_cm[l_cgnr], all_cg_cm[j]);
                    if (d < dmin)
                    {
                        dmin = d;
                        qmin = j;
                    }
                }

                if (dmin < shell2w2)
                {
                    continue;   // still inside the shell
                }

                rvec_sub(cg_cm[l_cgnr], all_cg_cm[qmin], vecdist);
                unitv (vecdist, vecdist);
                dvmod = iprod (v[l_atnr], vecdist);
                if (dvmod <= 0)
                {
                    continue;
                }
                svmul (2.0*dvmod, vecdist, dv);
                rvec_dec (v[l_atnr], dv);   // pass the rvec itself, not its address
            }
        }
    } // Loop over waters - END

    // Release the per-call scratch array (previously leaked on every call)
    sfree(all_cg_cm);
}