Example #1
0
void erffit (real x, real a[], real *y, real dyda[])
{
/* Function
 *	y=(a1+a2)/2-(a1-a2)/2*erf((x-a3)/a4^2)
 *
 * Evaluates the function at x and stores the value in *y and the partial
 * derivatives with respect to the parameters a[1]..a[4] in dyda[1]..dyda[4].
 * Only indices 1..4 of a[] and dyda[] are accessed; index 0 is not touched.
 *
 * With arg = (x-a3)/a4^2 and erf'(arg) = 2/sqrt(pi)*exp(-arg^2):
 *   dy/da1 = 1/2 - erf(arg)/2
 *   dy/da2 = 1/2 + erf(arg)/2
 *   dy/da3 = (a1-a2)/2 * erf'(arg) / a4^2
 *   dy/da4 = (a1-a2)/2 * erf'(arg) * 2*arg/a4  =  2 * (dy/da3) * arg * a4
 */

    real erfarg;
    real erfval;
    real erfarg2;
    real derf;

    erfarg  = (x-a[3])/(a[4]*a[4]);
    erfarg2 = erfarg*erfarg;
    /* erfval already carries the factor 1/2 of the formula above */
    erfval  = gmx_erf(erfarg)/2;
    derf    = M_2_SQRTPI*(a[1]-a[2])/2*exp(-erfarg2)/(a[4]*a[4]);
    *y      = (a[1]+a[2])/2-(a[1]-a[2])*erfval;
    /* Use a floating-point 0.5: the integer expression 1/2 evaluates to 0 */
    dyda[1] = 0.5-erfval;
    dyda[2] = 0.5+erfval;
    dyda[3] = derf;
    /* d(arg)/da4 = -2*arg/a4, which leaves one factor a4 after dividing
     * out the 1/a4^2 that is already contained in derf.
     */
    dyda[4] = 2*derf*erfarg*a[4];
}
Example #2
0
/* Long-range correction terms for Ewald electrostatics.
 *
 * For the atoms in [start,end) this routine
 *  - when calc_excl_corr is set, loops over the exclusion list excl and,
 *    for every excluded pair (i,k) with k > i and non-zero charge product,
 *    accumulates the erf(ewc*r)/r energy in Vexcl and applies the matching
 *    pair force to f[i] and f[k];
 *  - when a dipole correction is active (dipole_coeff != 0), subtracts the
 *    dipole correction force from f[i];
 *  - adds 0.5 times the accumulated dx (x) df tensor to vir;
 *  - on the master rank and thread 0 only, computes the self-energy
 *    (Vself, only when calc_excl_corr is set) and the dipole energy
 *    (Vdipole).
 *
 * If chargeB is not NULL the free-energy path is used: the A and B state
 * contributions are mixed linearly with (1-lambda) and lambda, and the
 * derivative with respect to lambda is added to *dvdlambda.
 *
 * The return value is Vdipole - Vself - Vexcl (lambda-mixed in the
 * free-energy case). fplog is not used in this function.
 */
real ewald_LRcorrection(FILE *fplog,
                        int start, int end,
                        t_commrec *cr, int thread, t_forcerec *fr,
                        real *chargeA, real *chargeB,
                        gmx_bool calc_excl_corr,
                        t_blocka *excl, rvec x[],
                        matrix box, rvec mu_tot[],
                        int ewald_geometry, real epsilon_surface,
                        rvec *f, tensor vir,
                        real lambda, real *dvdlambda)
{
    int      i, i1, i2, j, k, m, iv, jv, q;
    atom_id *AA;
    double   q2sumA, q2sumB, Vexcl, dvdl_excl; /* Necessary for precision */
    real     one_4pi_eps;
    real     v, vc, qiA, qiB, dr, dr2, rinv, fscal, enercorr;
    real     Vself[2], Vdipole[2], rinv2, ewc = fr->ewaldcoeff, ewcdr;
    rvec     df, dx, mutot[2], dipcorrA, dipcorrB;
    tensor   dxdf;
    /* Volume from the product of the box diagonal elements */
    real     vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
    real     L1, dipole_coeff, qqA, qqB, qqL, vr0;
    /* Uncommenting the define below selects the tabulated code path */
    /*#define TABLES*/
#ifdef TABLES
    real        tabscale = fr->tabscale;
    real        eps, eps2, VV, FF, F, Y, Geps, Heps2, Fp, fijC, r1t;
    real       *VFtab = fr->coulvdwtab;
    int         n0, n1, nnn;
#endif
    /* A B-state charge array selects the free-energy code path */
    gmx_bool    bFreeEnergy = (chargeB != NULL);
    gmx_bool    bMolPBC     = fr->bMolPBC;

    one_4pi_eps = ONE_4PI_EPS0/fr->epsilon_r;
    /* vr0 = 2*ewc/sqrt(pi) is the r -> 0 limit of erf(ewc*r)/r; it is used
     * for excluded pairs at zero distance and in the small-r force series.
     */
    vr0         = ewc*M_2_SQRTPI;

    AA         = excl->a;
    Vexcl      = 0;
    dvdl_excl  = 0;
    q2sumA     = 0;
    q2sumB     = 0;
    Vdipole[0] = 0;
    Vdipole[1] = 0;
    L1         = 1.0-lambda;

    /* Note that we have to transform back to gromacs units, since
     * mu_tot contains the dipole in debye units (for output).
     */
    for (i = 0; (i < DIM); i++)
    {
        mutot[0][i] = mu_tot[0][i]*DEBYE2ENM;
        mutot[1][i] = mu_tot[1][i]*DEBYE2ENM;
        dipcorrA[i] = 0;
        dipcorrB[i] = 0;
    }
    /* dipole_coeff stays 0 when no dipole correction applies; the force
     * and energy code below tests it against 0 to decide what to do.
     */
    dipole_coeff = 0;
    switch (ewald_geometry)
    {
        case eewg3D:
            /* Correction in all three dimensions, only for non-zero epsilon_surface */
            if (epsilon_surface != 0)
            {
                dipole_coeff =
                    2*M_PI*ONE_4PI_EPS0/((2*epsilon_surface + fr->epsilon_r)*vol);
                for (i = 0; (i < DIM); i++)
                {
                    dipcorrA[i] = 2*dipole_coeff*mutot[0][i];
                    dipcorrB[i] = 2*dipole_coeff*mutot[1][i];
                }
            }
            break;
        case eewg3DC:
            /* Correction along the z component only */
            dipole_coeff = 2*M_PI*one_4pi_eps/vol;
            dipcorrA[ZZ] = 2*dipole_coeff*mutot[0][ZZ];
            dipcorrB[ZZ] = 2*dipole_coeff*mutot[1][ZZ];
            break;
        default:
            gmx_incons("Unsupported Ewald geometry");
            break;
    }
    if (debug)
    {
        fprintf(debug, "dipcorr = %8.3f  %8.3f  %8.3f\n",
                dipcorrA[XX], dipcorrA[YY], dipcorrA[ZZ]);
        fprintf(debug, "mutot   = %8.3f  %8.3f  %8.3f\n",
                mutot[0][XX], mutot[0][YY], mutot[0][ZZ]);
    }

    /* dxdf accumulates the outer product dx (x) df of all pair forces */
    clear_mat(dxdf);
    if ((calc_excl_corr || dipole_coeff != 0) && !bFreeEnergy)
    {
        /* Single-state path: only the A charges are used */
        for (i = start; (i < end); i++)
        {
            /* Charge of particle i, pre-multiplied with the electrostatic conversion factor */
            qiA = chargeA[i]*one_4pi_eps;

            if (calc_excl_corr)
            {
                i1  = excl->index[i];
                i2  = excl->index[i+1];

                /* Loop over excluded neighbours */
                for (j = i1; (j < i2); j++)
                {
                    k = AA[j];
                    /*
                     * First we must test whether k <> i, and then, because the
                     * exclusions are all listed twice i->k and k->i we must select
                     * just one of the two.
                     * As a minor optimization we only compute forces when the charges
                     * are non-zero.
                     */
                    if (k > i)
                    {
                        qqA = qiA*chargeA[k];
                        if (qqA != 0.0)
                        {
                            rvec_sub(x[i], x[k], dx);
                            if (bMolPBC)
                            {
                                /* Cheap pbc_dx, assume excluded pairs are at short distance. */
                                for (m = DIM-1; (m >= 0); m--)
                                {
                                    if (dx[m] > 0.5*box[m][m])
                                    {
                                        rvec_dec(dx, box[m]);
                                    }
                                    else if (dx[m] < -0.5*box[m][m])
                                    {
                                        rvec_inc(dx, box[m]);
                                    }
                                }
                            }
                            dr2 = norm2(dx);
                            /* Distance between two excluded particles may be zero in the
                             * case of shells
                             */
                            if (dr2 != 0)
                            {
                                rinv              = gmx_invsqrt(dr2);
                                rinv2             = rinv*rinv;
                                dr                = 1.0/rinv;
#ifdef TABLES
                                /* Tabulated interaction: look up the spline
                                 * coefficients at table index n0 (12 reals per point)
                                 */
                                r1t               = tabscale*dr;
                                n0                = r1t;
                                assert(n0 >= 3);
                                n1                = 12*n0;
                                eps               = r1t-n0;
                                eps2              = eps*eps;
                                nnn               = n1;
                                Y                 = VFtab[nnn];
                                F                 = VFtab[nnn+1];
                                Geps              = eps*VFtab[nnn+2];
                                Heps2             = eps2*VFtab[nnn+3];
                                Fp                = F+Geps+Heps2;
                                VV                = Y+eps*Fp;
                                FF                = Fp+Geps+2.0*Heps2;
                                vc                = qqA*(rinv-VV);
                                fijC              = qqA*FF;
                                Vexcl            += vc;

                                fscal             = vc*rinv2+fijC*tabscale*rinv;
                                /* End of tabulated interaction part */
#else

                                /* Analytical path (used when TABLES is not defined):
                                 * vc = qq*erf(ewc*r)/r
                                 */
                                ewcdr   = ewc*dr;
                                vc      = qqA*gmx_erf(ewcdr)*rinv;
                                Vexcl  += vc;
#ifdef GMX_DOUBLE
                                /* Relative accuracy at R_ERF_R_INACC of 3e-10 */
#define       R_ERF_R_INACC 0.006
#else
                                /* Relative accuracy at R_ERF_R_INACC of 2e-5 */
#define       R_ERF_R_INACC 0.1
#endif
                                if (ewcdr > R_ERF_R_INACC)
                                {
                                    fscal = rinv2*(vc - qqA*ewc*M_2_SQRTPI*exp(-ewcdr*ewcdr));
                                }
                                else
                                {
                                    /* Use a fourth order series expansion for small ewcdr */
                                    fscal = ewc*ewc*qqA*vr0*(2.0/3.0 - 0.4*ewcdr*ewcdr);
                                }
#endif
                                /* The force vector is obtained by multiplication with the
                                 * distance vector
                                 */
                                svmul(fscal, dx, df);
                                rvec_inc(f[k], df);
                                rvec_dec(f[i], df);
                                for (iv = 0; (iv < DIM); iv++)
                                {
                                    for (jv = 0; (jv < DIM); jv++)
                                    {
                                        dxdf[iv][jv] += dx[iv]*df[jv];
                                    }
                                }
                            }
                            else
                            {
                                /* Zero distance: energy is the r -> 0 limit, no force */
                                Vexcl += qqA*vr0;
                            }
                        }
                    }
                }
            }
            /* Dipole correction on force */
            if (dipole_coeff != 0)
            {
                for (j = 0; (j < DIM); j++)
                {
                    f[i][j] -= dipcorrA[j]*chargeA[i];
                }
            }
        }
    }
    else if (calc_excl_corr || dipole_coeff != 0)
    {
        /* Free-energy path: A and B state charges are mixed with lambda */
        for (i = start; (i < end); i++)
        {
            /* A and B state charges of particle i, pre-multiplied with the conversion factor */
            qiA = chargeA[i]*one_4pi_eps;
            qiB = chargeB[i]*one_4pi_eps;

            if (calc_excl_corr)
            {
                i1  = excl->index[i];
                i2  = excl->index[i+1];

                /* Loop over excluded neighbours */
                for (j = i1; (j < i2); j++)
                {
                    k = AA[j];
                    /* Handle each pair once, see the single-state loop above */
                    if (k > i)
                    {
                        qqA = qiA*chargeA[k];
                        qqB = qiB*chargeB[k];
                        if (qqA != 0.0 || qqB != 0.0)
                        {
                            /* Linearly interpolated charge product */
                            qqL = L1*qqA + lambda*qqB;
                            rvec_sub(x[i], x[k], dx);
                            if (bMolPBC)
                            {
                                /* Cheap pbc_dx, assume excluded pairs are at short distance. */
                                for (m = DIM-1; (m >= 0); m--)
                                {
                                    if (dx[m] > 0.5*box[m][m])
                                    {
                                        rvec_dec(dx, box[m]);
                                    }
                                    else if (dx[m] < -0.5*box[m][m])
                                    {
                                        rvec_inc(dx, box[m]);
                                    }
                                }
                            }
                            dr2 = norm2(dx);
                            if (dr2 != 0)
                            {
                                /* Unlike the single-state loop, this path always uses the
                                 * direct erf/exp expression (no small-r series, no tables)
                                 */
                                rinv   = gmx_invsqrt(dr2);
                                rinv2  = rinv*rinv;
                                dr     = 1.0/rinv;
                                v      = gmx_erf(ewc*dr)*rinv;
                                vc     = qqL*v;
                                Vexcl += vc;
                                fscal  = rinv2*(vc-qqL*ewc*M_2_SQRTPI*exp(-ewc*ewc*dr2));
                                svmul(fscal, dx, df);
                                rvec_inc(f[k], df);
                                rvec_dec(f[i], df);
                                for (iv = 0; (iv < DIM); iv++)
                                {
                                    for (jv = 0; (jv < DIM); jv++)
                                    {
                                        dxdf[iv][jv] += dx[iv]*df[jv];
                                    }
                                }
                                /* d(qqL*v)/dlambda = (qqB - qqA)*v */
                                dvdl_excl += (qqB - qqA)*v;
                            }
                            else
                            {
                                /* Zero distance: use the r -> 0 limit, no force */
                                Vexcl     +=         qqL*vr0;
                                dvdl_excl += (qqB - qqA)*vr0;
                            }
                        }
                    }
                }
            }
            /* Dipole correction on force */
            if (dipole_coeff != 0)
            {
                for (j = 0; (j < DIM); j++)
                {
                    f[i][j] -= L1*dipcorrA[j]*chargeA[i]
                        + lambda*dipcorrB[j]*chargeB[i];
                }
            }
        }
    }
    /* Add half of the accumulated dx (x) df tensor to the virial */
    for (iv = 0; (iv < DIM); iv++)
    {
        for (jv = 0; (jv < DIM); jv++)
        {
            vir[iv][jv] += 0.5*dxdf[iv][jv];
        }
    }


    Vself[0] = 0;
    Vself[1] = 0;
    /* Global corrections only on master process */
    if (MASTER(cr) && thread == 0)
    {
        /* q = 0 is the A state, q = 1 the B state (free-energy runs only) */
        for (q = 0; q < (bFreeEnergy ? 2 : 1); q++)
        {
            if (calc_excl_corr)
            {
                /* Self-energy correction */
                Vself[q] = ewc*one_4pi_eps*fr->q2sum[q]*M_1_SQRTPI;
            }

            /* Apply surface dipole correction:
             * correction = dipole_coeff * (dipole)^2
             */
            if (dipole_coeff != 0)
            {
                if (ewald_geometry == eewg3D)
                {
                    Vdipole[q] = dipole_coeff*iprod(mutot[q], mutot[q]);
                }
                else if (ewald_geometry == eewg3DC)
                {
                    Vdipole[q] = dipole_coeff*mutot[q][ZZ]*mutot[q][ZZ];
                }
            }
        }
    }

    if (!bFreeEnergy)
    {
        enercorr = Vdipole[0] - Vself[0] - Vexcl;
    }
    else
    {
        /* Vexcl was already computed with the lambda-mixed charge product */
        enercorr = L1*(Vdipole[0] - Vself[0])
            + lambda*(Vdipole[1] - Vself[1])
            - Vexcl;
        *dvdlambda += Vdipole[1] - Vself[1]
            - (Vdipole[0] - Vself[0]) - dvdl_excl;
    }

    if (debug)
    {
        fprintf(debug, "Long Range corrections for Ewald interactions:\n");
        fprintf(debug, "start=%d,natoms=%d\n", start, end-start);
        /* NOTE(review): q2sumA and q2sumB are set to 0 above and never
         * updated in this function, so the q2sum value printed here is
         * always 0; the self energy itself is computed from fr->q2sum.
         */
        fprintf(debug, "q2sum = %g, Vself=%g\n",
                L1*q2sumA+lambda*q2sumB, L1*Vself[0]+lambda*Vself[1]);
        fprintf(debug, "Long Range correction: Vexcl=%g\n", Vexcl);
        if (MASTER(cr) && thread == 0)
        {
            if (epsilon_surface > 0 || ewald_geometry == eewg3DC)
            {
                fprintf(debug, "Total dipole correction: Vdipole=%g\n",
                        L1*Vdipole[0]+lambda*Vdipole[1]);
            }
        }
    }

    /* Return the correction to the energy */
    return enercorr;
}
/* Fill the potential (td->v) and force (td->f) arrays of table td for
 * table type tp at the distances td->x[nx0] .. td->x[nx-1].
 *
 * The force entry is minus the derivative of the potential with respect
 * to r. Cut-off, switch and shift parameters are taken from fr (Coulomb or
 * Van der Waals values, depending on tprops[tp].bCoulomb). For user table
 * types (ETAB_USER) the analytical part is added to the values already
 * present in td->v and td->f. Entries below nx0 are filled by linear
 * extrapolation from entry nx0. An unsupported table type ends in
 * gmx_fatal().
 */
static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
{
  /* Fill the table according to the formulas in the manual.
   * In principle, we only need the potential and the second
   * derivative, but then we would have to do lots of calculations
   * in the inner loop. By precalculating some terms (see manual)
   * we get better eventual performance, despite a larger table.
   *
   * Since some of these higher-order terms are very small,
   * we always use double precision to calculate them here, in order
   * to avoid unnecessary loss of precision.
   */
#ifdef DEBUG_SWITCH
  FILE *fp;
#endif
  int  i;
  double reppow,p;
  double r1,rc,r12,r13;
  double r,r2,r6,rc6;
  double expr,Vtab,Ftab;
  /* Parameters for David's function */
  double A=0,B=0,C=0,A_3=0,B_4=0;
  /* Parameters for the switching function */
  double ksw,swi,swi1;
  /* Temporary parameters */
  gmx_bool bSwitch,bShift;
  double ewc=fr->ewaldcoeff;
  /* 1/sqrt(pi) */
  double isp= 0.564189583547756;
   
  bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) || 
             (tp == etabCOULSwitch) ||
             (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch));
  bShift  = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) || 
             (tp == etabShift));

  reppow = fr->reppow;

  /* r1 is the start of the switch/shift range, rc the cut-off */
  if (tprops[tp].bCoulomb) {
    r1 = fr->rcoulomb_switch;
    rc = fr->rcoulomb;
  } 
  else {
    r1 = fr->rvdw_switch;
    rc = fr->rvdw;
  }
  if (bSwitch)
    ksw  = 1.0/(pow5(rc-r1));
  else
    ksw  = 0.0;
  if (bShift) {
    /* p is the power of the 1/r^p potential that is shifted */
    if (tp == etabShift)
      p = 1;
    else if (tp == etabLJ6Shift) 
      p = 6; 
    else 
      p = reppow;
    
    A = p * ((p+1)*r1-(p+4)*rc)/(pow(rc,p+2)*pow2(rc-r1));
    B = -p * ((p+1)*r1-(p+3)*rc)/(pow(rc,p+2)*pow3(rc-r1));
    C = 1.0/pow(rc,p)-A/3.0*pow3(rc-r1)-B/4.0*pow4(rc-r1);
    /* The dispersion table stores -1/r^6, so the shift terms change sign */
    if (tp == etabLJ6Shift) {
      A=-A;
      B=-B;
      C=-C;
    }
    A_3=A/3.0;
    B_4=B/4.0;
  }
  if (debug) { fprintf(debug,"Setting up tables\n"); fflush(debug); }
    
#ifdef DEBUG_SWITCH
  fp=xvgropen("switch.xvg","switch","r","s");
#endif

  /* 1/rc^6 only depends on the cut-off, so compute it once outside the loop */
  rc6 = rc*rc*rc;
  rc6 = 1.0/(rc6*rc6);
  
  for(i=td->nx0; (i<td->nx); i++) {
    r     = td->x[i];
    r2    = r*r;
    r6    = 1.0/(r2*r2*r2);
    /* Avoid pow() for the standard repulsion power of 12 */
    if (gmx_within_tol(reppow,12.0,10*GMX_DOUBLE_EPS)) {
      r12 = r6*r6;
    } else {
      r12 = pow(r,-reppow);   
    }
    Vtab  = 0.0;
    Ftab  = 0.0;
    if (bSwitch) {
      /* swi is the switch function and swi1 its 1st derivative */
      /* The switch function is 1 for r<r1, 0 for r>rc, and smooth for
       * r1<=r<=rc. The 1st and 2nd derivatives are both zero at
       * r1 and rc.
       * ksw is just the constant 1/(rc-r1)^5, to save some calculations...
       */ 
      if(r<=r1) {
        swi  = 1.0;
        swi1 = 0.0;
      } else if (r>=rc) {
        swi  = 0.0;
        swi1 = 0.0;
      } else {
        swi      = 1 - 10*pow3(r-r1)*ksw*pow2(rc-r1) 
          + 15*pow4(r-r1)*ksw*(rc-r1) - 6*pow5(r-r1)*ksw;
        swi1     = -30*pow2(r-r1)*ksw*pow2(rc-r1) 
          + 60*pow3(r-r1)*ksw*(rc-r1) - 30*pow4(r-r1)*ksw;
      }
    }
    else { /* not really needed, but avoids compiler warnings... */
      swi  = 1.0;
      swi1 = 0.0;
    }
#ifdef DEBUG_SWITCH
    {
      /* The 2nd derivative of the switch function is only needed for
       * this debug dump, so it is computed locally here. It is zero
       * outside the open interval (r1,rc) and when no switch is used.
       */
      double swi2 = 0.0;

      if (bSwitch && (r > r1) && (r < rc)) {
        swi2 = -60*(r-r1)*ksw*pow2(rc-r1)
          + 180*pow2(r-r1)*ksw*(rc-r1) - 120*pow3(r-r1)*ksw;
      }
      fprintf(fp,"%10g  %10g  %10g  %10g\n",r,swi,swi1,swi2);
    }
#endif

    switch (tp) {
    case etabLJ6:
      /* Dispersion */
      Vtab  = -r6;
      Ftab  = 6.0*Vtab/r;
      break;
    case etabLJ6Switch:
    case etabLJ6Shift:
      /* Dispersion */
      if (r < rc) {      
        Vtab  = -r6;
        Ftab  = 6.0*Vtab/r;
      }
      break;
    case etabLJ12:
      /* Repulsion */
      Vtab  = r12;
      Ftab  = reppow*Vtab/r;
      break;
    case etabLJ12Switch:
    case etabLJ12Shift:
      /* Repulsion */
      if (r < rc) {                
        Vtab  = r12;
        Ftab  = reppow*Vtab/r;
      }  
      break;
    case etabLJ6Encad:
      /* Dispersion with potential and force both zero at rc */
      if(r < rc) {
        Vtab  = -(r6-6.0*(rc-r)*rc6/rc-rc6);
        Ftab  = -(6.0*r6/r-6.0*rc6/rc);
      } else { /* r>rc */ 
        Vtab  = 0;
        Ftab  = 0;
      } 
      break;
    case etabLJ12Encad:
      /* Repulsion with potential and force both zero at rc */
      if(r < rc) {
        Vtab  = r12-12.0*(rc-r)*rc6*rc6/rc-1.0*rc6*rc6;
        Ftab  = 12.0*r12/r-12.0*rc6*rc6/rc;
      } else { /* r>rc */ 
        Vtab  = 0;
        Ftab  = 0;
      } 
      break;        
    case etabCOUL:
      Vtab  = 1.0/r;
      Ftab  = 1.0/r2;
      break;
    case etabCOULSwitch:
    case etabShift:
      if (r < rc) { 
        Vtab  = 1.0/r;
        Ftab  = 1.0/r2;
      }
      break;
    case etabEwald:
    case etabEwaldSwitch:
      Vtab  = gmx_erfc(ewc*r)/r;
      Ftab  = gmx_erfc(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
      break;
    case etabEwaldUser:
    case etabEwaldUserSwitch:
      /* Only calculate minus the reciprocal space contribution */
      Vtab  = -gmx_erf(ewc*r)/r;
      Ftab  = -gmx_erf(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
      break;
    case etabRF:
    case etabRF_ZERO:
      Vtab  = 1.0/r      +   fr->k_rf*r2 - fr->c_rf;
      Ftab  = 1.0/r2     - 2*fr->k_rf*r;
      if (tp == etabRF_ZERO && r >= rc) {
        Vtab = 0;
        Ftab = 0;
      }
      break;
    case etabEXPMIN:
      expr  = exp(-r);
      Vtab  = expr;
      Ftab  = expr;
      break;
    case etabCOULEncad:
      /* Coulomb with potential and force both zero at rc */
      if(r < rc) {
        Vtab  = 1.0/r-(rc-r)/(rc*rc)-1.0/rc;
        Ftab  = 1.0/r2-1.0/(rc*rc);
      } else { /* r>rc */ 
        Vtab  = 0;
        Ftab  = 0;
      } 
      break;
    default:
      gmx_fatal(FARGS,"Table type %d not implemented yet. (%s,%d)",
                tp,__FILE__,__LINE__);
    }
    if (bShift) {
      /* Normal coulomb with cut-off correction for potential */
      if (r < rc) {
        Vtab -= C;
        /* If in Shifting range add something to it */
        if (r > r1) {
          /* r12 and r13 are reused here as (r-r1)^2 and (r-r1)^3 */
          r12 = (r-r1)*(r-r1);
          r13 = (r-r1)*r12;
          Vtab  += - A_3*r13 - B_4*r12*r12;
          Ftab  +=   A*r12 + B*r13;
        }
      }
    }

    /* User tables: add the analytical part to the values read earlier */
    if (ETAB_USER(tp)) {
      Vtab += td->v[i];
      Ftab += td->f[i];
    }

    /* Apply the switch; the force must be updated before the potential
     * because it needs the unswitched Vtab.
     */
    if ((r > r1) && bSwitch) {
      Ftab = Ftab*swi - Vtab*swi1;
      Vtab = Vtab*swi;
    }  
    
    /* Convert to single precision when we store to mem */
    td->v[i]  = Vtab;
    td->f[i]  = Ftab;
  }

  /* Continue the table linearly from nx0 to 0.
   * These values are only required for energy minimization with overlap or TPI.
   */
  for(i=td->nx0-1; i>=0; i--) {
    td->v[i] = td->v[i+1] + td->f[i+1]*(td->x[i+1] - td->x[i]);
    td->f[i] = td->f[i+1];
  }

#ifdef DEBUG_SWITCH
  gmx_fio_fclose(fp);
#endif
}
Example #4
0
void
gmx_nb_free_energy_kernel(t_nblist *                nlist,
                          rvec *                    xx,
                          rvec *                    ff,
                          t_forcerec *              fr,
                          t_mdatoms *               mdatoms,
                          nb_kernel_data_t *        kernel_data,
                          t_nrnb *                  nrnb)
{

#define  STATE_A  0
#define  STATE_B  1
#define  NSTATES  2
    int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
    real          shX, shY, shZ;
    real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
    real          Vcoul[NSTATES], Vvdw[NSTATES];
    real          rinv6, r, rt, rtC, rtV;
    real          iqA, iqB;
    real          qq[NSTATES], vctot, krsq;
    int           ntiA, ntiB, tj[NSTATES];
    real          Vvdw6, Vvdw12, vvtot;
    real          ix, iy, iz, fix, fiy, fiz;
    real          dx, dy, dz, rsq, rinv;
    real          c6[NSTATES], c12[NSTATES];
    real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
    double        dvdl_coul, dvdl_vdw;
    real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
    real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
    real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
    real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
    int           do_coultab, do_vdwtab, do_tab, tab_elemsize;
    int           n0, n1C, n1V, nnn;
    real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
    int           icoul, ivdw;
    int           nri;
    int *         iinr;
    int *         jindex;
    int *         jjnr;
    int *         shift;
    int *         gid;
    int *         typeA;
    int *         typeB;
    int           ntype;
    real *        shiftvec;
    real          dvdl_part;
    real *        fshift;
    real          tabscale;
    real *        VFtab;
    real *        x;
    real *        f;
    real          facel, krf, crf;
    real *        chargeA;
    real *        chargeB;
    real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
    real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
    real *        nbfp;
    real *        dvdl;
    real *        Vv;
    real *        Vc;
    gmx_bool      bDoForces;
    real          rcoulomb, rvdw, sh_invrc6;
    gmx_bool      bExactElecCutoff, bExactVdwCutoff;
    real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;

    x                   = xx[0];
    f                   = ff[0];

    fshift              = fr->fshift[0];
    Vc                  = kernel_data->energygrp_elec;
    Vv                  = kernel_data->energygrp_vdw;
    tabscale            = kernel_data->table_elec_vdw->scale;
    VFtab               = kernel_data->table_elec_vdw->data;

    nri                 = nlist->nri;
    iinr                = nlist->iinr;
    jindex              = nlist->jindex;
    jjnr                = nlist->jjnr;
    icoul               = nlist->ielec;
    ivdw                = nlist->ivdw;
    shift               = nlist->shift;
    gid                 = nlist->gid;

    shiftvec            = fr->shift_vec[0];
    chargeA             = mdatoms->chargeA;
    chargeB             = mdatoms->chargeB;
    facel               = fr->epsfac;
    krf                 = fr->k_rf;
    crf                 = fr->c_rf;
    ewc                 = fr->ewaldcoeff;
    Vc                  = kernel_data->energygrp_elec;
    typeA               = mdatoms->typeA;
    typeB               = mdatoms->typeB;
    ntype               = fr->ntype;
    nbfp                = fr->nbfp;
    Vv                  = kernel_data->energygrp_vdw;
    tabscale            = kernel_data->table_elec_vdw->scale;
    VFtab               = kernel_data->table_elec_vdw->data;
    lambda_coul         = kernel_data->lambda[efptCOUL];
    lambda_vdw          = kernel_data->lambda[efptVDW];
    dvdl                = kernel_data->dvdl;
    alpha_coul          = fr->sc_alphacoul;
    alpha_vdw           = fr->sc_alphavdw;
    lam_power           = fr->sc_power;
    sc_r_power          = fr->sc_r_power;
    sigma6_def          = fr->sc_sigma6_def;
    sigma6_min          = fr->sc_sigma6_min;
    bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;

    rcoulomb            = fr->rcoulomb;
    rvdw                = fr->rvdw;
    sh_invrc6           = fr->ic->sh_invrc6;

    if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
    {
        rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
        rcutoff2        = rcutoff*rcutoff;
        rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
        d               = rcutoff-rswitch;
        swV3            = -10.0/(d*d*d);
        swV4            =  15.0/(d*d*d*d);
        swV5            =  -6.0/(d*d*d*d*d);
        swF2            = -30.0/(d*d*d);
        swF3            =  60.0/(d*d*d*d);
        swF4            = -30.0/(d*d*d*d*d);
    }
    else
    {
        /* Stupid compilers dont realize these variables will not be used */
        rswitch         = 0.0;
        swV3            = 0.0;
        swV4            = 0.0;
        swV5            = 0.0;
        swF2            = 0.0;
        swF3            = 0.0;
        swF4            = 0.0;
    }

    bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
    bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);

    /* fix compiler warnings */
    nj1   = 0;
    n1C   = n1V   = 0;
    epsC  = epsV  = 0;
    eps2C = eps2V = 0;

    dvdl_coul  = 0;
    dvdl_vdw   = 0;

    /* Lambda factor for state A, 1-lambda*/
    LFC[STATE_A] = 1.0 - lambda_coul;
    LFV[STATE_A] = 1.0 - lambda_vdw;

    /* Lambda factor for state B, lambda*/
    LFC[STATE_B] = lambda_coul;
    LFV[STATE_B] = lambda_vdw;

    /*derivative of the lambda factor for state A and B */
    DLF[STATE_A] = -1;
    DLF[STATE_B] = 1;

    for (i = 0; i < NSTATES; i++)
    {
        lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
        dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
        lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
        dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
    }
    /* precalculate */
    sigma2_def = pow(sigma6_def, 1.0/3.0);
    sigma2_min = pow(sigma6_min, 1.0/3.0);

    /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */

    do_coultab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
    do_vdwtab  = (ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);

    do_tab = do_coultab || do_vdwtab;

    /* we always use the combined table here */
    tab_elemsize = 12;

    for (n = 0; (n < nri); n++)
    {
        is3              = 3*shift[n];
        shX              = shiftvec[is3];
        shY              = shiftvec[is3+1];
        shZ              = shiftvec[is3+2];
        nj0              = jindex[n];
        nj1              = jindex[n+1];
        ii               = iinr[n];
        ii3              = 3*ii;
        ix               = shX + x[ii3+0];
        iy               = shY + x[ii3+1];
        iz               = shZ + x[ii3+2];
        iqA              = facel*chargeA[ii];
        iqB              = facel*chargeB[ii];
        ntiA             = 2*ntype*typeA[ii];
        ntiB             = 2*ntype*typeB[ii];
        vctot            = 0;
        vvtot            = 0;
        fix              = 0;
        fiy              = 0;
        fiz              = 0;

        for (k = nj0; (k < nj1); k++)
        {
            jnr              = jjnr[k];
            j3               = 3*jnr;
            dx               = ix - x[j3];
            dy               = iy - x[j3+1];
            dz               = iz - x[j3+2];
            rsq              = dx*dx+dy*dy+dz*dz;
            rinv             = gmx_invsqrt(rsq);
            r                = rsq*rinv;
            if (sc_r_power == 6.0)
            {
                rpm2             = rsq*rsq;  /* r4 */
                rp               = rpm2*rsq; /* r6 */
            }
            else if (sc_r_power == 48.0)
            {
                rp               = rsq*rsq*rsq; /* r6 */
                rp               = rp*rp;       /* r12 */
                rp               = rp*rp;       /* r24 */
                rp               = rp*rp;       /* r48 */
                rpm2             = rp/rsq;      /* r46 */
            }
            else
            {
                rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
                rpm2           = rp/rsq;
            }

            tj[STATE_A]      = ntiA+2*typeA[jnr];
            tj[STATE_B]      = ntiB+2*typeB[jnr];
            qq[STATE_A]      = iqA*chargeA[jnr];
            qq[STATE_B]      = iqB*chargeB[jnr];

            for (i = 0; i < NSTATES; i++)
            {

                c6[i]              = nbfp[tj[i]];
                c12[i]             = nbfp[tj[i]+1];
                if ((c6[i] > 0) && (c12[i] > 0))
                {
                    /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
                    sigma6[i]       = 0.5*c12[i]/c6[i];
                    sigma2[i]       = pow(sigma6[i], 1.0/3.0);
                    /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
                       what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
                    if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
                    {
                        sigma6[i] = sigma6_min;
                        sigma2[i] = sigma2_min;
                    }
                }
                else
                {
                    sigma6[i]       = sigma6_def;
                    sigma2[i]       = sigma2_def;
                }
                if (sc_r_power == 6.0)
                {
                    sigma_pow[i]    = sigma6[i];
                    sigma_powm2[i]  = sigma6[i]/sigma2[i];
                }
                else if (sc_r_power == 48.0)
                {
                    sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
                    sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
                    sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
                    sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
                }
                else
                {    /* not really supported as input, but in here for testing the general case*/
                    sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
                    sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
                }
            }

            /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
            if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
            {
                alpha_vdw_eff    = 0;
                alpha_coul_eff   = 0;
            }
            else
            {
                alpha_vdw_eff    = alpha_vdw;
                alpha_coul_eff   = alpha_coul;
            }

            for (i = 0; i < NSTATES; i++)
            {
                FscalC[i]    = 0;
                FscalV[i]    = 0;
                Vcoul[i]     = 0;
                Vvdw[i]      = 0;

                /* Only spend time on A or B state if it is non-zero */
                if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
                {

                    /* this section has to be inside the loop because of the dependence on sigma_pow */
                    rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
                    rinvC          = pow(rpinvC, 1.0/sc_r_power);
                    rC             = 1.0/rinvC;

                    rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
                    rinvV          = pow(rpinvV, 1.0/sc_r_power);
                    rV             = 1.0/rinvV;

                    if (do_tab)
                    {
                        rtC        = rC*tabscale;
                        n0         = rtC;
                        epsC       = rtC-n0;
                        eps2C      = epsC*epsC;
                        n1C        = tab_elemsize*n0;

                        rtV        = rV*tabscale;
                        n0         = rtV;
                        epsV       = rtV-n0;
                        eps2V      = epsV*epsV;
                        n1V        = tab_elemsize*n0;
                    }

                    /* With Ewald and soft-core we should put the cut-off on r,
                     * not on the soft-cored rC, as the real-space and
                     * reciprocal space contributions should (almost) cancel.
                     */
                    if (qq[i] != 0 &&
                        !(bExactElecCutoff &&
                          ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) ||
                           (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
                    {
                        switch (icoul)
                        {
                            case GMX_NBKERNEL_ELEC_COULOMB:
                            case GMX_NBKERNEL_ELEC_EWALD:
                                /* simple cutoff (yes, ewald is done all on direct space for free energy) */
                                Vcoul[i]   = qq[i]*rinvC;
                                FscalC[i]  = Vcoul[i]*rpinvC;
                                break;

                            case GMX_NBKERNEL_ELEC_REACTIONFIELD:
                                /* reaction-field */
                                Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
                                FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
                                break;

                            case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
                                /* non-Ewald tabulated coulomb */
                                nnn        = n1C;
                                Y          = VFtab[nnn];
                                F          = VFtab[nnn+1];
                                Geps       = epsC*VFtab[nnn+2];
                                Heps2      = eps2C*VFtab[nnn+3];
                                Fp         = F+Geps+Heps2;
                                VV         = Y+epsC*Fp;
                                FF         = Fp+Geps+2.0*Heps2;
                                Vcoul[i]   = qq[i]*VV;
                                FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
                                break;

                            default:
                                FscalC[i]  = 0.0;
                                Vcoul[i]   = 0.0;
                                break;
                        }

                        if (fr->coulomb_modifier == eintmodPOTSWITCH)
                        {
                            d                = rC-rswitch;
                            d                = (d > 0.0) ? d : 0.0;
                            d2               = d*d;
                            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
                            dsw              = d2*(swF2+d*(swF3+d*swF4));

                            Vcoul[i]        *= sw;
                            FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
                        }
                    }

                    if ((c6[i] != 0 || c12[i] != 0) &&
                        !(bExactVdwCutoff && rV >= rvdw))
                    {
                        switch (ivdw)
                        {
                            case GMX_NBKERNEL_VDW_LENNARDJONES:
                                /* cutoff LJ */
                                if (sc_r_power == 6.0)
                                {
                                    rinv6            = rpinvV;
                                }
                                else
                                {
                                    rinv6            = pow(rinvV, 6.0);
                                }
                                Vvdw6            = c6[i]*rinv6;
                                Vvdw12           = c12[i]*rinv6*rinv6;
                                if (fr->vdw_modifier == eintmodPOTSHIFT)
                                {
                                    Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
                                                         -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
                                }
                                else
                                {
                                    Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
                                }
                                FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
                                break;

                            case GMX_NBKERNEL_VDW_BUCKINGHAM:
                                gmx_fatal(FARGS, "Buckingham free energy not supported.");
                                break;

                            case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
                                /* Table LJ */
                                nnn = n1V+4;
                                /* dispersion */
                                Y          = VFtab[nnn];
                                F          = VFtab[nnn+1];
                                Geps       = epsV*VFtab[nnn+2];
                                Heps2      = eps2V*VFtab[nnn+3];
                                Fp         = F+Geps+Heps2;
                                VV         = Y+epsV*Fp;
                                FF         = Fp+Geps+2.0*Heps2;
                                Vvdw[i]   += c6[i]*VV;
                                FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;

                                /* repulsion */
                                Y          = VFtab[nnn+4];
                                F          = VFtab[nnn+5];
                                Geps       = epsV*VFtab[nnn+6];
                                Heps2      = eps2V*VFtab[nnn+7];
                                Fp         = F+Geps+Heps2;
                                VV         = Y+epsV*Fp;
                                FF         = Fp+Geps+2.0*Heps2;
                                Vvdw[i]   += c12[i]*VV;
                                FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
                                break;

                            default:
                                Vvdw[i]    = 0.0;
                                FscalV[i]  = 0.0;
                                break;
                        }

                        if (fr->vdw_modifier == eintmodPOTSWITCH)
                        {
                            d                = rV-rswitch;
                            d                = (d > 0.0) ? d : 0.0;
                            d2               = d*d;
                            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
                            dsw              = d2*(swF2+d*(swF3+d*swF4));

                            Vvdw[i]         *= sw;
                            FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;

                            FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
                            Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
                        }
                    }
                }
            }

            Fscal = 0;

            if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
                !(bExactElecCutoff && r >= rcoulomb))
            {
                /* because we compute the softcore normally,
                   we have to remove the ewald short range portion. Done outside of
                   the states loop because this part doesn't depend on the scaled R */

#ifdef GMX_DOUBLE
                /* Relative accuracy at R_ERF_R_INACC of 3e-10 */
#define         R_ERF_R_INACC 0.006
#else
                /* Relative accuracy at R_ERF_R_INACC of 2e-5 */
#define         R_ERF_R_INACC 0.1
#endif
                if (ewc*r > R_ERF_R_INACC)
                {
                    VV    = gmx_erf(ewc*r)*rinv;
                    FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
                }
                else
                {
                    VV    = ewc*M_2_SQRTPI;
                    FF    = ewc*ewc*ewc*M_2_SQRTPI*(2.0/3.0 - 0.4*ewc*ewc*rsq);
                }

                for (i = 0; i < NSTATES; i++)
                {
                    vctot      -= LFC[i]*qq[i]*VV;
                    Fscal      -= LFC[i]*qq[i]*FF;
                    dvdl_coul  -= (DLF[i]*qq[i])*VV;
                }
            }

            /* Assemble A and B states */
            for (i = 0; i < NSTATES; i++)
            {
                vctot         += LFC[i]*Vcoul[i];
                vvtot         += LFV[i]*Vvdw[i];

                Fscal         += LFC[i]*FscalC[i]*rpm2;
                Fscal         += LFV[i]*FscalV[i]*rpm2;

                dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
                dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
            }

            if (bDoForces)
            {
                tx         = Fscal*dx;
                ty         = Fscal*dy;
                tz         = Fscal*dz;
                fix        = fix + tx;
                fiy        = fiy + ty;
                fiz        = fiz + tz;
                f[j3]      = f[j3]   - tx;
                f[j3+1]    = f[j3+1] - ty;
                f[j3+2]    = f[j3+2] - tz;
            }
        }

        if (bDoForces)
        {
            f[ii3]         = f[ii3]        + fix;
            f[ii3+1]       = f[ii3+1]      + fiy;
            f[ii3+2]       = f[ii3+2]      + fiz;
            fshift[is3]    = fshift[is3]   + fix;
            fshift[is3+1]  = fshift[is3+1] + fiy;
            fshift[is3+2]  = fshift[is3+2] + fiz;
        }
        ggid               = gid[n];
        Vc[ggid]           = Vc[ggid] + vctot;
        Vv[ggid]           = Vv[ggid] + vvtot;
    }

    dvdl[efptCOUL]     += dvdl_coul;
    dvdl[efptVDW]      += dvdl_vdw;

    /* Estimate flops, average for free energy stuff:
     * 12  flops per outer iteration
     * 150 flops per inner iteration
     */
    inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
}