Пример #1
0
real calc_ewaldcoeff(real rc, real dtol)
{
    real x = 5, low, high;
    int  n, i = 0;


    do
    {
        i++;
        x *= 2;
    }
    while (gmx_erfc(x*rc) > dtol);

    n    = i+60; /* search tolerance is 2^-60 */
    low  = 0;
    high = x;
    for (i = 0; i < n; i++)
    {
        x = (low+high)/2;
        if (gmx_erfc(x*rc) > dtol)
        {
            low = x;
        }
        else
        {
            high = x;
        }
    }
    return x;
}
real calc_ewaldcoeff_q(real rc, real rtol)
{
    real beta = 5, low, high;
    int  n, i = 0;

    do
    {
        i++;
        beta *= 2;
    }
    while (gmx_erfc(beta*rc) > rtol);

    /* Do a binary search with tolerance 2^-60 */
    n    = i+60;
    low  = 0;
    high = beta;
    for (i = 0; i < n; i++)
    {
        beta = (low+high)/2;
        if (gmx_erfc(beta*rc) > rtol)
        {
            low = beta;
        }
        else
        {
            high = beta;
        }
    }
    return beta;
}
Пример #3
0
/* The scale (1/spacing) for third order spline interpolation
 * of the Ewald mesh contribution which needs to be subtracted
 * from the non-bonded interactions.
 */
real ewald_spline3_table_scale(real ewaldcoeff,real rc)
{
    double erf_x_d3=1.0522; /* max of (erf(x)/x)''' */
    double ftol,etol;
    double sc_f,sc_e;

    /* Force tolerance: single precision accuracy */
    ftol = GMX_FLOAT_EPS;
    sc_f = sqrt(erf_x_d3/(6*4*ftol*ewaldcoeff))*ewaldcoeff;

    /* Energy tolerance: 10x more accurate than the cut-off jump */
    etol = 0.1*gmx_erfc(ewaldcoeff*rc);
    etol = max(etol,GMX_REAL_EPS);
    sc_e = pow(erf_x_d3/(6*12*sqrt(3)*etol),1.0/3.0)*ewaldcoeff;

    return max(sc_f,sc_e);
}
Пример #4
0
static void approx_2dof(real s2, real x, real *shift, real *scale)
{
    /* A particle with 1 DOF constrained has 2 DOFs instead of 3.
     * This code is also used for particles with multiple constraints,
     * in which case we overestimate the displacement.
     * The 2DOF distribution is sqrt(pi/2)*erfc(r/(sqrt(2)*s))/(2*s).
     * We approximate this with scale*Gaussian(s,r+shift),
     * by matching the distribution value and derivative at x.
     * This is a tight overestimate for all r>=0 at any s and x.
     */
    real ex, er;

    ex = exp(-x*x/(2*s2));
    er = gmx_erfc(x/sqrt(2*s2));

    *shift = -x + sqrt(2*s2/M_PI)*ex/er;
    *scale = 0.5*M_PI*exp(ex*ex/(M_PI*er*er))*er;
}
Пример #5
0
gmx_inline static void calc_exponentials_lj(int start, int end, real *r, real *tmp2, real *d)
{
    int  kx;
    real mk;
    for (kx = start; kx < end; kx++)
    {
        d[kx] = 1.0/d[kx];
    }

    for (kx = start; kx < end; kx++)
    {
        r[kx] = exp(r[kx]);
    }

    for (kx = start; kx < end; kx++)
    {
        mk       = tmp2[kx];
        tmp2[kx] = sqrt(M_PI)*mk*gmx_erfc(mk);
    }
}
/* Estimate the error of the SPME Ewald sum. This estimate is based upon
 * a) a homogeneous distribution of the charges
 * b) a total charge of zero.
 */
static void estimate_PME_error(t_inputinfo *info, t_state *state, 
        gmx_mtop_t *mtop, FILE *fp_out, gmx_bool bVerbose, unsigned int seed,
        t_commrec *cr)
{
    rvec *x=NULL; /* The coordinates */
    real *q=NULL; /* The charges     */
    real  edir=0.0; /* real space error */
    real  erec=0.0; /* reciprocal space error */
    real  derr=0.0; /* difference of real and reciprocal space error */
    real  derr0=0.0; /* difference of real and reciprocal space error */
    real  beta=0.0; /* splitting parameter beta */
    real  beta0=0.0; /* splitting parameter beta */
    int ncharges; /* The number of atoms with charges */
    int nsamples; /* The number of samples used for the calculation of the
                   * self-energy error term */
    int i=0;
    
    if (MASTER(cr))
        fprintf(fp_out, "\n--- PME ERROR ESTIMATE ---\n");    

    /* Prepare an x and q array with only the charged atoms */
    ncharges = prepare_x_q(&q, &x, mtop, state->x, cr);
    if (MASTER(cr))
    {
        calc_q2all(mtop, &(info->q2all), &(info->q2allnr));
        info->ewald_rtol[0]=gmx_erfc(info->rcoulomb[0]*info->ewald_beta[0]);
        /* Write some info to log file */
        fprintf(fp_out, "Box volume              : %g nm^3\n", info->volume);
        fprintf(fp_out, "Number of charged atoms : %d (total atoms %d)\n",ncharges, info->natoms);
        fprintf(fp_out, "Coulomb radius          : %g nm\n", info->rcoulomb[0]);
        fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
        fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
        fprintf(fp_out, "Interpolation order     : %d\n", info->pme_order[0]);
        fprintf(fp_out, "Fourier grid (nx,ny,nz) : %d x %d x %d\n", 
                info->nkx[0],info->nky[0],info->nkz[0]);
        fflush(fp_out);
        
    }

    if (PAR(cr))
        bcast_info(info, cr);


    /* Calculate direct space error */
    info->e_dir[0] = estimate_direct(info);

    /* Calculate reciprocal space error */
    info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
                                         seed, &nsamples, cr);

    if (PAR(cr))
        bcast_info(info, cr);
    
    if (MASTER(cr))
    {
        fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
        fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
        fprintf(fp_out, "Self-energy error term was estimated using %d samples\n", nsamples);
        fflush(fp_out);
        fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
        fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
    }

    i=0;

    if (info->bTUNE)
    {
        if(MASTER(cr))
            fprintf(stderr,"Starting tuning ...\n");
        edir=info->e_dir[0];
        erec=info->e_rec[0];
        derr0=edir-erec;
        beta0=info->ewald_beta[0];
        if (derr>0.0)
            info->ewald_beta[0]+=0.1;
        else
            info->ewald_beta[0]-=0.1;
        info->e_dir[0] = estimate_direct(info);
        info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
                                             seed, &nsamples, cr);

        if (PAR(cr))
            bcast_info(info, cr);
    
        
        edir=info->e_dir[0];
        erec=info->e_rec[0];
        derr=edir-erec;
        while ( fabs(derr/min(erec,edir)) > 1e-4)
        {
    
            beta=info->ewald_beta[0];
            beta-=derr*(info->ewald_beta[0]-beta0)/(derr-derr0);
            beta0=info->ewald_beta[0];
            info->ewald_beta[0]=beta;
            derr0=derr;

            info->e_dir[0] = estimate_direct(info);
            info->e_rec[0] = estimate_reciprocal(info, x, q, ncharges, fp_out, bVerbose,
                                                 seed, &nsamples, cr);

            if (PAR(cr))
                bcast_info(info, cr);
            
            edir=info->e_dir[0];
            erec=info->e_rec[0];
            derr=edir-erec;
            
            if (MASTER(cr))
            {
                i++;
                fprintf(stderr,"difference between real and rec. space error (step %d): %g\n",i,fabs(derr));
                fprintf(stderr,"old beta: %f\n",beta0);
                fprintf(stderr,"new beta: %f\n",beta);
            }
        }

        info->ewald_rtol[0]=gmx_erfc(info->rcoulomb[0]*info->ewald_beta[0]);

        if (MASTER(cr))
        {
            /* Write some info to log file */
            fflush(fp_out);
            fprintf(fp_out, "=========  After tuning ========\n");
            fprintf(fp_out, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
            fprintf(fp_out, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
            fprintf(stderr, "Direct space error est. : %10.3e kJ/(mol*nm)\n", info->e_dir[0]);
            fprintf(stderr, "Reciprocal sp. err. est.: %10.3e kJ/(mol*nm)\n", info->e_rec[0]);
            fprintf(fp_out, "Ewald_rtol              : %g\n", info->ewald_rtol[0]);
            fprintf(fp_out, "Ewald parameter beta    : %g\n", info->ewald_beta[0]);
            fflush(fp_out);
            
        }   

    }

}
Пример #7
0
void calc_verlet_buffer_size(const gmx_mtop_t *mtop, real boxvol,
                             const t_inputrec *ir,
                             real reference_temperature,
                             const verletbuf_list_setup_t *list_setup,
                             int *n_nonlin_vsite,
                             real *rlist)
{
    double                resolution;
    char                 *env;

    real                  particle_distance;
    real                  nb_clust_frac_pairs_not_in_list_at_cutoff;

    verletbuf_atomtype_t *att  = NULL;
    int                   natt = -1, i;
    double                reppow;
    real                  md1_ljd, d2_ljd, md3_ljd;
    real                  md1_ljr, d2_ljr, md3_ljr;
    real                  md1_el,  d2_el;
    real                  elfac;
    real                  kT_fac, mass_min;
    int                   ib0, ib1, ib;
    real                  rb, rl;
    real                  drift;

    if (reference_temperature < 0)
    {
        if (EI_MD(ir->eI) && ir->etc == etcNO)
        {
            /* This case should be handled outside calc_verlet_buffer_size */
            gmx_incons("calc_verlet_buffer_size called with an NVE ensemble and reference_temperature < 0");
        }

        /* We use the maximum temperature with multiple T-coupl groups.
         * We could use a per particle temperature, but since particles
         * interact, this might underestimate the buffer size.
         */
        reference_temperature = 0;
        for (i = 0; i < ir->opts.ngtc; i++)
        {
            if (ir->opts.tau_t[i] >= 0)
            {
                reference_temperature = max(reference_temperature,
                                            ir->opts.ref_t[i]);
            }
        }
    }

    /* Resolution of the buffer size */
    resolution = 0.001;

    env = getenv("GMX_VERLET_BUFFER_RES");
    if (env != NULL)
    {
        sscanf(env, "%lf", &resolution);
    }

    /* In an atom wise pair-list there would be no pairs in the list
     * beyond the pair-list cut-off.
     * However, we use a pair-list of groups vs groups of atoms.
     * For groups of 4 atoms, the parallelism of SSE instructions, only
     * 10% of the atoms pairs are not in the list just beyond the cut-off.
     * As this percentage increases slowly compared to the decrease of the
     * Gaussian displacement distribution over this range, we can simply
     * reduce the drift by this fraction.
     * For larger groups, e.g. of 8 atoms, this fraction will be lower,
     * so then buffer size will be on the conservative (large) side.
     *
     * Note that the formulas used here do not take into account
     * cancellation of errors which could occur by missing both
     * attractive and repulsive interactions.
     *
     * The only major assumption is homogeneous particle distribution.
     * For an inhomogeneous system, such as a liquid-vapor system,
     * the buffer will be underestimated. The actual energy drift
     * will be higher by the factor: local/homogeneous particle density.
     *
     * The results of this estimate have been checked againt simulations.
     * In most cases the real drift differs by less than a factor 2.
     */

    /* Worst case assumption: HCP packing of particles gives largest distance */
    particle_distance = pow(boxvol*sqrt(2)/mtop->natoms, 1.0/3.0);

    get_verlet_buffer_atomtypes(mtop, &att, &natt, n_nonlin_vsite);
    assert(att != NULL && natt >= 0);

    if (debug)
    {
        fprintf(debug, "particle distance assuming HCP packing: %f nm\n",
                particle_distance);
        fprintf(debug, "energy drift atom types: %d\n", natt);
    }

    reppow   = mtop->ffparams.reppow;
    md1_ljd  = 0;
    d2_ljd   = 0;
    md3_ljd  = 0;
    md1_ljr  = 0;
    d2_ljr   = 0;
    md3_ljr  = 0;
    if (ir->vdwtype == evdwCUT)
    {
        real sw_range, md3_pswf;

        switch (ir->vdw_modifier)
        {
            case eintmodNONE:
            case eintmodPOTSHIFT:
                /* -dV/dr of -r^-6 and r^-reppow */
                md1_ljd =     -6*pow(ir->rvdw, -7.0);
                md1_ljr = reppow*pow(ir->rvdw, -(reppow+1));
                /* The contribution of the higher derivatives is negligible */
                break;
            case eintmodFORCESWITCH:
                /* At the cut-off: V=V'=V''=0, so we use only V''' */
                md3_ljd  = -md3_force_switch(6.0,    ir->rvdw_switch, ir->rvdw);
                md3_ljr  =  md3_force_switch(reppow, ir->rvdw_switch, ir->rvdw);
                break;
            case eintmodPOTSWITCH:
                /* At the cut-off: V=V'=V''=0.
                 * V''' is given by the original potential times
                 * the third derivative of the switch function.
                 */
                sw_range  = ir->rvdw - ir->rvdw_switch;
                md3_pswf  = 60.0*pow(sw_range, -3.0);

                md3_ljd   = -pow(ir->rvdw, -6.0   )*md3_pswf;
                md3_ljr   =  pow(ir->rvdw, -reppow)*md3_pswf;
                break;
            default:
                gmx_incons("Unimplemented VdW modifier");
        }
    }
    else if (EVDW_PME(ir->vdwtype))
    {
        real b, r, br, br2, br4, br6;
        b        = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
        r        = ir->rvdw;
        br       = b*r;
        br2      = br*br;
        br4      = br2*br2;
        br6      = br4*br2;
        /* -dV/dr of g(br)*r^-6 [where g(x) = exp(-x^2)(1+x^2+x^4/2), see LJ-PME equations in manual] and r^-reppow */
        md1_ljd  = -exp(-br2)*(br6 + 3.0*br4 + 6.0*br2 + 6.0)*pow(r, -7.0);
        md1_ljr  = reppow*pow(r, -(reppow+1));
        /* The contribution of the higher derivatives is negligible */
    }
    else
    {
        gmx_fatal(FARGS, "Energy drift calculation is only implemented for plain cut-off Lennard-Jones interactions");
    }

    elfac = ONE_4PI_EPS0/ir->epsilon_r;

    /* Determine md=-dV/dr and dd=d^2V/dr^2 */
    md1_el = 0;
    d2_el  = 0;
    if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype))
    {
        real eps_rf, k_rf;

        if (ir->coulombtype == eelCUT)
        {
            eps_rf = 1;
            k_rf   = 0;
        }
        else
        {
            eps_rf = ir->epsilon_rf/ir->epsilon_r;
            if (eps_rf != 0)
            {
                k_rf = pow(ir->rcoulomb, -3.0)*(eps_rf - ir->epsilon_r)/(2*eps_rf + ir->epsilon_r);
            }
            else
            {
                /* epsilon_rf = infinity */
                k_rf = 0.5*pow(ir->rcoulomb, -3.0);
            }
        }

        if (eps_rf > 0)
        {
            md1_el = elfac*(pow(ir->rcoulomb, -2.0) - 2*k_rf*ir->rcoulomb);
        }
        d2_el      = elfac*(2*pow(ir->rcoulomb, -3.0) + 2*k_rf);
    }
    else if (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD)
    {
        real b, rc, br;

        b      = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
        rc     = ir->rcoulomb;
        br     = b*rc;
        md1_el = elfac*(b*exp(-br*br)*M_2_SQRTPI/rc + gmx_erfc(br)/(rc*rc));
        d2_el  = elfac/(rc*rc)*(2*b*(1 + br*br)*exp(-br*br)*M_2_SQRTPI + 2*gmx_erfc(br)/rc);
    }
    else
    {
        gmx_fatal(FARGS, "Energy drift calculation is only implemented for Reaction-Field and Ewald electrostatics");
    }

    /* Determine the variance of the atomic displacement
     * over nstlist-1 steps: kT_fac
     * For inertial dynamics (not Brownian dynamics) the mass factor
     * is not included in kT_fac, it is added later.
     */
    if (ir->eI == eiBD)
    {
        /* Get the displacement distribution from the random component only.
         * With accurate integration the systematic (force) displacement
         * should be negligible (unless nstlist is extremely large, which
         * you wouldn't do anyhow).
         */
        kT_fac = 2*BOLTZ*reference_temperature*(ir->nstlist-1)*ir->delta_t;
        if (ir->bd_fric > 0)
        {
            /* This is directly sigma^2 of the displacement */
            kT_fac /= ir->bd_fric;

            /* Set the masses to 1 as kT_fac is the full sigma^2,
             * but we divide by m in ener_drift().
             */
            for (i = 0; i < natt; i++)
            {
                att[i].prop.mass = 1;
            }
        }
        else
        {
            real tau_t;

            /* Per group tau_t is not implemented yet, use the maximum */
            tau_t = ir->opts.tau_t[0];
            for (i = 1; i < ir->opts.ngtc; i++)
            {
                tau_t = max(tau_t, ir->opts.tau_t[i]);
            }

            kT_fac *= tau_t;
            /* This kT_fac needs to be divided by the mass to get sigma^2 */
        }
    }
    else
    {
        kT_fac = BOLTZ*reference_temperature*sqr((ir->nstlist-1)*ir->delta_t);
    }

    mass_min = att[0].prop.mass;
    for (i = 1; i < natt; i++)
    {
        mass_min = min(mass_min, att[i].prop.mass);
    }

    if (debug)
    {
        fprintf(debug, "md1_ljd %9.2e d2_ljd %9.2e md3_ljd %9.2e\n", md1_ljd, d2_ljd, md3_ljd);
        fprintf(debug, "md1_ljr %9.2e d2_ljr %9.2e md3_ljr %9.2e\n", md1_ljr, d2_ljr, md3_ljr);
        fprintf(debug, "md1_el  %9.2e d2_el  %9.2e\n", md1_el, d2_el);
        fprintf(debug, "sqrt(kT_fac) %f\n", sqrt(kT_fac));
        fprintf(debug, "mass_min %f\n", mass_min);
    }

    /* Search using bisection */
    ib0 = -1;
    /* The drift will be neglible at 5 times the max sigma */
    ib1 = (int)(5*2*sqrt(kT_fac/mass_min)/resolution) + 1;
    while (ib1 - ib0 > 1)
    {
        ib = (ib0 + ib1)/2;
        rb = ib*resolution;
        rl = max(ir->rvdw, ir->rcoulomb) + rb;

        /* Calculate the average energy drift at the last step
         * of the nstlist steps at which the pair-list is used.
         */
        drift = ener_drift(att, natt, &mtop->ffparams,
                           kT_fac,
                           md1_ljd, d2_ljd, md3_ljd,
                           md1_ljr, d2_ljr, md3_ljr,
                           md1_el,  d2_el,
                           rb,
                           rl, boxvol);

        /* Correct for the fact that we are using a Ni x Nj particle pair list
         * and not a 1 x 1 particle pair list. This reduces the drift.
         */
        /* We don't have a formula for 8 (yet), use 4 which is conservative */
        nb_clust_frac_pairs_not_in_list_at_cutoff =
            surface_frac(min(list_setup->cluster_size_i, 4),
                         particle_distance, rl)*
            surface_frac(min(list_setup->cluster_size_j, 4),
                         particle_distance, rl);
        drift *= nb_clust_frac_pairs_not_in_list_at_cutoff;

        /* Convert the drift to drift per unit time per atom */
        drift /= ir->nstlist*ir->delta_t*mtop->natoms;

        if (debug)
        {
            fprintf(debug, "ib %3d %3d %3d rb %.3f %dx%d fac %.3f drift %.1e\n",
                    ib0, ib, ib1, rb,
                    list_setup->cluster_size_i, list_setup->cluster_size_j,
                    nb_clust_frac_pairs_not_in_list_at_cutoff,
                    drift);
        }

        if (fabs(drift) > ir->verletbuf_tol)
        {
            ib0 = ib;
        }
        else
        {
            ib1 = ib;
        }
    }

    sfree(att);

    *rlist = max(ir->rvdw, ir->rcoulomb) + ib1*resolution;
}
Пример #8
0
static real ener_drift(const verletbuf_atomtype_t *att, int natt,
                       const gmx_ffparams_t *ffp,
                       real kT_fac,
                       real md1_ljd, real d2_ljd, real md3_ljd,
                       real md1_ljr, real d2_ljr, real md3_ljr,
                       real md1_el,  real d2_el,
                       real r_buffer,
                       real rlist, real boxvol)
{
    /* Erfc(8)=1e-29, use this limit so we have some space for arithmetic
     * on the result when using float precision.
     */
    const real erfc_arg_max = 8.0;

    double     drift_tot, pot1, pot2, pot3, pot;
    int        i, j;
    real       s2i_2d, s2i_3d, s2j_2d, s2j_3d, s2, s;
    int        ti, tj;
    real       md1, d2, md3;
    real       sc_fac, rsh, rsh2;
    double     c_exp, c_erfc;

    drift_tot = 0;

    /* Loop over the different atom type pairs */
    for (i = 0; i < natt; i++)
    {
        get_atom_sigma2(kT_fac, &att[i].prop, &s2i_2d, &s2i_3d);
        ti = att[i].prop.type;

        for (j = i; j < natt; j++)
        {
            get_atom_sigma2(kT_fac, &att[j].prop, &s2j_2d, &s2j_3d);
            tj = att[j].prop.type;

            /* Add up the up to four independent variances */
            s2 = s2i_2d + s2i_3d + s2j_2d + s2j_3d;

            /* Note that attractive and repulsive potentials for individual
             * pairs will partially cancel.
             */
            /* -dV/dr at the cut-off for LJ + Coulomb */
            md1 =
                md1_ljd*ffp->iparams[ti*ffp->atnr+tj].lj.c6 +
                md1_ljr*ffp->iparams[ti*ffp->atnr+tj].lj.c12 +
                md1_el*att[i].prop.q*att[j].prop.q;

            /* d2V/dr2 at the cut-off for LJ + Coulomb */
            d2 =
                d2_ljd*ffp->iparams[ti*ffp->atnr+tj].lj.c6 +
                d2_ljr*ffp->iparams[ti*ffp->atnr+tj].lj.c12 +
                d2_el*att[i].prop.q*att[j].prop.q;

            /* -d3V/dr3 at the cut-off for LJ, we neglect Coulomb */
            md3 =
                md3_ljd*ffp->iparams[ti*ffp->atnr+tj].lj.c6 +
                md3_ljr*ffp->iparams[ti*ffp->atnr+tj].lj.c12;

            rsh    = r_buffer;
            sc_fac = 1.0;

            if (rsh*rsh > 2*s2*erfc_arg_max*erfc_arg_max)
            {
                /* Erfc might run out of float and become 0, somewhat before
                 * c_exp becomes 0. To avoid this and to avoid NaN in
                 * approx_2dof, we set both c_expc and c_erfc to zero.
                 * In any relevant case this has no effect on the results,
                 * since c_exp < 6e-29, so the displacement is completely
                 * negligible for such atom pairs (and an overestimate).
                 * In nearly all use cases, there will be other atom
                 * pairs that contribute much more to the total, so zeroing
                 * this particular contribution has no effect at all.
                 */
                c_exp  = 0;
                c_erfc = 0;
            }
            else
            {
                /* For constraints: adapt r and scaling for the Gaussian */
                if (att[i].prop.bConstr)
                {
                    real sh, sc;

                    approx_2dof(s2i_2d, r_buffer*s2i_2d/s2, &sh, &sc);
                    rsh    += sh;
                    sc_fac *= sc;
                }
                if (att[j].prop.bConstr)
                {
                    real sh, sc;

                    approx_2dof(s2j_2d, r_buffer*s2j_2d/s2, &sh, &sc);
                    rsh    += sh;
                    sc_fac *= sc;
                }

                /* Exact contribution of an atom pair with Gaussian displacement
                 * with sigma s to the energy drift for a potential with
                 * derivative -md and second derivative dd at the cut-off.
                 * The only catch is that for potentials that change sign
                 * near the cut-off there could be an unlucky compensation
                 * of positive and negative energy drift.
                 * Such potentials are extremely rare though.
                 *
                 * Note that pot has unit energy*length, as the linear
                 * atom density still needs to be put in.
                 */
                c_exp  = exp(-rsh*rsh/(2*s2))/sqrt(2*M_PI);
                c_erfc = 0.5*gmx_erfc(rsh/(sqrt(2*s2)));
            }
            s      = sqrt(s2);
            rsh2   = rsh*rsh;

            pot1 = sc_fac*
                md1/2*((rsh2 + s2)*c_erfc - rsh*s*c_exp);
            pot2 = sc_fac*
                d2/6*(s*(rsh2 + 2*s2)*c_exp - rsh*(rsh2 + 3*s2)*c_erfc);
            pot3 = sc_fac*
                md3/24*((rsh2*rsh2 + 6*rsh2*s2 + 3*s2*s2)*c_erfc - rsh*s*(rsh2 + 5*s2)*c_exp);
            pot = pot1 + pot2 + pot3;

            if (gmx_debug_at)
            {
                fprintf(debug, "n %d %d d s %.3f %.3f %.3f %.3f con %d -d1 %8.1e d2 %8.1e -d3 %8.1e pot1 %8.1e pot2 %8.1e pot3 %8.1e pot %8.1e\n",
                        att[i].n, att[j].n,
                        sqrt(s2i_2d), sqrt(s2i_3d),
                        sqrt(s2j_2d), sqrt(s2j_3d),
                        att[i].prop.bConstr+att[j].prop.bConstr,
                        md1, d2, md3,
                        pot1, pot2, pot3, pot);
            }

            /* Multiply by the number of atom pairs */
            if (j == i)
            {
                pot *= (double)att[i].n*(att[i].n - 1)/2;
            }
            else
            {
                pot *= (double)att[i].n*att[j].n;
            }
            /* We need the line density to get the energy drift of the system.
             * The effective average r^2 is close to (rlist+sigma)^2.
             */
            pot *= 4*M_PI*sqr(rlist + s)/boxvol;

            /* Add the unsigned drift to avoid cancellation of errors */
            drift_tot += fabs(pot);
        }
    }

    return drift_tot;
}
Пример #9
0
gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
                          t_commrec                 *cr,
                          FILE                      *fp_err,
                          FILE                      *fp_log,
                          t_inputrec                *ir,
                          t_state                   *state,
                          double                     cycles,
                          interaction_const_t       *ic,
                          struct nonbonded_verlet_t *nbv,
                          struct gmx_pme_t **        pmedata,
                          gmx_int64_t                step)
{
    gmx_bool     OK;
    pme_setup_t *set;
    double       cycles_fast;
    char         buf[STRLEN], sbuf[22];
    real         rtab;
    gmx_bool     bUsesSimpleTables = TRUE;

    if (pme_lb->stage == pme_lb->nstage)
    {
        return FALSE;
    }

    if (PAR(cr))
    {
        gmx_sumd(1, &cycles, cr);
        cycles /= cr->nnodes;
    }

    set = &pme_lb->setup[pme_lb->cur];
    set->count++;

    rtab = ir->rlistlong + ir->tabext;

    if (set->count % 2 == 1)
    {
        /* Skip the first cycle, because the first step after a switch
         * is much slower due to allocation and/or caching effects.
         */
        return TRUE;
    }

    sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf));
    print_grid(fp_err, fp_log, buf, "timed with", set, cycles);

    if (set->count <= 2)
    {
        set->cycles = cycles;
    }
    else
    {
        if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
            pme_lb->stage == pme_lb->nstage - 1)
        {
            /* The performance went up a lot (due to e.g. DD load balancing).
             * Add a stage, keep the minima, but rescan all setups.
             */
            pme_lb->nstage++;

            if (debug)
            {
                fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
                        "Increased the number stages to %d"
                        " and ignoring the previous performance\n",
                        set->grid[XX], set->grid[YY], set->grid[ZZ],
                        cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL,
                        pme_lb->nstage);
            }
        }
        set->cycles = min(set->cycles, cycles);
    }

    if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
    {
        pme_lb->fastest = pme_lb->cur;

        if (DOMAINDECOMP(cr))
        {
            /* We found a new fastest setting, ensure that with subsequent
             * shorter cut-off's the dynamic load balancing does not make
             * the use of the current cut-off impossible. This solution is
             * a trade-off, as the PME load balancing and DD domain size
             * load balancing can interact in complex ways.
             * With the Verlet kernels, DD load imbalance will usually be
             * mainly due to bonded interaction imbalance, which will often
             * quickly push the domain boundaries beyond the limit for the
             * optimal, PME load balanced, cut-off. But it could be that
             * better overal performance can be obtained with a slightly
             * shorter cut-off and better DD load balancing.
             */
            change_dd_dlb_cutoff_limit(cr);
        }
    }
    cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;

    /* Check in stage 0 if we should stop scanning grids.
     * Stop when the time is more than SLOW_FAC longer than the fastest.
     */
    if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
        cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
    {
        pme_lb->n = pme_lb->cur + 1;
        /* Done with scanning, go to stage 1 */
        switch_to_stage1(pme_lb);
    }

    if (pme_lb->stage == 0)
    {
        int gridsize_start;

        gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];

        do
        {
            if (pme_lb->cur+1 < pme_lb->n)
            {
                /* We had already generated the next setup */
                OK = TRUE;
            }
            else
            {
                /* Find the next setup */
                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order, cr->dd);

                if (!OK)
                {
                    pme_lb->elimited = epmelblimPMEGRID;
                }
            }

            if (OK && ir->ePBC != epbcNONE)
            {
                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
                      <= max_cutoff2(ir->ePBC, state->box));
                if (!OK)
                {
                    pme_lb->elimited = epmelblimBOX;
                }
            }

            if (OK)
            {
                pme_lb->cur++;

                if (DOMAINDECOMP(cr))
                {
                    OK = change_dd_cutoff(cr, state, ir,
                                          pme_lb->setup[pme_lb->cur].rlistlong);
                    if (!OK)
                    {
                        /* Failed: do not use this setup */
                        pme_lb->cur--;
                        pme_lb->elimited = epmelblimDD;
                    }
                }
            }
            if (!OK)
            {
                /* We hit the upper limit for the cut-off,
                 * the setup should not go further than cur.
                 */
                pme_lb->n = pme_lb->cur + 1;
                print_loadbal_limited(fp_err, fp_log, step, pme_lb);
                /* Switch to the next stage */
                switch_to_stage1(pme_lb);
            }
        }
        while (OK &&
               !(pme_lb->setup[pme_lb->cur].grid[XX]*
                 pme_lb->setup[pme_lb->cur].grid[YY]*
                 pme_lb->setup[pme_lb->cur].grid[ZZ] <
                 gridsize_start*PME_LB_GRID_SCALE_FAC
                 &&
                 pme_lb->setup[pme_lb->cur].grid_efficiency <
                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
    }

    if (pme_lb->stage > 0 && pme_lb->end == 1)
    {
        pme_lb->cur   = 0;
        pme_lb->stage = pme_lb->nstage;
    }
    else if (pme_lb->stage > 0 && pme_lb->end > 1)
    {
        /* If stage = nstage-1:
         *   scan over all setups, rerunning only those setups
         *   which are not much slower than the fastest
         * else:
         *   use the next setup
         */
        do
        {
            pme_lb->cur++;
            if (pme_lb->cur == pme_lb->end)
            {
                pme_lb->stage++;
                pme_lb->cur = pme_lb->start;
            }
        }
        while (pme_lb->stage == pme_lb->nstage - 1 &&
               pme_lb->setup[pme_lb->cur].count > 0 &&
               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);

        if (pme_lb->stage == pme_lb->nstage)
        {
            /* We are done optimizing, use the fastest setup we found */
            pme_lb->cur = pme_lb->fastest;
        }
    }

    if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
    {
        OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong);
        if (!OK)
        {
            /* Failsafe solution */
            if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage)
            {
                pme_lb->stage--;
            }
            pme_lb->fastest  = 0;
            pme_lb->start    = 0;
            pme_lb->end      = pme_lb->cur;
            pme_lb->cur      = pme_lb->start;
            pme_lb->elimited = epmelblimDD;
            print_loadbal_limited(fp_err, fp_log, step, pme_lb);
        }
    }

    /* Change the Coulomb cut-off and the PME grid */

    set = &pme_lb->setup[pme_lb->cur];

    ic->rcoulomb     = set->rcut_coulomb;
    ic->rlist        = set->rlist;
    ic->rlistlong    = set->rlistlong;
    ir->nstcalclr    = set->nstcalclr;
    ic->ewaldcoeff_q = set->ewaldcoeff_q;
    /* TODO: centralize the code that sets the potentials shifts */
    if (ic->coulomb_modifier == eintmodPOTSHIFT)
    {
        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
    }
    if (EVDW_PME(ic->vdwtype))
    {
        /* We have PME for both Coulomb and VdW, set rvdw equal to rcoulomb */
        ic->rvdw            = set->rcut_coulomb;
        ic->ewaldcoeff_lj   = set->ewaldcoeff_lj;
        if (ic->vdw_modifier == eintmodPOTSHIFT)
        {
            real crc2;

            ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
            ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
            ic->sh_invrc6             = -ic->dispersion_shift.cpot;
            crc2                      = sqr(ic->ewaldcoeff_lj*ic->rvdw);
            ic->sh_lj_ewald           = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
        }
    }

    bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
    nbnxn_gpu_pme_loadbal_update_param(nbv, ic);

    /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
     * also sharing texture references. To keep the code simple, we don't
     * treat texture references as shared resources, but this means that
     * the coulomb_tab texture ref will get updated by multiple threads.
     * Hence, to ensure that the non-bonded kernels don't start before all
     * texture binding operations are finished, we need to wait for all ranks
     * to arrive here before continuing.
     *
     * Note that we could omit this barrier if GPUs are not shared (or
     * texture objects are used), but as this is initialization code, there
     * is not point in complicating things.
     */
#ifdef GMX_THREAD_MPI
    if (PAR(cr) && use_GPU(nbv))
    {
        gmx_barrier(cr);
    }
#endif  /* GMX_THREAD_MPI */

    /* Usually we won't need the simple tables with GPUs.
     * But we do with hybrid acceleration and with free energy.
     * To avoid bugs, we always re-initialize the simple tables here.
     */
    init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab);

    if (cr->duty & DUTY_PME)
    {
        if (pme_lb->setup[pme_lb->cur].pmedata == NULL)
        {
            /* Generate a new PME data structure,
             * copying part of the old pointers.
             */
            gmx_pme_reinit(&set->pmedata,
                           cr, pme_lb->setup[0].pmedata, ir,
                           set->grid);
        }
        *pmedata = set->pmedata;
    }
    else
    {
        /* Tell our PME-only node to switch grid */
        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff_q, set->ewaldcoeff_lj);
    }

    if (debug)
    {
        print_grid(NULL, debug, "", "switched to", set, -1);
    }

    if (pme_lb->stage == pme_lb->nstage)
    {
        print_grid(fp_err, fp_log, "", "optimal", set, -1);
    }

    return TRUE;
}
Пример #10
0
static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
{
  /* Fill the table according to the formulas in the manual.
   * In principle, we only need the potential and the second
   * derivative, but then we would have to do lots of calculations
   * in the inner loop. By precalculating some terms (see manual)
   * we get better eventual performance, despite a larger table.
   *
   * Since some of these higher-order terms are very small,
   * we always use double precision to calculate them here, in order
   * to avoid unnecessary loss of precision.
   */
#ifdef DEBUG_SWITCH
  FILE *fp;
#endif
  int  i;
  double reppow,p;
  double r1,rc,r12,r13;
  double r,r2,r6,rc6;
  double expr,Vtab,Ftab;
  /* Parameters for David's function */
  double A=0,B=0,C=0,A_3=0,B_4=0;
  /* Parameters for the switching function */
  double ksw,swi,swi1;
  /* Temporary parameters */
  gmx_bool bSwitch,bShift;
  double ewc=fr->ewaldcoeff;
  double isp= 0.564189583547756;
   
  bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) || 
	     (tp == etabCOULSwitch) ||
	     (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch));
  bShift  = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) || 
	     (tp == etabShift));

  reppow = fr->reppow;

  if (tprops[tp].bCoulomb) {
    r1 = fr->rcoulomb_switch;
    rc = fr->rcoulomb;
  } 
  else {
    r1 = fr->rvdw_switch;
    rc = fr->rvdw;
  }
  if (bSwitch)
    ksw  = 1.0/(pow5(rc-r1));
  else
    ksw  = 0.0;
  if (bShift) {
    if (tp == etabShift)
      p = 1;
    else if (tp == etabLJ6Shift) 
      p = 6; 
    else 
      p = reppow;
    
    A = p * ((p+1)*r1-(p+4)*rc)/(pow(rc,p+2)*pow2(rc-r1));
    B = -p * ((p+1)*r1-(p+3)*rc)/(pow(rc,p+2)*pow3(rc-r1));
    C = 1.0/pow(rc,p)-A/3.0*pow3(rc-r1)-B/4.0*pow4(rc-r1);
    if (tp == etabLJ6Shift) {
      A=-A;
      B=-B;
      C=-C;
    }
    A_3=A/3.0;
    B_4=B/4.0;
  }
  if (debug) { fprintf(debug,"Setting up tables\n"); fflush(debug); }
    
#ifdef DEBUG_SWITCH
  fp=xvgropen("switch.xvg","switch","r","s");
#endif
  
  for(i=td->nx0; (i<td->nx); i++) {
    r     = td->x[i];
    r2    = r*r;
    r6    = 1.0/(r2*r2*r2);
    if (gmx_within_tol(reppow,12.0,10*GMX_DOUBLE_EPS)) {
      r12 = r6*r6;
    } else {
      r12 = pow(r,-reppow);   
    }
    Vtab  = 0.0;
    Ftab  = 0.0;
    if (bSwitch) {
      /* swi is function, swi1 1st derivative and swi2 2nd derivative */
      /* The switch function is 1 for r<r1, 0 for r>rc, and smooth for
       * r1<=r<=rc. The 1st and 2nd derivatives are both zero at
       * r1 and rc.
       * ksw is just the constant 1/(rc-r1)^5, to save some calculations...
       */ 
      if(r<=r1) {
	swi  = 1.0;
	swi1 = 0.0;
      } else if (r>=rc) {
	swi  = 0.0;
	swi1 = 0.0;
      } else {
	swi      = 1 - 10*pow3(r-r1)*ksw*pow2(rc-r1) 
	  + 15*pow4(r-r1)*ksw*(rc-r1) - 6*pow5(r-r1)*ksw;
	swi1     = -30*pow2(r-r1)*ksw*pow2(rc-r1) 
	  + 60*pow3(r-r1)*ksw*(rc-r1) - 30*pow4(r-r1)*ksw;
      }
    }
    else { /* not really needed, but avoids compiler warnings... */
      swi  = 1.0;
      swi1 = 0.0;
    }
#ifdef DEBUG_SWITCH
    fprintf(fp,"%10g  %10g  %10g  %10g\n",r,swi,swi1,swi2);
#endif

    rc6 = rc*rc*rc;
    rc6 = 1.0/(rc6*rc6);

    switch (tp) {
    case etabLJ6:
      /* Dispersion */
      Vtab  = -r6;
      Ftab  = 6.0*Vtab/r;
      break;
    case etabLJ6Switch:
    case etabLJ6Shift:
      /* Dispersion */
      if (r < rc) {      
	Vtab  = -r6;
	Ftab  = 6.0*Vtab/r;
      }
      break;
    case etabLJ12:
      /* Repulsion */
      Vtab  = r12;
      Ftab  = reppow*Vtab/r;
      break;
    case etabLJ12Switch:
    case etabLJ12Shift:
      /* Repulsion */
      if (r < rc) {                
	Vtab  = r12;
	Ftab  = reppow*Vtab/r;
      }  
      break;
	case etabLJ6Encad:
        if(r < rc) {
            Vtab  = -(r6-6.0*(rc-r)*rc6/rc-rc6);
            Ftab  = -(6.0*r6/r-6.0*rc6/rc);
        } else { /* r>rc */ 
            Vtab  = 0;
            Ftab  = 0;
        } 
        break;
    case etabLJ12Encad:
        if(r < rc) {
            Vtab  = r12-12.0*(rc-r)*rc6*rc6/rc-1.0*rc6*rc6;
            Ftab  = 12.0*r12/r-12.0*rc6*rc6/rc;
        } else { /* r>rc */ 
            Vtab  = 0;
            Ftab  = 0;
        } 
        break;        
    case etabCOUL:
      Vtab  = 1.0/r;
      Ftab  = 1.0/r2;
      break;
    case etabCOULSwitch:
    case etabShift:
      if (r < rc) { 
	Vtab  = 1.0/r;
	Ftab  = 1.0/r2;
      }
      break;
    case etabEwald:
    case etabEwaldSwitch:
      Vtab  = gmx_erfc(ewc*r)/r;
      Ftab  = gmx_erfc(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
      break;
    case etabEwaldUser:
    case etabEwaldUserSwitch:
      /* Only calculate minus the reciprocal space contribution */
      Vtab  = -gmx_erf(ewc*r)/r;
      Ftab  = -gmx_erf(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
      break;
    case etabRF:
    case etabRF_ZERO:
      Vtab  = 1.0/r      +   fr->k_rf*r2 - fr->c_rf;
      Ftab  = 1.0/r2     - 2*fr->k_rf*r;
      if (tp == etabRF_ZERO && r >= rc) {
	Vtab = 0;
	Ftab = 0;
      }
      break;
    case etabEXPMIN:
      expr  = exp(-r);
      Vtab  = expr;
      Ftab  = expr;
      break;
    case etabCOULEncad:
        if(r < rc) {
            Vtab  = 1.0/r-(rc-r)/(rc*rc)-1.0/rc;
            Ftab  = 1.0/r2-1.0/(rc*rc);
        } else { /* r>rc */ 
            Vtab  = 0;
            Ftab  = 0;
        } 
        break;
    default:
      gmx_fatal(FARGS,"Table type %d not implemented yet. (%s,%d)",
		  tp,__FILE__,__LINE__);
    }
    if (bShift) {
      /* Normal coulomb with cut-off correction for potential */
      if (r < rc) {
	Vtab -= C;
	/* If in Shifting range add something to it */
	if (r > r1) {
	  r12 = (r-r1)*(r-r1);
	  r13 = (r-r1)*r12;
	  Vtab  += - A_3*r13 - B_4*r12*r12;
	  Ftab  +=   A*r12 + B*r13;
	}
      }
    }

    if (ETAB_USER(tp)) {
      Vtab += td->v[i];
      Ftab += td->f[i];
    }

    if ((r > r1) && bSwitch) {
      Ftab = Ftab*swi - Vtab*swi1;
      Vtab = Vtab*swi;
    }  
    
    /* Convert to single precision when we store to mem */
    td->v[i]  = Vtab;
    td->f[i]  = Ftab;
  }

  /* Continue the table linearly from nx0 to 0.
   * These values are only required for energy minimization with overlap or TPI.
   */
  for(i=td->nx0-1; i>=0; i--) {
    td->v[i] = td->v[i+1] + td->f[i+1]*(td->x[i+1] - td->x[i]);
    td->f[i] = td->f[i+1];
  }

#ifdef DEBUG_SWITCH
  gmx_fio_fclose(fp);
#endif
}
Пример #11
0
static real ener_drift(const verletbuf_atomtype_t *att, int natt,
                       const gmx_ffparams_t *ffp,
                       real kT_fac,
                       real md_ljd, real md_ljr, real md_el, real dd_el,
                       real r_buffer,
                       real rlist, real boxvol)
{
    double drift_tot, pot1, pot2, pot;
    int    i, j;
    real   s2i, s2j, s2, s;
    int    ti, tj;
    real   md, dd;
    real   sc_fac, rsh;
    double c_exp, c_erfc;

    drift_tot = 0;

    /* Loop over the different atom type pairs */
    for (i = 0; i < natt; i++)
    {
        s2i = kT_fac/att[i].mass;
        ti  = att[i].type;

        for (j = i; j < natt; j++)
        {
            s2j = kT_fac/att[j].mass;
            tj  = att[j].type;

            /* Note that attractive and repulsive potentials for individual
             * pairs will partially cancel.
             */
            /* -dV/dr at the cut-off for LJ + Coulomb */
            md =
                md_ljd*ffp->iparams[ti*ffp->atnr+tj].lj.c6 +
                md_ljr*ffp->iparams[ti*ffp->atnr+tj].lj.c12 +
                md_el*att[i].q*att[j].q;

            /* d2V/dr2 at the cut-off for Coulomb, we neglect LJ */
            dd = dd_el*att[i].q*att[j].q;

            s2  = s2i + s2j;

            rsh    = r_buffer;
            sc_fac = 1.0;
            /* For constraints: adapt r and scaling for the Gaussian */
            if (att[i].con)
            {
                real sh, sc;
                approx_2dof(s2i, r_buffer*s2i/s2, &sh, &sc);
                rsh    += sh;
                sc_fac *= sc;
            }
            if (att[j].con)
            {
                real sh, sc;
                approx_2dof(s2j, r_buffer*s2j/s2, &sh, &sc);
                rsh    += sh;
                sc_fac *= sc;
            }

            /* Exact contribution of an atom pair with Gaussian displacement
             * with sigma s to the energy drift for a potential with
             * derivative -md and second derivative dd at the cut-off.
             * The only catch is that for potentials that change sign
             * near the cut-off there could be an unlucky compensation
             * of positive and negative energy drift.
             * Such potentials are extremely rare though.
             *
             * Note that pot has unit energy*length, as the linear
             * atom density still needs to be put in.
             */
            c_exp  = exp(-rsh*rsh/(2*s2))/sqrt(2*M_PI);
            c_erfc = 0.5*gmx_erfc(rsh/(sqrt(2*s2)));
            s      = sqrt(s2);

            pot1 = sc_fac*
                md/2*((rsh*rsh + s2)*c_erfc - rsh*s*c_exp);
            pot2 = sc_fac*
                dd/6*(s*(rsh*rsh + 2*s2)*c_exp - rsh*(rsh*rsh + 3*s2)*c_erfc);
            pot = pot1 + pot2;

            if (gmx_debug_at)
            {
                fprintf(debug, "n %d %d d s %.3f %.3f con %d md %8.1e dd %8.1e pot1 %8.1e pot2 %8.1e pot %8.1e\n",
                        att[i].n, att[j].n, sqrt(s2i), sqrt(s2j),
                        att[i].con+att[j].con,
                        md, dd, pot1, pot2, pot);
            }

            /* Multiply by the number of atom pairs */
            if (j == i)
            {
                pot *= (double)att[i].n*(att[i].n - 1)/2;
            }
            else
            {
                pot *= (double)att[i].n*att[j].n;
            }
            /* We need the line density to get the energy drift of the system.
             * The effective average r^2 is close to (rlist+sigma)^2.
             */
            pot *= 4*M_PI*sqr(rlist + s)/boxvol;

            /* Add the unsigned drift to avoid cancellation of errors */
            drift_tot += fabs(pot);
        }
    }

    return drift_tot;
}