Beispiel #1
0
/* Send the per-atom interaction parameters to the PME side.
 *
 * A flag word describing which parameter arrays are relevant is derived
 * from ic and handed, together with the arrays themselves, to
 * gmx_pme_send_coeffs_coords(). The two pointer arguments and the two
 * integer arguments that follow the parameter arrays in that call are
 * passed as NULL and 0, and the final argument is passed as -1.
 *
 * cr                             communication record, forwarded unchanged
 * ic                             supplies eeltype and vdwtype
 * bFreeEnergy_q, bFreeEnergy_lj  when either is set, the B-state flags
 *                                are raised as well
 * chargeA .. sigmaB              parameter arrays, forwarded unchanged
 * maxshift_x, maxshift_y         forwarded unchanged
 */
void gmx_pme_send_parameters(t_commrec *cr,
                             const interaction_const_t *ic,
                             gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
                             real *chargeA, real *chargeB,
                             real *sqrt_c6A, real *sqrt_c6B,
                             real *sigmaA, real *sigmaB,
                             int maxshift_x, int maxshift_y)
{
    int state_a_flags = 0;
    int send_flags;

    /* Collect the flags of the A state first */
    if (EEL_PME(ic->eeltype))
    {
        state_a_flags |= PP_PME_CHARGE;
    }
    if (EVDW_PME(ic->vdwtype))
    {
        state_a_flags |= PP_PME_SQRTC6;
        state_a_flags |= PP_PME_SIGMA;
    }

    send_flags = state_a_flags;
    if (bFreeEnergy_q || bFreeEnergy_lj)
    {
        /* This relies on each B state flag occupying the bit directly
         * above the corresponding A state flag.
         */
        send_flags |= (state_a_flags << 1);
    }

    gmx_pme_send_coeffs_coords(cr, send_flags,
                               chargeA, chargeB,
                               sqrt_c6A, sqrt_c6B,
                               sigmaA, sigmaB,
                               NULL, NULL,
                               0, 0,
                               maxshift_x, maxshift_y,
                               -1);
}
/* Load the simulation tpr file and remember the settings needed later.
 *
 * The input record, state and topology are filled by read_tpx_state().
 * The values found in the file are stored in element [0] of the arrays
 * in info. The Ewald beta stored there is user_beta when that is positive,
 * otherwise it is computed from the cut-off and ewald_rtol with
 * calc_ewaldcoeff().
 *
 * Terminates through gmx_fatal() when the run does not use PME
 * electrostatics or when rcoulomb differs from rlist.
 */
static void read_tpr_file(const char *fn_sim_tpr, t_inputinfo *info, t_state *state, gmx_mtop_t *mtop, t_inputrec *ir, real user_beta, real fracself)
{
    read_tpx_state(fn_sim_tpr, ir, state, NULL, mtop);

    /* Scalars */
    info->fracself       = fracself;
    info->orig_sim_steps = ir->nsteps;

    /* Slot [0] of every array holds the setting of the original tpr file */
    info->nkx[0]         = ir->nkx;
    info->nky[0]         = ir->nky;
    info->nkz[0]         = ir->nkz;
    info->pme_order[0]   = ir->pme_order;
    info->rvdw[0]        = ir->rvdw;
    info->rcoulomb[0]    = ir->rcoulomb;
    info->ewald_rtol[0]  = ir->ewald_rtol;

    /* A positive user supplied beta overrides the derived one */
    if (user_beta > 0)
    {
        info->ewald_beta[0] = user_beta;
    }
    else
    {
        info->ewald_beta[0] = calc_ewaldcoeff(info->rcoulomb[0], info->ewald_rtol[0]);
    }

    /* Without PME there is nothing to optimize */
    if (!EEL_PME(ir->coulombtype))
    {
        gmx_fatal(FARGS, "Can only do optimizations for simulations with PME");
    }

    /* PME needs the Coulomb cut-off to coincide with the pair-list cut-off */
    if (ir->rcoulomb != ir->rlist)
    {
        gmx_fatal(FARGS, "PME requires rcoulomb (%f) to be equal to rlist (%f).", ir->rcoulomb, ir->rlist);
    }
}
Beispiel #3
0
/* Determine the pair-list cut-off that keeps the estimated energy drift
 * within ir->verletbuf_tol.
 *
 * The buffer size is searched by bisection in steps of 'resolution'
 * (0.001 by default, overridable with the environment variable
 * GMX_VERLET_BUFFER_RES; only values that parse as a strictly positive
 * number are accepted, anything else leaves the default in place).
 *
 * mtop                   topology; supplies natoms, ffparams and the atom types
 * boxvol                 box volume
 * ir                     input record; supplies cut-offs, interaction types,
 *                        integrator, nstlist, delta_t and verletbuf_tol
 * reference_temperature  temperature to use; when negative the maximum ref_t
 *                        over all T-coupling groups with tau_t >= 0 is used
 * list_setup             cluster sizes of the pair-list
 * n_nonlin_vsite         forwarded to get_verlet_buffer_atomtypes()
 * rlist                  output: max(rvdw, rcoulomb) plus the buffer found
 *
 * Calls gmx_incons()/gmx_fatal() for NVE with a negative reference
 * temperature and for unsupported VdW or electrostatics settings.
 */
void calc_verlet_buffer_size(const gmx_mtop_t *mtop, real boxvol,
                             const t_inputrec *ir,
                             real reference_temperature,
                             const verletbuf_list_setup_t *list_setup,
                             int *n_nonlin_vsite,
                             real *rlist)
{
    double                resolution;
    char                 *env;

    real                  particle_distance;
    real                  nb_clust_frac_pairs_not_in_list_at_cutoff;

    verletbuf_atomtype_t *att  = NULL;
    int                   natt = -1, i;
    double                reppow;
    real                  md1_ljd, d2_ljd, md3_ljd;
    real                  md1_ljr, d2_ljr, md3_ljr;
    real                  md1_el,  d2_el;
    real                  elfac;
    real                  kT_fac, mass_min;
    int                   ib0, ib1, ib;
    real                  rb, rl;
    real                  drift;

    if (reference_temperature < 0)
    {
        if (EI_MD(ir->eI) && ir->etc == etcNO)
        {
            /* This case should be handled outside calc_verlet_buffer_size */
            gmx_incons("calc_verlet_buffer_size called with an NVE ensemble and reference_temperature < 0");
        }

        /* We use the maximum temperature with multiple T-coupl groups.
         * We could use a per particle temperature, but since particles
         * interact, this might underestimate the buffer size.
         */
        reference_temperature = 0;
        for (i = 0; i < ir->opts.ngtc; i++)
        {
            if (ir->opts.tau_t[i] >= 0)
            {
                reference_temperature = max(reference_temperature,
                                            ir->opts.ref_t[i]);
            }
        }
    }

    /* Resolution of the buffer size */
    resolution = 0.001;

    env = getenv("GMX_VERLET_BUFFER_RES");
    if (env != NULL)
    {
        double env_resolution;

        /* The resolution is used as a divisor and as the bisection step
         * below, so only take over a value that was actually parsed and
         * that is strictly positive; otherwise keep the default.
         */
        if (sscanf(env, "%lf", &env_resolution) == 1 && env_resolution > 0)
        {
            resolution = env_resolution;
        }
    }

    /* In an atom wise pair-list there would be no pairs in the list
     * beyond the pair-list cut-off.
     * However, we use a pair-list of groups vs groups of atoms.
     * For groups of 4 atoms, the parallelism of SSE instructions, only
     * 10% of the atom pairs are not in the list just beyond the cut-off.
     * As this percentage increases slowly compared to the decrease of the
     * Gaussian displacement distribution over this range, we can simply
     * reduce the drift by this fraction.
     * For larger groups, e.g. of 8 atoms, this fraction will be lower,
     * so then buffer size will be on the conservative (large) side.
     *
     * Note that the formulas used here do not take into account
     * cancellation of errors which could occur by missing both
     * attractive and repulsive interactions.
     *
     * The only major assumption is homogeneous particle distribution.
     * For an inhomogeneous system, such as a liquid-vapor system,
     * the buffer will be underestimated. The actual energy drift
     * will be higher by the factor: local/homogeneous particle density.
     *
     * The results of this estimate have been checked against simulations.
     * In most cases the real drift differs by less than a factor 2.
     */

    /* Worst case assumption: HCP packing of particles gives largest distance */
    particle_distance = pow(boxvol*sqrt(2)/mtop->natoms, 1.0/3.0);

    get_verlet_buffer_atomtypes(mtop, &att, &natt, n_nonlin_vsite);
    assert(att != NULL && natt >= 0);

    if (debug)
    {
        fprintf(debug, "particle distance assuming HCP packing: %f nm\n",
                particle_distance);
        fprintf(debug, "energy drift atom types: %d\n", natt);
    }

    /* Derivatives of the dispersion (ljd) and repulsion (ljr) parts of
     * the Lennard-Jones potential at the cut-off.
     */
    reppow   = mtop->ffparams.reppow;
    md1_ljd  = 0;
    d2_ljd   = 0;
    md3_ljd  = 0;
    md1_ljr  = 0;
    d2_ljr   = 0;
    md3_ljr  = 0;
    if (ir->vdwtype == evdwCUT)
    {
        real sw_range, md3_pswf;

        switch (ir->vdw_modifier)
        {
            case eintmodNONE:
            case eintmodPOTSHIFT:
                /* -dV/dr of -r^-6 and r^-reppow */
                md1_ljd =     -6*pow(ir->rvdw, -7.0);
                md1_ljr = reppow*pow(ir->rvdw, -(reppow+1));
                /* The contribution of the higher derivatives is negligible */
                break;
            case eintmodFORCESWITCH:
                /* At the cut-off: V=V'=V''=0, so we use only V''' */
                md3_ljd  = -md3_force_switch(6.0,    ir->rvdw_switch, ir->rvdw);
                md3_ljr  =  md3_force_switch(reppow, ir->rvdw_switch, ir->rvdw);
                break;
            case eintmodPOTSWITCH:
                /* At the cut-off: V=V'=V''=0.
                 * V''' is given by the original potential times
                 * the third derivative of the switch function.
                 */
                sw_range  = ir->rvdw - ir->rvdw_switch;
                md3_pswf  = 60.0*pow(sw_range, -3.0);

                md3_ljd   = -pow(ir->rvdw, -6.0   )*md3_pswf;
                md3_ljr   =  pow(ir->rvdw, -reppow)*md3_pswf;
                break;
            default:
                gmx_incons("Unimplemented VdW modifier");
        }
    }
    else if (EVDW_PME(ir->vdwtype))
    {
        real b, r, br, br2, br4, br6;
        b        = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
        r        = ir->rvdw;
        br       = b*r;
        br2      = br*br;
        br4      = br2*br2;
        br6      = br4*br2;
        /* -dV/dr of g(br)*r^-6 [where g(x) = exp(-x^2)(1+x^2+x^4/2), see LJ-PME equations in manual] and r^-reppow */
        md1_ljd  = -exp(-br2)*(br6 + 3.0*br4 + 6.0*br2 + 6.0)*pow(r, -7.0);
        md1_ljr  = reppow*pow(r, -(reppow+1));
        /* The contribution of the higher derivatives is negligible */
    }
    else
    {
        gmx_fatal(FARGS, "Energy drift calculation is only implemented for plain cut-off Lennard-Jones interactions");
    }

    elfac = ONE_4PI_EPS0/ir->epsilon_r;

    /* Determine md=-dV/dr and dd=d^2V/dr^2 */
    md1_el = 0;
    d2_el  = 0;
    if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype))
    {
        real eps_rf, k_rf;

        if (ir->coulombtype == eelCUT)
        {
            eps_rf = 1;
            k_rf   = 0;
        }
        else
        {
            eps_rf = ir->epsilon_rf/ir->epsilon_r;
            if (eps_rf != 0)
            {
                k_rf = pow(ir->rcoulomb, -3.0)*(eps_rf - ir->epsilon_r)/(2*eps_rf + ir->epsilon_r);
            }
            else
            {
                /* epsilon_rf = infinity */
                k_rf = 0.5*pow(ir->rcoulomb, -3.0);
            }
        }

        if (eps_rf > 0)
        {
            md1_el = elfac*(pow(ir->rcoulomb, -2.0) - 2*k_rf*ir->rcoulomb);
        }
        d2_el      = elfac*(2*pow(ir->rcoulomb, -3.0) + 2*k_rf);
    }
    else if (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD)
    {
        real b, rc, br;

        b      = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
        rc     = ir->rcoulomb;
        br     = b*rc;
        md1_el = elfac*(b*exp(-br*br)*M_2_SQRTPI/rc + gmx_erfc(br)/(rc*rc));
        d2_el  = elfac/(rc*rc)*(2*b*(1 + br*br)*exp(-br*br)*M_2_SQRTPI + 2*gmx_erfc(br)/rc);
    }
    else
    {
        gmx_fatal(FARGS, "Energy drift calculation is only implemented for Reaction-Field and Ewald electrostatics");
    }

    /* Determine the variance of the atomic displacement
     * over nstlist-1 steps: kT_fac
     * For inertial dynamics (not Brownian dynamics) the mass factor
     * is not included in kT_fac, it is added later.
     */
    if (ir->eI == eiBD)
    {
        /* Get the displacement distribution from the random component only.
         * With accurate integration the systematic (force) displacement
         * should be negligible (unless nstlist is extremely large, which
         * you wouldn't do anyhow).
         */
        kT_fac = 2*BOLTZ*reference_temperature*(ir->nstlist-1)*ir->delta_t;
        if (ir->bd_fric > 0)
        {
            /* This is directly sigma^2 of the displacement */
            kT_fac /= ir->bd_fric;

            /* Set the masses to 1 as kT_fac is the full sigma^2,
             * but we divide by m in ener_drift().
             */
            for (i = 0; i < natt; i++)
            {
                att[i].prop.mass = 1;
            }
        }
        else
        {
            real tau_t;

            /* Per group tau_t is not implemented yet, use the maximum */
            tau_t = ir->opts.tau_t[0];
            for (i = 1; i < ir->opts.ngtc; i++)
            {
                tau_t = max(tau_t, ir->opts.tau_t[i]);
            }

            kT_fac *= tau_t;
            /* This kT_fac needs to be divided by the mass to get sigma^2 */
        }
    }
    else
    {
        kT_fac = BOLTZ*reference_temperature*sqr((ir->nstlist-1)*ir->delta_t);
    }

    /* The lightest atom type has the largest displacement */
    mass_min = att[0].prop.mass;
    for (i = 1; i < natt; i++)
    {
        mass_min = min(mass_min, att[i].prop.mass);
    }

    if (debug)
    {
        fprintf(debug, "md1_ljd %9.2e d2_ljd %9.2e md3_ljd %9.2e\n", md1_ljd, d2_ljd, md3_ljd);
        fprintf(debug, "md1_ljr %9.2e d2_ljr %9.2e md3_ljr %9.2e\n", md1_ljr, d2_ljr, md3_ljr);
        fprintf(debug, "md1_el  %9.2e d2_el  %9.2e\n", md1_el, d2_el);
        fprintf(debug, "sqrt(kT_fac) %f\n", sqrt(kT_fac));
        fprintf(debug, "mass_min %f\n", mass_min);
    }

    /* Search using bisection: ib0 is the largest index known to exceed
     * the tolerance, ib1 the smallest index known (or assumed) to satisfy it.
     */
    ib0 = -1;
    /* The drift will be negligible at 5 times the max sigma */
    ib1 = (int)(5*2*sqrt(kT_fac/mass_min)/resolution) + 1;
    while (ib1 - ib0 > 1)
    {
        ib = (ib0 + ib1)/2;
        rb = ib*resolution;
        rl = max(ir->rvdw, ir->rcoulomb) + rb;

        /* Calculate the average energy drift at the last step
         * of the nstlist steps at which the pair-list is used.
         */
        drift = ener_drift(att, natt, &mtop->ffparams,
                           kT_fac,
                           md1_ljd, d2_ljd, md3_ljd,
                           md1_ljr, d2_ljr, md3_ljr,
                           md1_el,  d2_el,
                           rb,
                           rl, boxvol);

        /* Correct for the fact that we are using a Ni x Nj particle pair list
         * and not a 1 x 1 particle pair list. This reduces the drift.
         */
        /* We don't have a formula for 8 (yet), use 4 which is conservative */
        nb_clust_frac_pairs_not_in_list_at_cutoff =
            surface_frac(min(list_setup->cluster_size_i, 4),
                         particle_distance, rl)*
            surface_frac(min(list_setup->cluster_size_j, 4),
                         particle_distance, rl);
        drift *= nb_clust_frac_pairs_not_in_list_at_cutoff;

        /* Convert the drift to drift per unit time per atom */
        drift /= ir->nstlist*ir->delta_t*mtop->natoms;

        if (debug)
        {
            fprintf(debug, "ib %3d %3d %3d rb %.3f %dx%d fac %.3f drift %.1e\n",
                    ib0, ib, ib1, rb,
                    list_setup->cluster_size_i, list_setup->cluster_size_j,
                    nb_clust_frac_pairs_not_in_list_at_cutoff,
                    drift);
        }

        if (fabs(drift) > ir->verletbuf_tol)
        {
            ib0 = ib;
        }
        else
        {
            ib1 = ib;
        }
    }

    sfree(att);

    *rlist = max(ir->rvdw, ir->rcoulomb) + ib1*resolution;
}
Beispiel #4
0
/* Estimate the fraction of the total computational cost spent in PME mesh work.
 *
 * The cost of the bonded interactions, of the particle-particle
 * non-bonded interactions and of the four PME mesh stages (redistribution,
 * spreading, FFT, solve) is summed using the c_* cost constants.
 * PME electrostatics and LJ-PME each add their own mesh contribution.
 *
 * mtop  topology, forwarded to the counting helpers
 * ir    input record; supplies the cut-off scheme, interaction types,
 *       grid dimensions, PME order and free-energy settings
 * box   simulation box, forwarded to the PP load helpers
 *
 * Returns cost_pme/(cost_bond + cost_pp + cost_pme).
 */
float pme_load_estimate(gmx_mtop_t *mtop, t_inputrec *ir, matrix box)
{
    int            nq_tot, nlj_tot, f;
    gmx_bool       bChargePerturbed, bTypePerturbed;
    double         ndistance_c, ndistance_simd;
    double         cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme;
    float          ratio;

    /* Computational cost of bonded, non-bonded and PME calculations.
     * This will be machine dependent.
     * The numbers here are accurate for Intel Core2 and AMD Athlon 64
     * in single precision. In double precision PME mesh is slightly cheaper,
     * although not so much that the numbers need to be adjusted.
     */

    count_bonded_distances(mtop, ir, &ndistance_c, &ndistance_simd);
    /* C_BOND is the cost for bonded interactions with SIMD implementations,
     * so we need to scale the number of bonded interactions for which there
     * are only C implementations to the number of SIMD equivalents.
     */
    cost_bond = c_bond*(ndistance_c   *simd_cycle_factor(FALSE) +
                        ndistance_simd*simd_cycle_factor(bHaveSIMD));

    if (ir->cutoff_scheme == ecutsGROUP)
    {
        pp_group_load(mtop, ir, box,
                      &nq_tot, &nlj_tot, &cost_pp,
                      &bChargePerturbed, &bTypePerturbed);
    }
    else
    {
        pp_verlet_load(mtop, ir, box,
                       &nq_tot, &nlj_tot, &cost_pp,
                       &bChargePerturbed, &bTypePerturbed);
    }

    cost_redist = 0;
    cost_spread = 0;
    cost_fft    = 0;
    cost_solve  = 0;

    if (EEL_PME(ir->coulombtype))
    {
        /* Multiply in double: the product of three grid dimensions can
         * exceed the range of int. The halving of nkz stays an integer
         * division on purpose.
         */
        double grid = (double)ir->nkx*ir->nky*((ir->nkz + 1)/2);

        /* With perturbed charges there are two grids to handle */
        f            = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1);
        cost_redist +=   c_pme_redist*nq_tot;
        cost_spread += f*c_pme_spread*nq_tot*pow(ir->pme_order, 3);
        cost_fft    += f*c_pme_fft*grid*log(grid)/log(2);
        cost_solve  += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD);
    }

    if (EVDW_PME(ir->vdwtype))
    {
        /* Multiply in double, see the electrostatics branch above */
        double grid = (double)ir->nkx*ir->nky*((ir->nkz + 1)/2);

        f            = ((ir->efep != efepNO && bTypePerturbed) ? 2 : 1);
        if (ir->ljpme_combination_rule == eljpmeLB)
        {
            /* LB combination rule: we have 7 mesh terms */
            f       *= 7;
        }
        cost_redist +=   c_pme_redist*nlj_tot;
        cost_spread += f*c_pme_spread*nlj_tot*pow(ir->pme_order, 3);
        cost_fft    += f*c_pme_fft*2*grid*log(grid)/log(2);
        cost_solve  += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD);
    }

    cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;

    ratio = cost_pme/(cost_bond + cost_pp + cost_pme);

    if (debug)
    {
        fprintf(debug,
                "cost_bond   %f\n"
                "cost_pp     %f\n"
                "cost_redist %f\n"
                "cost_spread %f\n"
                "cost_fft    %f\n"
                "cost_solve  %f\n",
                cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve);

        fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio);
    }

    return ratio;
}
Beispiel #5
0
/* Choose the number of separate PME ranks and the domain decomposition grid.
 *
 * All decisions are taken on the master rank; afterwards dd->nc,
 * ir->nkx, ir->nky and cr->npmenodes are broadcast to the other ranks
 * (the last three only with PME electrostatics). Without PME
 * electrostatics cr->npmenodes is set to 0 on every rank.
 *
 * A negative cr->npmenodes on entry means that the count still has to be
 * chosen here; a non-negative value is taken as the user's request.
 *
 * Terminates through gmx_fatal() when the requested number of PME ranks
 * is not smaller than the total rank count, or when the rank count that
 * is checked contains a large prime factor.
 *
 * Returns the limit reported by optimize_ncells() on the master rank and
 * 0 on all other ranks.
 */
real dd_choose_grid(FILE *fplog,
                    t_commrec *cr, gmx_domdec_t *dd, t_inputrec *ir,
                    gmx_mtop_t *mtop, matrix box, gmx_ddbox_t *ddbox,
                    gmx_bool bDynLoadBal, real dlb_scale,
                    real cellsize_limit, real cutoff_dd,
                    gmx_bool bInterCGBondeds)
{
    gmx_int64_t     nnodes_div, ldiv;
    real            limit;

    if (MASTER(cr))
    {
        nnodes_div = cr->nnodes;
        if (EEL_PME(ir->coulombtype))
        {
            if (cr->npmenodes > 0)
            {
                if (cr->npmenodes >= cr->nnodes)
                {
                    gmx_fatal(FARGS,
                              "Cannot have %d separate PME ranks with just %d total ranks",
                              cr->npmenodes, cr->nnodes);
                }

                /* If the user purposely selected the number of PME nodes,
                 * only check for large primes in the PP node count.
                 */
                nnodes_div -= cr->npmenodes;
            }
        }
        else
        {
            cr->npmenodes = 0;
        }

        if (nnodes_div > 12)
        {
            ldiv = largest_divisor(nnodes_div);
            /* Check if the largest divisor is more than nnodes^(2/3);
             * the 64-bit variables keep the cube and the square in range.
             */
            if (ldiv*ldiv*ldiv > nnodes_div*nnodes_div)
            {
                /* Both values originate from the int rank count, so they
                 * fit in an int; the casts make the arguments match the
                 * %d conversions.
                 */
                gmx_fatal(FARGS, "The number of ranks you selected (%d) contains a large prime factor %d. In most cases this will lead to bad performance. Choose a number with smaller prime factors or set the decomposition (option -dd) manually.",
                          (int)nnodes_div, (int)ldiv);
            }
        }

        if (EEL_PME(ir->coulombtype))
        {
            if (cr->npmenodes < 0)
            {
                /* Only use separate PME ranks when there are more
                 * than 18 ranks in total.
                 */
                if (cr->nnodes <= 18)
                {
                    cr->npmenodes = 0;
                    if (fplog)
                    {
                        fprintf(fplog, "Using %d separate PME ranks, as there are too few total\n ranks for efficient splitting\n", cr->npmenodes);
                    }
                }
                else
                {
                    cr->npmenodes = guess_npme(fplog, mtop, ir, box, cr->nnodes);
                    if (fplog)
                    {
                        fprintf(fplog, "Using %d separate PME ranks, as guessed by mdrun\n", cr->npmenodes);
                    }
                }
            }
            else
            {
                if (fplog)
                {
                    fprintf(fplog, "Using %d separate PME ranks, per user request\n", cr->npmenodes);
                }
            }
        }

        limit = optimize_ncells(fplog, cr->nnodes, cr->npmenodes,
                                bDynLoadBal, dlb_scale,
                                mtop, box, ddbox, ir, dd,
                                cellsize_limit, cutoff_dd,
                                bInterCGBondeds,
                                dd->nc);
    }
    else
    {
        limit = 0;
    }
    /* Communicate the information set by the master to all nodes */
    gmx_bcast(sizeof(dd->nc), dd->nc, cr);
    if (EEL_PME(ir->coulombtype))
    {
        gmx_bcast(sizeof(ir->nkx), &ir->nkx, cr);
        gmx_bcast(sizeof(ir->nky), &ir->nky, cr);
        gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
    }
    else
    {
        cr->npmenodes = 0;
    }

    return limit;
}
Beispiel #6
0
/*! \brief Determine the optimal distribution of DD cells for the simulation system and number of MPI ranks
 *
 * The number of particle-particle ranks (nnodes_tot - npme_only) is
 * factorized and the factors are distributed over the dimensions by
 * assign_factors(), which writes the resulting grid to \p nc.
 *
 * The minimum cell size starts at \p cellsize_limit and is enlarged by
 * 1/dlb_scale with dynamic load balancing, or otherwise by
 * DD_GRID_MARGIN_PRES_SCALE when pressure coupling is active. This
 * scaling is applied whether or not a log file is open.
 *
 * \param[in]     fplog            Log file, may be NULL
 * \param[in]     nnodes_tot       Total number of ranks
 * \param[in]     npme_only        Number of separate PME ranks
 * \param[in]     bDynLoadBal      Whether dynamic load balancing is used
 * \param[in]     dlb_scale        Value of option -dds, must be smaller than 1
 * \param[in]     mtop             Topology
 * \param[in]     box              Simulation box
 * \param[in]     ddbox            Supplies box_size, skew_fac and npbcdim
 * \param[in]     ir               Input record
 * \param[in,out] dd               Domain decomposition data; dd->nc is reset to 1
 * \param[in]     cellsize_limit   Initial minimum cell size
 * \param[in]     cutoff           Forwarded to assign_factors()
 * \param[in]     bInterCGBondeds  Whether bonded interactions between charge groups are present
 * \param[out]    nc               The chosen grid
 * \returns the minimum cell size after applying the margin
 */
static real optimize_ncells(FILE *fplog,
                            int nnodes_tot, int npme_only,
                            gmx_bool bDynLoadBal, real dlb_scale,
                            gmx_mtop_t *mtop, matrix box, gmx_ddbox_t *ddbox,
                            t_inputrec *ir,
                            gmx_domdec_t *dd,
                            real cellsize_limit, real cutoff,
                            gmx_bool bInterCGBondeds,
                            ivec nc)
{
    int      npp, npme, ndiv, *div, *mdiv, d, nmax;
    double   pbcdxr;
    real     limit;
    ivec     itry;

    limit  = cellsize_limit;

    dd->nc[XX] = 1;
    dd->nc[YY] = 1;
    dd->nc[ZZ] = 1;

    npp = nnodes_tot - npme_only;
    if (EEL_PME(ir->coulombtype))
    {
        /* Without separate PME ranks all PP ranks take part in PME */
        npme = (npme_only > 0 ? npme_only : npp);
    }
    else
    {
        npme = 0;
    }

    if (bInterCGBondeds)
    {
        /* If we can skip PBC for distance calculations in plain-C bondeds,
         * we can save some time (e.g. 3D DD with pbc=xyz).
         * Here we ignore SIMD bondeds as they always do (fast) PBC.
         */
        count_bonded_distances(mtop, ir, &pbcdxr, NULL);
        pbcdxr /= (double)mtop->natoms;
    }
    else
    {
        /* Every molecule is a single charge group: no pbc required */
        pbcdxr = 0;
    }
    /* Add a margin for DLB and/or pressure scaling */
    if (bDynLoadBal)
    {
        if (dlb_scale >= 1.0)
        {
            gmx_fatal(FARGS, "The value for option -dds should be smaller than 1");
        }
        if (fplog)
        {
            fprintf(fplog, "Scaling the initial minimum size with 1/%g (option -dds) = %g\n", dlb_scale, 1/dlb_scale);
        }
        limit /= dlb_scale;
    }
    else if (ir->epc != epcNO)
    {
        if (fplog)
        {
            fprintf(fplog, "To account for pressure scaling, scaling the initial minimum size with %g\n", DD_GRID_MARGIN_PRES_SCALE);
        }
        /* The margin must not depend on whether a log file is open */
        limit *= DD_GRID_MARGIN_PRES_SCALE;
    }

    if (fplog)
    {
        fprintf(fplog, "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm\n", npp, limit);

        if (inhomogeneous_z(ir))
        {
            fprintf(fplog, "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.\n", eewg_names[ir->ewald_geometry]);
        }

        if (limit > 0)
        {
            fprintf(fplog, "The maximum allowed number of cells is:");
            for (d = 0; d < DIM; d++)
            {
                nmax = (int)(ddbox->box_size[d]*ddbox->skew_fac[d]/limit);
                if (d >= ddbox->npbcdim && nmax < 2)
                {
                    nmax = 2;
                }
                if (d == ZZ && inhomogeneous_z(ir))
                {
                    nmax = 1;
                }
                fprintf(fplog, " %c %d", 'X' + d, nmax);
            }
            fprintf(fplog, "\n");
        }
    }

    if (debug)
    {
        fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr);
    }

    /* Decompose npp in factors */
    ndiv = factorize(npp, &div, &mdiv);

    itry[XX] = 1;
    itry[YY] = 1;
    itry[ZZ] = 1;
    clear_ivec(nc);
    assign_factors(dd, limit, cutoff, box, ddbox, mtop->natoms, ir, pbcdxr,
                   npme, ndiv, div, mdiv, itry, nc);

    sfree(div);
    sfree(mdiv);

    return limit;
}
Beispiel #7
0
/*! \brief Return whether the DD inhomogeneous in the z direction */
static gmx_bool inhomogeneous_z(const t_inputrec *ir)
{
    return ((EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) &&
            ir->ePBC == epbcXYZ && ir->ewald_geometry == eewg3DC);
}
Beispiel #8
0
/* Settle the number of separate PME nodes and set up the DD grid.
 *
 * The master node decides; the result (dd->nc and, with PME
 * electrostatics, ir->nkx, ir->nky and cr->npmenodes) is then broadcast.
 * A negative cr->npmenodes on entry means the count is still to be chosen;
 * with PME it is then set to 0 for fewer than 12 nodes when
 * pme_inconvenient_nnodes() returns 0, and taken from guess_npme()
 * otherwise. Without PME electrostatics cr->npmenodes ends up as 0.
 *
 * Terminates through gmx_fatal() when separate PME nodes are requested
 * with 2 or fewer nodes.
 *
 * Returns the limit from optimize_ncells() on the master, 0 elsewhere.
 */
real dd_choose_grid(FILE *fplog,
                    t_commrec *cr,gmx_domdec_t *dd,t_inputrec *ir,
                    gmx_mtop_t *mtop,matrix box,gmx_ddbox_t *ddbox,
                    bool bDynLoadBal,real dlb_scale,
                    real cellsize_limit,real cutoff_dd,
                    bool bInterCGBondeds,bool bInterCGMultiBody)
{
    real limit = 0;

    if (MASTER(cr))
    {
        if (!EEL_PME(ir->coulombtype))
        {
            /* No PME: an undecided count simply becomes zero */
            if (cr->npmenodes < 0)
            {
                cr->npmenodes = 0;
            }
        }
        else
        {
            if (cr->npmenodes < 0)
            {
                /* The count was left open, pick one here */
                bool bNoSeparatePme =
                    (cr->nnodes < 12 &&
                     pme_inconvenient_nnodes(ir->nkx,ir->nky,cr->nnodes) == 0);

                if (bNoSeparatePme)
                {
                    cr->npmenodes = 0;
                }
                else
                {
                    cr->npmenodes = guess_npme(fplog,mtop,ir,box,cr->nnodes);
                }
            }
            else if (cr->nnodes <= 2 && cr->npmenodes > 0)
            {
                gmx_fatal(FARGS,
                          "Can not have separate PME nodes with 2 or less nodes");
            }

            if (fplog)
            {
                fprintf(fplog,"Using %d separate PME nodes\n",cr->npmenodes);
            }
        }

        limit = optimize_ncells(fplog,cr->nnodes,cr->npmenodes,
                                bDynLoadBal,dlb_scale,
                                mtop,box,ddbox,ir,dd,
                                cellsize_limit,cutoff_dd,
                                bInterCGBondeds,bInterCGMultiBody,
                                dd->nc);
    }

    /* Distribute what the master decided to every node */
    gmx_bcast(sizeof(dd->nc),dd->nc,cr);
    if (!EEL_PME(ir->coulombtype))
    {
        cr->npmenodes = 0;
    }
    else
    {
        gmx_bcast(sizeof(ir->nkx),&ir->nkx,cr);
        gmx_bcast(sizeof(ir->nky),&ir->nky,cr);
        gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
    }

    return limit;
}
Beispiel #9
0
/* Choose the number of separate PME nodes and the domain decomposition grid.
 *
 * All decisions are taken on the master node; afterwards dd->nc and,
 * with PME electrostatics, ir->nkx, ir->nky and cr->npmenodes are
 * broadcast to the other nodes. Without PME electrostatics
 * cr->npmenodes is set to 0 on every node.
 *
 * A negative cr->npmenodes on entry means the count is still to be
 * chosen: with PME it becomes 0 for 10 nodes or fewer and the result of
 * guess_npme() otherwise.
 *
 * Terminates through gmx_fatal() when the node count (above 12) contains
 * a large prime factor, or when separate PME nodes are requested with
 * 2 or fewer nodes.
 *
 * Returns the limit from optimize_ncells() on the master and 0 on all
 * other nodes.
 */
real dd_choose_grid(FILE *fplog,
                    t_commrec *cr,gmx_domdec_t *dd,t_inputrec *ir,
                    gmx_mtop_t *mtop,matrix box,gmx_ddbox_t *ddbox,
                    gmx_bool bDynLoadBal,real dlb_scale,
                    real cellsize_limit,real cutoff_dd,
                    gmx_bool bInterCGBondeds,gmx_bool bInterCGMultiBody)
{
    int  ldiv;
    real limit;

    if (MASTER(cr))
    {
        if (cr->nnodes > 12)
        {
            ldiv = largest_divisor(cr->nnodes);
            /* Check if the largest divisor is more than nnodes^(2/3).
             * The cube and the square are formed in double, as they
             * exceed the range of int for a large prime node count.
             */
            if ((double)ldiv*ldiv*ldiv > (double)cr->nnodes*cr->nnodes)
            {
                gmx_fatal(FARGS,"The number of nodes you selected (%d) contains a large prime factor %d. In most cases this will lead to bad performance. Choose a number with smaller prime factors or set the decomposition (option -dd) manually.",
                          cr->nnodes,ldiv);
            }
        }

        if (EEL_PME(ir->coulombtype))
        {
            if (cr->npmenodes >= 0)
            {
                if (cr->nnodes <= 2 && cr->npmenodes > 0)
                {
                    gmx_fatal(FARGS,
                              "Can not have separate PME nodes with 2 or less nodes");
                }
            }
            else
            {
                /* The count was left open, pick one here */
                if (cr->nnodes <= 10)
                {
                    cr->npmenodes = 0;
                }
                else
                {
                    cr->npmenodes = guess_npme(fplog,mtop,ir,box,cr->nnodes);
                }
            }
            if (fplog)
            {
                fprintf(fplog,"Using %d separate PME nodes\n",cr->npmenodes);
            }
        }
        else
        {
            if (cr->npmenodes < 0)
            {
                cr->npmenodes = 0;
            }
        }

        limit = optimize_ncells(fplog,cr->nnodes,cr->npmenodes,
                                bDynLoadBal,dlb_scale,
                                mtop,box,ddbox,ir,dd,
                                cellsize_limit,cutoff_dd,
                                bInterCGBondeds,bInterCGMultiBody,
                                dd->nc);
    }
    else
    {
        limit = 0;
    }
    /* Communicate the information set by the master to all nodes */
    gmx_bcast(sizeof(dd->nc),dd->nc,cr);
    if (EEL_PME(ir->coulombtype))
    {
        gmx_bcast(sizeof(ir->nkx),&ir->nkx,cr);
        gmx_bcast(sizeof(ir->nky),&ir->nky,cr);
        gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
    }
    else
    {
        cr->npmenodes = 0;
    }

    return limit;
}
Beispiel #10
0
/* Determine the distribution of DD cells over the dimensions.
 *
 * The number of particle-particle nodes (nnodes_tot - npme_only) is
 * factorized and the factors are distributed over the dimensions by
 * assign_factors(), which writes the resulting grid to nc.
 *
 * The minimum cell size starts at cellsize_limit and is enlarged by
 * 1/dlb_scale with dynamic load balancing, or otherwise by
 * DD_GRID_MARGIN_PRES_SCALE when pressure coupling is active. This
 * scaling is applied whether or not a log file is open.
 *
 * Terminates through gmx_fatal() when dynamic load balancing is used
 * with dlb_scale >= 1.
 *
 * Returns the minimum cell size after applying the margin.
 */
static real optimize_ncells(FILE *fplog,
                            int nnodes_tot,int npme_only,
                            bool bDynLoadBal,real dlb_scale,
                            gmx_mtop_t *mtop,matrix box,gmx_ddbox_t *ddbox,
                            t_inputrec *ir,
                            gmx_domdec_t *dd,
                            real cellsize_limit,real cutoff,
                            bool bInterCGBondeds,bool bInterCGMultiBody,
                            ivec nc)
{
    int npp,npme,ndiv,*div,*mdiv,d,nmax;
    bool bExcl_pbcdx;
    float pbcdxr;
    real limit;
    ivec itry;
    
    limit  = cellsize_limit;
    
    dd->nc[XX] = 1;
    dd->nc[YY] = 1;
    dd->nc[ZZ] = 1;

    npp = nnodes_tot - npme_only;
    if (EEL_PME(ir->coulombtype))
    {
        /* Without separate PME nodes all PP nodes take part in PME */
        npme = (npme_only > 0 ? npme_only : npp);
    }
    else
    {
        npme = 0;
    }
    
    if (bInterCGBondeds)
    {
        /* For Ewald exclusions pbc_dx is not called */
        bExcl_pbcdx =
            (EEL_EXCL_FORCES(ir->coulombtype) && !EEL_FULL(ir->coulombtype));
        pbcdxr = (double)n_bonded_dx(mtop,bExcl_pbcdx)/(double)mtop->natoms;
    }
    else
    {
        /* Every molecule is a single charge group: no pbc required */
        pbcdxr = 0;
    }
    /* Add a margin for DLB and/or pressure scaling */
    if (bDynLoadBal)
    {
        if (dlb_scale >= 1.0)
        {
            gmx_fatal(FARGS,"The value for option -dds should be smaller than 1");
        }
        if (fplog)
        {
            fprintf(fplog,"Scaling the initial minimum size with 1/%g (option -dds) = %g\n",dlb_scale,1/dlb_scale);
        }
        limit /= dlb_scale;
    }
    else if (ir->epc != epcNO)
    {
        if (fplog)
        {
            fprintf(fplog,"To account for pressure scaling, scaling the initial minimum size with %g\n",DD_GRID_MARGIN_PRES_SCALE);
        }
        /* The margin must not depend on whether a log file is open */
        limit *= DD_GRID_MARGIN_PRES_SCALE;
    }
    
    if (fplog)
    {
        fprintf(fplog,"Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm\n",npp,limit);

        if (limit > 0)
        {
            fprintf(fplog,"The maximum allowed number of cells is:");
            for(d=0; d<DIM; d++)
            {
                nmax = (int)(ddbox->box_size[d]*ddbox->skew_fac[d]/limit);
                if (d >= ddbox->npbcdim && nmax < 2)
                {
                    nmax = 2;
                }
                fprintf(fplog," %c %d",'X' + d,nmax);
            }
            fprintf(fplog,"\n");
        }
    }
    
    if (debug)
    {
        fprintf(debug,"Average nr of pbc_dx calls per atom %.2f\n",pbcdxr);
    }
    
    /* Decompose npp in factors */
    ndiv = factorize(npp,&div,&mdiv);
    
    itry[XX] = 1;
    itry[YY] = 1;
    itry[ZZ] = 1;
    clear_ivec(nc);
    assign_factors(dd,limit,cutoff,box,ddbox,ir,pbcdxr,
                   npme,ndiv,div,mdiv,itry,nc);
    
    sfree(div);
    sfree(mdiv);
    
    return limit;
}
Beispiel #11
0
/* Computes the "low-level" force and energy contributions for one step.
 *
 * In the order in which they appear in the body:
 *  - optional QM/MM energy (fr->bQMMM),
 *  - wall interactions (ir->nwall),
 *  - implicit-solvent (GB) setup: reset fr->dvda and, if bBornRadii is set,
 *    recompute the Born radii,
 *  - group-scheme non-bonded interactions, including the foreign-lambda
 *    energies when soft-core is in use,
 *  - GB forces,
 *  - in-place shifting of the coordinates x when graph is non-NULL,
 *  - listed interactions via do_force_listed(),
 *  - long-range electrostatics and/or LJ-PME (Ewald corrections, reciprocal
 *    PME mesh or plain Ewald), or otherwise the reaction-field exclusion
 *    correction.
 *
 * flags is a bit mask of GMX_FORCE_* values selecting what is computed.
 * Energy terms and dV/dlambda contributions are accumulated into enerd.
 * fr->vir_el_recip and fr->vir_lj_recip are cleared before the long-range
 * part. *cycles_pme is set to 0 and only overwritten with the PME mesh
 * cycle count when the mesh part is actually run here.
 * f, f_longrange and fr->f_novirsum are passed on to the called routines
 * (presumably as force accumulators - confirm against the callees).
 */
void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                       t_idef     *idef,    t_commrec  *cr,
                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
                       t_mdatoms  *md,
                       rvec       x[],      history_t  *hist,
                       rvec       f[],
                       rvec       f_longrange[],
                       gmx_enerdata_t *enerd,
                       t_fcdata   *fcd,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       gmx_bool       bBornRadii,
                       matrix     box,
                       t_lambda   *fepvals,
                       real       *lambda,
                       t_graph    *graph,
                       t_blocka   *excl,
                       rvec       mu_tot[],
                       int        flags,
                       float      *cycles_pme)
{
    int         i, j;
    int         donb_flags;
    gmx_bool    bSB;
    int         pme_flags;
    matrix      boxs;
    rvec        box_size;
    t_pbc       pbc;
    real        dvdl_dum[efptNR], dvdl_nb[efptNR];

#ifdef GMX_MPI
    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
#endif

    set_pbc(&pbc, fr->ePBC, box);

    /* reset free energy components */
    for (i = 0; i < efptNR; i++)
    {
        dvdl_nb[i]  = 0;
        dvdl_dum[i] = 0;
    }

    /* Copy the box diagonal into box_size */
    for (i = 0; (i < DIM); i++)
    {
        box_size[i] = box[i][i];
    }

    debug_gmx();

    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
    }

    /* Call the short range functions all in one go. */

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
    /* Timing is compiled out: with TAKETIME defined as FALSE all
     * "if (TAKETIME)" blocks in this function are never executed.
     */
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0 = MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        /* foreign lambda component for walls */
        real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
                                   enerd->grpp.ener[egLJSR], nrnb);
        enerd->dvdl_lin[efptVDW] += dvdl_walls;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsNONBONDED);

        for (i = 0; i < born->nr; i++)
        {
            fr->dvda[i] = 0;
        }

        if (bBornRadii)
        {
            calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
        }

        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
    }

    where();
    /* We only do non-bonded calculation with group scheme here, the verlet
     * calls are done from do_force_cutsVERLET(). */
    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
    {
        /* Translate the GMX_FORCE_* request bits into GMX_NONBONDED_* bits */
        donb_flags = 0;
        /* Add short-range interactions */
        donb_flags |= GMX_NONBONDED_DO_SR;

        /* Currently all group scheme kernels always calculate (shift-)forces */
        if (flags & GMX_FORCE_FORCES)
        {
            donb_flags |= GMX_NONBONDED_DO_FORCE;
        }
        if (flags & GMX_FORCE_VIRIAL)
        {
            donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
        }
        if (flags & GMX_FORCE_ENERGY)
        {
            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
        }
        if (flags & GMX_FORCE_DO_LR)
        {
            donb_flags |= GMX_NONBONDED_DO_LR;
        }

        wallcycle_sub_start(wcycle, ewcsNONBONDED);
        do_nonbonded(fr, x, f, f_longrange, md, excl,
                     &enerd->grpp, nrnb,
                     lambda, dvdl_nb, -1, -1, donb_flags);

        /* If we do foreign lambda and we have soft-core interactions
         * we have to recalculate the (non-linear) energies contributions.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
        {
            for (i = 0; i < enerd->n_lambda; i++)
            {
                real lam_i[efptNR];

                /* Index 0 uses the current lambda values; index i > 0
                 * uses entry i-1 of fepvals->all_lambda.
                 */
                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                reset_foreign_enerdata(enerd);
                /* The force flag is masked out for the foreign-lambda call;
                 * dvdl goes to the dummy array dvdl_dum.
                 */
                do_nonbonded(fr, x, f, f_longrange, md, excl,
                             &(enerd->foreign_grpp), nrnb,
                             lam_i, dvdl_dum, -1, -1,
                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
                sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
        where();
    }

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    /* MRS: Eventually, may need to include free energy contribution here! */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsLISTED);
        calc_gb_forces(cr, md, born, top, x, f, fr, idef,
                       ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
        wallcycle_sub_stop(wcycle, ewcsLISTED);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1          = MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    /* Book the non-bonded dV/dlambda as non-linear when soft-core is
     * active (sc_alpha != 0) and as linear otherwise.
     */
    if (fepvals->sc_alpha != 0)
    {
        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
    }
    else
    {
        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
    }

    if (fepvals->sc_alpha != 0)

    /* even though coulomb part is linear, we already added it, because we
       need to go through the vdw calculation anyway */
    {
        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
    }
    else
    {
        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
    }

    debug_gmx();


    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
    }

    /* Shift the coordinates. Must be done before listed forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no listed forces have to be evaluated.
     *
     * The shifting and PBC code is deliberately not timed, since with
     * the Verlet scheme it only takes non-zero time with triclinic
     * boxes, and even then the time is around a factor of 100 less
     * than the next smallest counter.
     */


    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        /* Flop accounting: triclinic boxes are counted at twice the cost */
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }
    /* Check whether we need to do listed interactions or correct for exclusions */
    if (fr->bMolPBC &&
        ((flags & GMX_FORCE_LISTED)
         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
    {
        /* TODO There are no electrostatics methods that require this
           transformation, when using the Verlet scheme, so update the
           above conditional. */
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
    }
    debug_gmx();

    do_force_listed(wcycle, box, ir->fepvals, cr->ms,
                    idef, (const rvec *) x, hist, f, fr,
                    &pbc, graph, enerd, nrnb, lambda, md, fcd,
                    DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
                    flags);

    where();

    /* Reset the PME cycle count and reciprocal-space virials; they are
     * only filled in below when the corresponding long-range code runs.
     */
    *cycles_pme = 0;
    clear_mat(fr->vir_el_recip);
    clear_mat(fr->vir_lj_recip);

    /* Do long-range electrostatics and/or LJ-PME, including related short-range
     * corrections.
     */
    if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
    {
        int  status            = 0;
        real Vlr_q             = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
        real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;

        /* With two walls, use a copy of the box (and box_size) whose Z
         * vector is scaled by ir->wall_ewald_zfac for the Ewald routines.
         */
        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box, boxs);
            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
        {
            real dvdl_long_range_correction_q   = 0;
            real dvdl_long_range_correction_lj  = 0;
            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            /* The TPI molecule does not have exclusions with the rest
             * of the system and no intra-molecular PME grid
             * contributions will be calculated in
             * gmx_pme_calc_energy.
             */
            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
                ir->ewald_geometry != eewg3D ||
                ir->epsilon_surface != 0)
            {
                int nthreads, t;

                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                if (fr->n_tpi > 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }

                nthreads = gmx_omp_nthreads_get(emntBonded);
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (t = 0; t < nthreads; t++)
                {
                    int     i;
                    rvec   *fnv;
                    tensor *vir_q, *vir_lj;
                    real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
                    /* Thread 0 writes straight into the final outputs;
                     * the other threads use their own buffers in
                     * fr->f_t[t], whose forces and virials are cleared
                     * here and summed afterwards by reduce_thread_forces().
                     */
                    if (t == 0)
                    {
                        fnv       = fr->f_novirsum;
                        vir_q     = &fr->vir_el_recip;
                        vir_lj    = &fr->vir_lj_recip;
                        Vcorrt_q  = &Vcorr_q;
                        Vcorrt_lj = &Vcorr_lj;
                        dvdlt_q   = &dvdl_long_range_correction_q;
                        dvdlt_lj  = &dvdl_long_range_correction_lj;
                    }
                    else
                    {
                        fnv       = fr->f_t[t].f;
                        vir_q     = &fr->f_t[t].vir_q;
                        vir_lj    = &fr->f_t[t].vir_lj;
                        Vcorrt_q  = &fr->f_t[t].Vcorr_q;
                        Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
                        dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
                        dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
                        for (i = 0; i < fr->natoms_force; i++)
                        {
                            clear_rvec(fnv[i]);
                        }
                        clear_mat(*vir_q);
                        clear_mat(*vir_lj);
                    }
                    *dvdlt_q  = 0;
                    *dvdlt_lj = 0;

                    /* Each thread handles the range
                     * [fr->excl_load[t], fr->excl_load[t+1]).
                     */
                    ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
                                       cr, t, fr,
                                       md->chargeA, md->chargeB,
                                       md->sqrt_c6A, md->sqrt_c6B,
                                       md->sigmaA, md->sigmaB,
                                       md->sigma3A, md->sigma3B,
                                       md->nChargePerturbed || md->nTypePerturbed,
                                       ir->cutoff_scheme != ecutsVERLET,
                                       excl, x, bSB ? boxs : box, mu_tot,
                                       ir->ewald_geometry,
                                       ir->epsilon_surface,
                                       fnv, *vir_q, *vir_lj,
                                       Vcorrt_q, Vcorrt_lj,
                                       lambda[efptCOUL], lambda[efptVDW],
                                       dvdlt_q, dvdlt_lj);
                }
                if (nthreads > 1)
                {
                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
                                         fr->vir_el_recip, fr->vir_lj_recip,
                                         &Vcorr_q, &Vcorr_lj,
                                         &dvdl_long_range_correction_q,
                                         &dvdl_long_range_correction_lj,
                                         nthreads, fr->f_t);
                }
                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }

            if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0)
            {
                /* This is not in a subcounter because it takes a
                   negligible and constant-sized amount of time */
                Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
                                                   &dvdl_long_range_correction_q,
                                                   fr->vir_el_recip);
            }

            enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q;
            enerd->dvdl_lin[efptVDW]  += dvdl_long_range_correction_lj;

            /* The mesh part is only run on ranks that have PME duty */
            if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME))
            {
                /* Do reciprocal PME for Coulomb and/or LJ. */
                assert(fr->n_tpi >= 0);
                /* With TPI (n_tpi > 0) the mesh is only recomputed when
                 * GMX_FORCE_STATECHANGED is set.
                 */
                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
                {
                    pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
                    if (EEL_PME(fr->eeltype))
                    {
                        pme_flags     |= GMX_PME_DO_COULOMB;
                    }
                    if (EVDW_PME(fr->vdwtype))
                    {
                        pme_flags |= GMX_PME_DO_LJ;
                    }
                    if (flags & GMX_FORCE_FORCES)
                    {
                        pme_flags |= GMX_PME_CALC_F;
                    }
                    if (flags & GMX_FORCE_VIRIAL)
                    {
                        pme_flags |= GMX_PME_CALC_ENER_VIR;
                    }
                    if (fr->n_tpi > 0)
                    {
                        /* We don't calculate f, but we do want the potential */
                        pme_flags |= GMX_PME_CALC_POT;
                    }
                    wallcycle_start(wcycle, ewcPMEMESH);
                    /* The last fr->n_tpi home atoms are left out of the
                     * mesh calculation.
                     */
                    status = gmx_pme_do(fr->pmedata,
                                        0, md->homenr - fr->n_tpi,
                                        x, fr->f_novirsum,
                                        md->chargeA, md->chargeB,
                                        md->sqrt_c6A, md->sqrt_c6B,
                                        md->sigmaA, md->sigmaB,
                                        bSB ? boxs : box, cr,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                        nrnb, wcycle,
                                        fr->vir_el_recip, fr->ewaldcoeff_q,
                                        fr->vir_lj_recip, fr->ewaldcoeff_lj,
                                        &Vlr_q, &Vlr_lj,
                                        lambda[efptCOUL], lambda[efptVDW],
                                        &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags);
                    *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
                    if (status != 0)
                    {
                        gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
                    }
                    /* We should try to do as little computation after
                     * this as possible, because parallel PME synchronizes
                     * the nodes, so we want all load imbalance of the
                     * rest of the force calculation to be before the PME
                     * call.  DD load balancing is done on the whole time
                     * of the force call (without PME).
                     */
                }
                if (fr->n_tpi > 0)
                {
                    if (EVDW_PME(ir->vdwtype))
                    {

                        gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
                    }
                    /* Determine the PME grid energy of the test molecule
                     * with the PME grid potential of the other charges.
                     */
                    gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
                                        x + md->homenr - fr->n_tpi,
                                        md->chargeA + md->homenr - fr->n_tpi,
                                        &Vlr_q);
                }
            }
        }

        /* Ewald-type electrostatics that is not PME: use do_ewald() */
        if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype))
        {
            Vlr_q = do_ewald(ir, x, fr->f_novirsum,
                             md->chargeA, md->chargeB,
                             box_size, cr, md->homenr,
                             fr->vir_el_recip, fr->ewaldcoeff_q,
                             lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table);
        }

        /* Note that with separate PME nodes we get the real energies later */
        enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q;
        enerd->dvdl_lin[efptVDW]  += dvdl_long_range_lj;
        enerd->term[F_COUL_RECIP]  = Vlr_q + Vcorr_q;
        enerd->term[F_LJ_RECIP]    = Vlr_lj + Vcorr_lj;
        if (debug)
        {
            fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
                    Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
            pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
            fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
                    Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]);
            pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM);
        }
    }
    else
    {
        /* Is there a reaction-field exclusion correction needed? */
        if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype)
        {
            /* With the Verlet scheme, exclusion forces are calculated
             * in the non-bonded kernel.
             */
            if (ir->cutoff_scheme != ecutsVERLET)
            {
                real dvdl_rf_excl      = 0;
                enerd->term[F_RF_EXCL] =
                    RF_excl_correction(fr, graph, md, excl, x, f,
                                       fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);

                enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
            }
        }
    }
    where();
    debug_gmx();

    if (debug)
    {
        print_nrnb(debug, nrnb);
    }
    debug_gmx();

#ifdef GMX_MPI
    /* Never executed while TAKETIME is defined as FALSE above */
    if (TAKETIME)
    {
        t2 = MPI_Wtime();
        MPI_Barrier(cr->mpi_comm_mygroup);
        t3          = MPI_Wtime();
        fr->t_wait += t3-t2;
        if (fr->timesteps == 11)
        {
            char buf[22];
            fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
                    cr->nodeid, gmx_step_str(fr->timesteps, buf),
                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
        }
        fr->timesteps++;
    }
#endif

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
    }

}
Beispiel #12
0
/* Estimates the fraction of the total per-step cost that is spent in the
 * PME mesh part: cost_pme/(cost_bond + cost_pp + cost_pme).
 *
 * The individual costs are built from the C_* weight constants together
 * with the atom counts returned by the pp_*_load() helper, the PME
 * interpolation order and the PME grid dimensions in ir. When a debug
 * file is open the separate cost terms and the final ratio are printed.
 */
float pme_load_estimate(const gmx_mtop_t *mtop, const t_inputrec *ir,
                        matrix box)
{
    int      nq_tot, nlj_tot;
    gmx_bool bChargePerturbed, bTypePerturbed;
    double   cost_pp;

    /* The cost weights are machine dependent. According to the original
     * authors they are accurate for Intel Core2 and AMD Athlon 64 in
     * single precision; in double precision the PME mesh is slightly
     * cheaper, but not enough to warrant different numbers.
     */
    const double cost_bond = C_BOND*n_bonded_dx(mtop, TRUE);

    /* Particle-particle cost and atom counts depend on the cut-off scheme */
    if (ir->cutoff_scheme != ecutsGROUP)
    {
        pp_verlet_load(mtop, ir, box,
                       &nq_tot, &nlj_tot, &cost_pp,
                       &bChargePerturbed, &bTypePerturbed);
    }
    else
    {
        pp_group_load(mtop, ir, box,
                      &nq_tot, &nlj_tot, &cost_pp,
                      &bChargePerturbed, &bTypePerturbed);
    }

    double cost_redist = 0;
    double cost_spread = 0;
    double cost_fft    = 0;
    double cost_solve  = 0;

    /* Coulomb PME: two meshes when charges are perturbed with free energy */
    if (EEL_PME(ir->coulombtype))
    {
        const int nmesh = (ir->efep != efepNO && bChargePerturbed) ? 2 : 1;

        cost_redist +=       C_PME_REDIST*nq_tot;
        cost_spread += nmesh*C_PME_SPREAD*nq_tot*std::pow(static_cast<real>(ir->pme_order), static_cast<real>(3.0));
        cost_fft    += nmesh*C_PME_FFT*ir->nkx*ir->nky*ir->nkz*std::log(static_cast<real>(ir->nkx*ir->nky*ir->nkz));
        cost_solve  += nmesh*C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz;
    }

    /* LJ-PME: same structure, with 7 mesh terms for the LB combination rule */
    if (EVDW_PME(ir->vdwtype))
    {
        int nmesh = (ir->efep != efepNO && bTypePerturbed) ? 2 : 1;

        if (ir->ljpme_combination_rule == eljpmeLB)
        {
            nmesh *= 7;
        }
        cost_redist +=       C_PME_REDIST*nlj_tot;
        cost_spread += nmesh*C_PME_SPREAD*nlj_tot*std::pow(static_cast<real>(ir->pme_order), static_cast<real>(3.0));
        cost_fft    += nmesh*C_PME_FFT*ir->nkx*ir->nky*ir->nkz*std::log(static_cast<real>(ir->nkx*ir->nky*ir->nkz));
        cost_solve  += nmesh*C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz;
    }

    const double cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;
    const float  ratio    = cost_pme/(cost_bond + cost_pp + cost_pme);

    if (debug)
    {
        fprintf(debug,
                "cost_bond   %f\n"
                "cost_pp     %f\n"
                "cost_redist %f\n"
                "cost_spread %f\n"
                "cost_fft    %f\n"
                "cost_solve  %f\n",
                cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve);

        fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio);
    }

    return ratio;
}
Beispiel #13
0
int main (int argc, char *argv[])
{
  static const char *desc[] = {
    "The gromacs preprocessor",
    "reads a molecular topology file, checks the validity of the",
    "file, expands the topology from a molecular description to an atomic",
    "description. The topology file contains information about",
    "molecule types and the number of molecules, the preprocessor",
    "copies each molecule as needed. ",
    "There is no limitation on the number of molecule types. ",
    "Bonds and bond-angles can be converted into constraints, separately",
    "for hydrogens and heavy atoms.",
    "Then a coordinate file is read and velocities can be generated",
    "from a Maxwellian distribution if requested.",
    "grompp also reads parameters for the mdrun ",
    "(eg. number of MD steps, time step, cut-off), and others such as",
    "NEMD parameters, which are corrected so that the net acceleration",
    "is zero.",
    "Eventually a binary file is produced that can serve as the sole input",
    "file for the MD program.[PAR]",
    
    "grompp uses the atom names from the topology file. The atom names",
    "in the coordinate file (option [TT]-c[tt]) are only read to generate",
    "warnings when they do not match the atom names in the topology.",
    "Note that the atom names are irrelevant for the simulation as",
    "only the atom types are used for generating interaction parameters.[PAR]",

    "grompp calls a preprocessor to resolve includes, macros ",
    "etcetera. By default we use the cpp in your path. To specify a "
    "different macro-preprocessor (e.g. m4) or alternative location",

    "you can put a line in your parameter file specifying the path",
    "to that program. Specifying [TT]-pp[tt] will get the pre-processed",
    "topology file written out.[PAR]",
    
    "If your system does not have a c-preprocessor, you can still",
    "use grompp, but you do not have access to the features ",
    "from the cpp. Command line options to the c-preprocessor can be given",
    "in the [TT].mdp[tt] file. See your local manual (man cpp).[PAR]",
    
    "When using position restraints a file with restraint coordinates",
    "can be supplied with [TT]-r[tt], otherwise restraining will be done",
    "with respect to the conformation from the [TT]-c[tt] option.",
    "For free energy calculation the the coordinates for the B topology",
    "can be supplied with [TT]-rb[tt], otherwise they will be equal to",
    "those of the A topology.[PAR]",
    
    "Starting coordinates can be read from trajectory with [TT]-t[tt].",
    "The last frame with coordinates and velocities will be read,",
    "unless the [TT]-time[tt] option is used.",
    "Note that these velocities will not be used when [TT]gen_vel = yes[tt]",
    "in your [TT].mdp[tt] file. An energy file can be supplied with",
    "[TT]-e[tt] to have exact restarts when using pressure and/or",
    "Nose-Hoover temperature coupling. For an exact restart do not forget",
    "to turn off velocity generation and turn on unconstrained starting",
    "when constraints are present in the system.",
    "If you want to continue a crashed run, it is",
    "easier to use [TT]tpbconv[tt].[PAR]",

    "Using the [TT]-morse[tt] option grompp can convert the harmonic bonds",
    "in your topology to morse potentials. This makes it possible to break",
    "bonds. For this option to work you need an extra file in your $GMXLIB",
    "with dissociation energy. Use the -debug option to get more information",
    "on the workings of this option (look for MORSE in the grompp.log file",
    "using less or something like that).[PAR]",
    
    "By default all bonded interactions which have constant energy due to",
    "virtual site constructions will be removed. If this constant energy is",
    "not zero, this will result in a shift in the total energy. All bonded",
    "interactions can be kept by turning off [TT]-rmvsbds[tt]. Additionally,",
    "all constraints for distances which will be constant anyway because",
    "of virtual site constructions will be removed. If any constraints remain",
    "which involve virtual sites, a fatal error will result.[PAR]"
    
    "To verify your run input file, please make notice of all warnings",
    "on the screen, and correct where necessary. Do also look at the contents",
    "of the [TT]mdout.mdp[tt] file, this contains comment lines, as well as",
    "the input that [TT]grompp[tt] has read. If in doubt you can start grompp",
    "with the [TT]-debug[tt] option which will give you more information",
    "in a file called grompp.log (along with real debug info). Finally, you",
    "can see the contents of the run input file with the [TT]gmxdump[tt]",
    "program."
  };
  t_gromppopts *opts;
  gmx_mtop_t   *sys;
  int          nmi;
  t_molinfo    *mi;
  gpp_atomtype_t atype;
  t_inputrec   *ir;
  int          natoms,nvsite,comb,mt;
  t_params     *plist;
  t_state      state;
  matrix       box;
  real         max_spacing,fudgeQQ;
  double       reppow;
  char         fn[STRLEN],fnB[STRLEN],*mdparin;
  int          nerror,ntype;
  bool         bNeedVel,bGenVel;
  bool         have_radius,have_vol,have_surftens,have_gb_radius,have_S_hct;
  bool         have_atomnumber;
  int		   n12,n13,n14;
  t_params     *gb_plist = NULL;
  gmx_genborn_t *born = NULL;

  t_filenm fnm[] = {
    { efMDP, NULL,  NULL,        ffOPTRD },
    { efMDP, "-po", "mdout",     ffWRITE },
    { efSTX, "-c",  NULL,        ffREAD  },
    { efSTX, "-r",  NULL,        ffOPTRD },
    { efSTX, "-rb", NULL,        ffOPTRD },
    { efNDX, NULL,  NULL,        ffOPTRD },
    { efTOP, NULL,  NULL,        ffREAD  },
    { efTOP, "-pp", "processed", ffOPTWR },
    { efTPX, "-o",  NULL,        ffWRITE },
    { efTRN, "-t",  NULL,        ffOPTRD },
    { efEDR, "-e",  NULL,        ffOPTRD }
  };
#define NFILE asize(fnm)

  /* Command line options */
  static bool bVerbose=TRUE,bRenum=TRUE;
  static bool bRmVSBds=TRUE,bZero=FALSE;
  static int  i,maxwarn=0;
  static real fr_time=-1;
  t_pargs pa[] = {
    { "-v",       FALSE, etBOOL, {&bVerbose},
      "Be loud and noisy" },
    { "-time",    FALSE, etREAL, {&fr_time},
      "Take frame at or first after this time." },
    { "-rmvsbds",FALSE, etBOOL, {&bRmVSBds},
      "Remove constant bonded interactions with virtual sites" },
    { "-maxwarn", FALSE, etINT,  {&maxwarn},
      "Number of allowed warnings during input processing" },
    { "-zero",    FALSE, etBOOL, {&bZero},
      "Set parameters for bonded interactions without defaults to zero instead of generating an error" },
    { "-renum",   FALSE, etBOOL, {&bRenum},
      "Renumber atomtypes and minimize number of atomtypes" }
  };
  
  CopyRight(stdout,argv[0]);
  
  /* Initiate some variables */
  nerror=0;
  snew(ir,1);
  snew(opts,1);
  init_ir(ir,opts);
  
  /* Parse the command line */
  parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,
		    asize(desc),desc,0,NULL);
  
  init_warning(maxwarn);
  
  /* PARAMETER file processing */
  mdparin = opt2fn("-f",NFILE,fnm);
  set_warning_line(mdparin,-1);    
  get_ir(mdparin,opt2fn("-po",NFILE,fnm),ir,opts,&nerror);
  
  if (bVerbose) 
    fprintf(stderr,"checking input for internal consistency...\n");
  check_ir(mdparin,ir,opts,&nerror);

  if (ir->ld_seed == -1) {
    ir->ld_seed = make_seed();
    fprintf(stderr,"Setting the LD random seed to %d\n",ir->ld_seed);
  }

  bNeedVel = EI_STATE_VELOCITY(ir->eI);
  bGenVel  = (bNeedVel && opts->bGenVel);

  snew(plist,F_NRE);
  init_plist(plist);
  snew(sys,1);
  atype = init_atomtype();
  if (debug)
    pr_symtab(debug,0,"Just opened",&sys->symtab);
    
  strcpy(fn,ftp2fn(efTOP,NFILE,fnm));
  if (!gmx_fexist(fn)) 
    gmx_fatal(FARGS,"%s does not exist",fn);
  new_status(fn,opt2fn_null("-pp",NFILE,fnm),opt2fn("-c",NFILE,fnm),
	     opts,ir,bZero,bGenVel,bVerbose,&state,
	     atype,sys,&nmi,&mi,plist,&comb,&reppow,&fudgeQQ,
	     opts->bMorse,
	     &nerror);
  
  if (debug)
    pr_symtab(debug,0,"After new_status",&sys->symtab);
  
  if (count_constraints(sys,mi) && (ir->eConstrAlg == econtSHAKE)) {
    if (ir->eI == eiCG || ir->eI == eiLBFGS) {
      fprintf(stderr,
	      "ERROR: Can not do %s with %s, use %s\n",
	      EI(ir->eI),econstr_names[econtSHAKE],econstr_names[econtLINCS]);
      nerror++;
    }
    if (ir->bPeriodicMols) {
      fprintf(stderr,
	      "ERROR: can not do periodic molecules with %s, use %s\n",
	      econstr_names[econtSHAKE],econstr_names[econtLINCS]);
      nerror++;
    }
  }

  /* If we are doing GBSA, check that we got the parameters we need.
   * This check verifies that there are GBSA parameters for all
   * atom types in the force field. To work around this for testing
   * purposes, comment out the nerror++ counter temporarily.
   */
  have_radius=have_vol=have_surftens=have_gb_radius=have_S_hct=TRUE;
  for(i=0;i<get_atomtype_ntypes(atype);i++) {
    have_radius=have_radius       && (get_atomtype_radius(i,atype) > 0);
    have_vol=have_vol             && (get_atomtype_vol(i,atype) > 0);
    have_surftens=have_surftens   && (get_atomtype_surftens(i,atype) > 0);
    have_gb_radius=have_gb_radius && (get_atomtype_gb_radius(i,atype) > 0);
    have_S_hct=have_S_hct         && (get_atomtype_S_hct(i,atype) > 0);
  }
  if(!have_radius && ir->implicit_solvent==eisGBSA) {
    fprintf(stderr,"Can't do GB electrostatics; the forcefield is missing values for\n"
	    "atomtype radii, or they might be zero\n.");
    /* nerror++; */
  }
  /*
  if(!have_surftens && ir->implicit_solvent!=eisNO) {
    fprintf(stderr,"Can't do implicit solvent; the forcefield is missing values\n"
	    " for atomtype surface tension\n.");
    nerror++;                                                                                                                
  }
  */
  
  /* If we are doing QM/MM, check that we got the atom numbers */
  have_atomnumber = TRUE;
  for (i=0; i<get_atomtype_ntypes(atype); i++) {
    have_atomnumber = have_atomnumber && (get_atomtype_atomnumber(i,atype) >= 0);
  }
  if (!have_atomnumber && ir->bQMMM)
  {
    fprintf(stderr,"\n"
            "It appears as if you are trying to run a QM/MM calculation, but the force\n"
            "field you are using does not contain atom numbers fields. This is an\n"
            "optional field (introduced in Gromacs 3.3) for general runs, but mandatory\n"
            "for QM/MM. The good news is that it is easy to add - put the atom number as\n"
            "an integer just before the mass column in ffXXXnb.itp.\n"
            "NB: United atoms have the same atom numbers as normal ones.\n\n"); 
    nerror++;
  }

  if (nerror) {
    print_warn_num(FALSE);
    
    gmx_fatal(FARGS,"There were %d error(s) processing your input",nerror);
  }
  if (opt2bSet("-r",NFILE,fnm))
    sprintf(fn,"%s",opt2fn("-r",NFILE,fnm));
  else
    sprintf(fn,"%s",opt2fn("-c",NFILE,fnm));
  if (opt2bSet("-rb",NFILE,fnm))
    sprintf(fnB,"%s",opt2fn("-rb",NFILE,fnm));
  else
    strcpy(fnB,fn);

  if (nint_ftype(sys,mi,F_POSRES) > 0) {
    if (bVerbose) {
      fprintf(stderr,"Reading position restraint coords from %s",fn);
      if (strcmp(fn,fnB) == 0) {
	fprintf(stderr,"\n");
      } else {
	fprintf(stderr," and %s\n",fnB);
	if (ir->efep != efepNO && ir->n_flambda > 0) {
	  fprintf(stderr,"ERROR: can not change the position restraint reference coordinates with lambda togther with foreign lambda calculation.\n");
	  nerror++;
	}
      }
    }
    gen_posres(sys,mi,fn,fnB,
	       ir->refcoord_scaling,ir->ePBC,
	       ir->posres_com,ir->posres_comB);
  }
		
  nvsite = 0;
  /* set parameters for virtual site construction (not for vsiten) */
  for(mt=0; mt<sys->nmoltype; mt++) {
    nvsite +=
      set_vsites(bVerbose, &sys->moltype[mt].atoms, atype, mi[mt].plist);
  }
  /* now throw away all obsolete bonds, angles and dihedrals: */
  /* note: constraints are ALWAYS removed */
  if (nvsite) {
    for(mt=0; mt<sys->nmoltype; mt++) {
      clean_vsite_bondeds(mi[mt].plist,sys->moltype[mt].atoms.nr,bRmVSBds);
    }
  }
  
	/* If we are using CMAP, setup the pre-interpolation grid */
	if(plist->ncmap>0)
	{
		init_cmap_grid(&sys->cmap_grid, plist->nc, plist->grid_spacing);
		setup_cmap(plist->grid_spacing, plist->nc, plist->cmap,&sys->cmap_grid);
	}
	
  set_wall_atomtype(atype,opts,ir);
  if (bRenum) {
    renum_atype(plist, sys, ir->wall_atomtype, atype, bVerbose);
    ntype = get_atomtype_ntypes(atype);
  }
  
	/* PELA: Copy the atomtype data to the topology atomtype list */
	copy_atomtype_atomtypes(atype,&(sys->atomtypes));

	if (debug)
    pr_symtab(debug,0,"After renum_atype",&sys->symtab);

  if (bVerbose) 
    fprintf(stderr,"converting bonded parameters...\n");
	
  ntype = get_atomtype_ntypes(atype);
  convert_params(ntype, plist, mi, comb, reppow, fudgeQQ, sys);
  	
	if(ir->implicit_solvent)
	{
		printf("Constructing Generalized Born topology...\n");

		/* Check for -normvsbds switch to grompp, necessary for gb together with vsites */
		if(bRmVSBds && nvsite)
		{
			fprintf(stderr, "ERROR: Must use -normvsbds switch to grompp when doing Generalized Born\n"
					"together with virtual sites\n");
			nerror++;
		}
		
		if (nerror)
		{
			print_warn_num(FALSE);
			gmx_fatal(FARGS,"There were %d error(s) processing your input",nerror);
		}
		
		generate_gb_topology(sys,mi);
	}
	
  if (debug)
    pr_symtab(debug,0,"After convert_params",&sys->symtab);

  /* set ptype to VSite for virtual sites */
  for(mt=0; mt<sys->nmoltype; mt++) {
    set_vsites_ptype(FALSE,&sys->moltype[mt]);
  }
  if (debug) {
    pr_symtab(debug,0,"After virtual sites",&sys->symtab);
  }
  /* Check velocity for virtual sites and shells */
  if (bGenVel) {
    check_vel(sys,state.v);
  }
    
  /* check masses */
  check_mol(sys);
  
  for(i=0; i<sys->nmoltype; i++) {
    check_cg_sizes(ftp2fn(efTOP,NFILE,fnm),&sys->moltype[i].cgs);
  }

  check_warning_error(FARGS);
	
  if (bVerbose) 
    fprintf(stderr,"initialising group options...\n");
  do_index(mdparin,ftp2fn_null(efNDX,NFILE,fnm),
	   sys,bVerbose,ir,
	   bGenVel ? state.v : NULL);
	
  /* Init the temperature coupling state */
  init_gtc_state(&state,ir->opts.ngtc);

  if (bVerbose)
    fprintf(stderr,"Checking consistency between energy and charge groups...\n");
  check_eg_vs_cg(sys);
  
  if (debug)
    pr_symtab(debug,0,"After index",&sys->symtab);
  triple_check(mdparin,ir,sys,&nerror);
  close_symtab(&sys->symtab);
  if (debug)
    pr_symtab(debug,0,"After close",&sys->symtab);

  /* make exclusions between QM atoms */
  if (ir->bQMMM) {
    generate_qmexcl(sys,ir);
  }

  if (ftp2bSet(efTRN,NFILE,fnm)) {
    if (bVerbose)
      fprintf(stderr,"getting data from old trajectory ...\n");
    cont_status(ftp2fn(efTRN,NFILE,fnm),ftp2fn_null(efEDR,NFILE,fnm),
		bNeedVel,bGenVel,fr_time,ir,&state,sys);
  }

  if (ir->ePBC==epbcXY && ir->nwall!=2)
    clear_rvec(state.box[ZZ]);
  
  if (EEL_FULL(ir->coulombtype)) {
    /* Calculate the optimal grid dimensions */
    copy_mat(state.box,box);
    if (ir->ePBC==epbcXY && ir->nwall==2)
      svmul(ir->wall_ewald_zfac,box[ZZ],box[ZZ]);
    max_spacing = calc_grid(stdout,box,opts->fourierspacing,
			    &(ir->nkx),&(ir->nky),&(ir->nkz),1);
    if ((ir->coulombtype == eelPPPM) && (max_spacing > 0.1)) {
      set_warning_line(mdparin,-1);
      sprintf(warn_buf,"Grid spacing larger then 0.1 while using PPPM.");
      warning_note(NULL);
    }
  }

  if (ir->ePull != epullNO)
    set_pull_init(ir,sys,state.x,state.box,opts->pull_start);

  /*  reset_multinr(sys); */
  
  if (EEL_PME(ir->coulombtype)) {
    float ratio = pme_load_estimate(sys,ir,state.box);
    fprintf(stderr,"Estimate for the relative computational load of the PME mesh part: %.2f\n",ratio);
    if (ratio > 0.5)
      warning_note("The optimal PME mesh load for parallel simulations is below 0.5\n"
		   "and for highly parallel simulations between 0.25 and 0.33,\n"
		   "for higher performance, increase the cut-off and the PME grid spacing");
  }

  {
    double cio = compute_io(ir,sys->natoms,&sys->groups,F_NRE,1);
    sprintf(warn_buf,"This run will generate roughly %.0f Mb of data",cio);
    if (cio > 2000) {
      set_warning_line(mdparin,-1);
      warning_note(NULL);
    } else {
      printf("%s\n",warn_buf);
    }
  }
	
  if (bVerbose) 
    fprintf(stderr,"writing run input file...\n");

  print_warn_num(TRUE);
  state.lambda = ir->init_lambda;
  write_tpx_state(ftp2fn(efTPX,NFILE,fnm),ir,&state,sys);
  
  thanx(stderr);
  
  return 0;
}
Beispiel #14
0
int mdrunner(gmx_hw_opt_t *hw_opt,
             FILE *fplog, t_commrec *cr, int nfile,
             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
             gmx_bool bCompact, int nstglobalcomm,
             ivec ddxyz, int dd_node_order, real rdd, real rconstr,
             const char *dddlb_opt, real dlb_scale,
             const char *ddcsx, const char *ddcsy, const char *ddcsz,
             const char *nbpu_opt, int nstlist_cmdline,
             gmx_int64_t nsteps_cmdline, int nstepout, int resetstep,
             int gmx_unused nmultisim, int repl_ex_nst, int repl_ex_nex,
             int repl_ex_seed, real pforce, real cpt_period, real max_hours,
             int imdport, unsigned long Flags)
{
    gmx_bool                  bForceUseGPU, bTryUseGPU, bRerunMD;
    t_inputrec               *inputrec;
    t_state                  *state = NULL;
    matrix                    box;
    gmx_ddbox_t               ddbox = {0};
    int                       npme_major, npme_minor;
    t_nrnb                   *nrnb;
    gmx_mtop_t               *mtop          = NULL;
    t_mdatoms                *mdatoms       = NULL;
    t_forcerec               *fr            = NULL;
    t_fcdata                 *fcd           = NULL;
    real                      ewaldcoeff_q  = 0;
    real                      ewaldcoeff_lj = 0;
    struct gmx_pme_t        **pmedata       = NULL;
    gmx_vsite_t              *vsite         = NULL;
    gmx_constr_t              constr;
    int                       nChargePerturbed = -1, nTypePerturbed = 0, status;
    gmx_wallcycle_t           wcycle;
    gmx_bool                  bReadEkin;
    gmx_walltime_accounting_t walltime_accounting = NULL;
    int                       rc;
    gmx_int64_t               reset_counters;
    gmx_edsam_t               ed           = NULL;
    int                       nthreads_pme = 1;
    int                       nthreads_pp  = 1;
    gmx_membed_t              membed       = NULL;
    gmx_hw_info_t            *hwinfo       = NULL;
    /* The master rank decides early on bUseGPU and broadcasts this later */
    gmx_bool                  bUseGPU      = FALSE;

    /* CAUTION: threads may be started later on in this function, so
       cr doesn't reflect the final parallel state right now */
    snew(inputrec, 1);
    snew(mtop, 1);

    if (Flags & MD_APPENDFILES)
    {
        fplog = NULL;
    }

    bRerunMD     = (Flags & MD_RERUN);
    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;

    /* Detect hardware, gather information. This is an operation that is
     * global for this process (MPI rank). */
    hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU);

    gmx_print_detected_hardware(fplog, cr, hwinfo);

    if (fplog != NULL)
    {
        /* Print references after all software/hardware printing */
        please_cite(fplog, "Abraham2015");
        please_cite(fplog, "Pall2015");
        please_cite(fplog, "Pronk2013");
        please_cite(fplog, "Hess2008b");
        please_cite(fplog, "Spoel2005a");
        please_cite(fplog, "Lindahl2001a");
        please_cite(fplog, "Berendsen95a");
    }

    snew(state, 1);
    if (SIMMASTER(cr))
    {
        /* Read (nearly) all data required for the simulation */
        read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, NULL, mtop);

        if (inputrec->cutoff_scheme == ecutsVERLET)
        {
            /* Here the master rank decides if all ranks will use GPUs */
            bUseGPU = (hwinfo->gpu_info.n_dev_compatible > 0 ||
                       getenv("GMX_EMULATE_GPU") != NULL);

            /* TODO add GPU kernels for this and replace this check by:
             * (bUseGPU && (ir->vdwtype == evdwPME &&
             *               ir->ljpme_combination_rule == eljpmeLB))
             * update the message text and the content of nbnxn_acceleration_supported.
             */
            if (bUseGPU &&
                !nbnxn_gpu_acceleration_supported(fplog, cr, inputrec, bRerunMD))
            {
                /* Fallback message printed by nbnxn_acceleration_supported */
                if (bForceUseGPU)
                {
                    gmx_fatal(FARGS, "GPU acceleration requested, but not supported with the given input settings");
                }
                bUseGPU = FALSE;
            }

            prepare_verlet_scheme(fplog, cr,
                                  inputrec, nstlist_cmdline, mtop, state->box,
                                  bUseGPU);
        }
        else
        {
            if (nstlist_cmdline > 0)
            {
                gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme");
            }

            if (hwinfo->gpu_info.n_dev_compatible > 0)
            {
                md_print_warn(cr, fplog,
                              "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
                              "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n");
            }

            if (bForceUseGPU)
            {
                gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet");
            }

#ifdef GMX_TARGET_BGQ
            md_print_warn(cr, fplog,
                          "NOTE: There is no SIMD implementation of the group scheme kernels on\n"
                          "      BlueGene/Q. You will observe better performance from using the\n"
                          "      Verlet cut-off scheme.\n");
#endif
        }

        if (inputrec->eI == eiSD2)
        {
            md_print_warn(cr, fplog, "The stochastic dynamics integrator %s is deprecated, since\n"
                          "it is slower than integrator %s and is slightly less accurate\n"
                          "with constraints. Use the %s integrator.",
                          ei_names[inputrec->eI], ei_names[eiSD1], ei_names[eiSD1]);
        }
    }

    /* Check and update the hardware options for internal consistency */
    check_and_update_hw_opt_1(hw_opt, cr);

    /* Early check for externally set process affinity. */
    gmx_check_thread_affinity_set(fplog, cr,
                                  hw_opt, hwinfo->nthreads_hw_avail, FALSE);

#ifdef GMX_THREAD_MPI
    if (SIMMASTER(cr))
    {
        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
        {
            gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks");
        }

        /* Since the master knows the cut-off scheme, update hw_opt for this.
         * This is done later for normal MPI and also once more with tMPI
         * for all tMPI ranks.
         */
        check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);

        /* NOW the threads will be started: */
        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
                                                 hw_opt,
                                                 inputrec, mtop,
                                                 cr, fplog, bUseGPU);

        if (hw_opt->nthreads_tmpi > 1)
        {
            t_commrec *cr_old       = cr;
            /* now start the threads. */
            cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
                                        oenv, bVerbose, bCompact, nstglobalcomm,
                                        ddxyz, dd_node_order, rdd, rconstr,
                                        dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
                                        nbpu_opt, nstlist_cmdline,
                                        nsteps_cmdline, nstepout, resetstep, nmultisim,
                                        repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
                                        cpt_period, max_hours,
                                        Flags);
            /* the main thread continues here with a new cr. We don't deallocate
               the old cr because other threads may still be reading it. */
            if (cr == NULL)
            {
                gmx_comm("Failed to spawn threads");
            }
        }
    }
#endif
    /* END OF CAUTION: cr is now reliable */

    /* g_membed initialisation *
     * Because we change the mtop, init_membed is called before the init_parallel *
     * (in case we ever want to make it run in parallel) */
    if (opt2bSet("-membed", nfile, fnm))
    {
        if (MASTER(cr))
        {
            fprintf(stderr, "Initializing membed");
        }
        membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period);
    }

    if (PAR(cr))
    {
        /* now broadcast everything to the non-master nodes/threads: */
        init_parallel(cr, inputrec, mtop);

        /* The master rank decided on the use of GPUs,
         * broadcast this information to all ranks.
         */
        gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr);
    }

    if (fplog != NULL)
    {
        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
        fprintf(fplog, "\n");
    }

    /* now make sure the state is initialized and propagated */
    set_state_entries(state, inputrec);

    /* A parallel command line option consistency check that we can
       only do after any threads have started. */
    if (!PAR(cr) &&
        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
    {
        gmx_fatal(FARGS,
                  "The -dd or -npme option request a parallel simulation, "
#ifndef GMX_MPI
                  "but %s was compiled without threads or MPI enabled"
#else
#ifdef GMX_THREAD_MPI
                  "but the number of threads (option -nt) is 1"
#else
                  "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec"
#endif
#endif
                  , output_env_get_program_display_name(oenv)
                  );
    }

    if (bRerunMD &&
        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
    {
        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
    }

    if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr))
    {
        gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank");
    }

    if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
    {
        if (cr->npmenodes > 0)
        {
            gmx_fatal_collective(FARGS, cr, NULL,
                                 "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
        }

        cr->npmenodes = 0;
    }

    if (bUseGPU && cr->npmenodes < 0)
    {
        /* With GPUs we don't automatically use PME-only ranks. PME ranks can
         * improve performance with many threads per GPU, since our OpenMP
         * scaling is bad, but it's difficult to automate the setup.
         */
        cr->npmenodes = 0;
    }

#ifdef GMX_FAHCORE
    if (MASTER(cr))
    {
        fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
    }
#endif

    /* NMR restraints must be initialized before load_checkpoint,
     * since with time averaging the history is added to t_state.
     * For proper consistency check we therefore need to extend
     * t_state here.
     * So the PME-only nodes (if present) will also initialize
     * the distance restraints.
     */
    snew(fcd, 1);

    /* This needs to be called before read_checkpoint to extend the state */
    init_disres(fplog, mtop, inputrec, cr, fcd, state, repl_ex_nst > 0);

    init_orires(fplog, mtop, state->x, inputrec, cr, &(fcd->orires),
                state);

    if (DEFORM(*inputrec))
    {
        /* Store the deform reference box before reading the checkpoint */
        if (SIMMASTER(cr))
        {
            copy_mat(state->box, box);
        }
        if (PAR(cr))
        {
            gmx_bcast(sizeof(box), box, cr);
        }
        /* Because we do not have the update struct available yet
         * in which the reference values should be stored,
         * we store them temporarily in static variables.
         * This should be thread safe, since they are only written once
         * and with identical values.
         */
        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
        deform_init_init_step_tpx = inputrec->init_step;
        copy_mat(box, deform_init_box_tpx);
        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
    }

    if (opt2bSet("-cpi", nfile, fnm))
    {
        /* Check if checkpoint file exists before doing continuation.
         * This way we can use identical input options for the first and subsequent runs...
         */
        if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) )
        {
            load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog,
                            cr, ddxyz,
                            inputrec, state, &bReadEkin,
                            (Flags & MD_APPENDFILES),
                            (Flags & MD_APPENDFILESSET));

            if (bReadEkin)
            {
                Flags |= MD_READ_EKIN;
            }
        }
    }

    if (MASTER(cr) && (Flags & MD_APPENDFILES))
    {
        gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr,
                     Flags, &fplog);
    }

    /* override nsteps with value from cmdline */
    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);

    if (SIMMASTER(cr))
    {
        copy_mat(state->box, box);
    }

    if (PAR(cr))
    {
        gmx_bcast(sizeof(box), box, cr);
    }

    /* Essential dynamics */
    if (opt2bSet("-ei", nfile, fnm))
    {
        /* Open input and output files, allocate space for ED data structure */
        ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr);
    }

    if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
                     inputrec->eI == eiNM))
    {
        cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr,
                                           dddlb_opt, dlb_scale,
                                           ddcsx, ddcsy, ddcsz,
                                           mtop, inputrec,
                                           box, state->x,
                                           &ddbox, &npme_major, &npme_minor);

        make_dd_communicators(fplog, cr, dd_node_order);

        /* Set overallocation to avoid frequent reallocation of arrays */
        set_over_alloc_dd(TRUE);
    }
    else
    {
        /* PME, if used, is done on all nodes with 1D decomposition */
        cr->npmenodes = 0;
        cr->duty      = (DUTY_PP | DUTY_PME);
        npme_major    = 1;
        npme_minor    = 1;

        if (inputrec->ePBC == epbcSCREW)
        {
            gmx_fatal(FARGS,
                      "pbc=%s is only implemented with domain decomposition",
                      epbc_names[inputrec->ePBC]);
        }
    }

    if (PAR(cr))
    {
        /* After possible communicator splitting in make_dd_communicators,
         * we can set up the intra/inter node communication.
         */
        gmx_setup_nodecomm(fplog, cr);
    }

    /* Initialize per-physical-node MPI process/thread ID and counters. */
    gmx_init_intranode_counters(cr);
#ifdef GMX_MPI
    if (MULTISIM(cr))
    {
        md_print_info(cr, fplog,
                      "This is simulation %d out of %d running as a composite GROMACS\n"
                      "multi-simulation job. Setup for this simulation:\n\n",
                      cr->ms->sim, cr->ms->nsim);
    }
    md_print_info(cr, fplog, "Using %d MPI %s\n",
                  cr->nnodes,
#ifdef GMX_THREAD_MPI
                  cr->nnodes == 1 ? "thread" : "threads"
#else
                  cr->nnodes == 1 ? "process" : "processes"
#endif
                  );
    fflush(stderr);
#endif

    /* Check and update hw_opt for the cut-off scheme */
    check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);

    /* Check and update hw_opt for the number of MPI ranks */
    check_and_update_hw_opt_3(hw_opt);

    gmx_omp_nthreads_init(fplog, cr,
                          hwinfo->nthreads_hw_avail,
                          hw_opt->nthreads_omp,
                          hw_opt->nthreads_omp_pme,
                          (cr->duty & DUTY_PP) == 0,
                          inputrec->cutoff_scheme == ecutsVERLET);

#ifndef NDEBUG
    if (integrator[inputrec->eI].func != do_tpi &&
        inputrec->cutoff_scheme == ecutsVERLET)
    {
        gmx_feenableexcept();
    }
#endif

    if (bUseGPU)
    {
        /* Select GPU id's to use */
        gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU,
                           &hw_opt->gpu_opt);
    }
    else
    {
        /* Ignore (potentially) manually selected GPUs */
        hw_opt->gpu_opt.n_dev_use = 0;
    }

    /* check consistency across ranks of things like SIMD
     * support and number of GPUs selected */
    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU);

    /* Now that we know the setup is consistent, check for efficiency */
    check_resource_division_efficiency(hwinfo, hw_opt, Flags & MD_NTOMPSET,
                                       cr, fplog);

    if (DOMAINDECOMP(cr))
    {
        /* When we share GPUs over ranks, we need to know this for the DLB */
        dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt);
    }

    /* getting number of PP/PME threads
       PME: env variable should be read only on one node to make sure it is
       identical everywhere;
     */
    /* TODO nthreads_pp is only used for pinning threads.
     * This is a temporary solution until we have a hw topology library.
     */
    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
    nthreads_pme = gmx_omp_nthreads_get(emntPME);

    wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme);

    if (PAR(cr))
    {
        /* Master synchronizes its value of reset_counters with all nodes
         * including PME only nodes */
        reset_counters = wcycle_get_reset_counters(wcycle);
        gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
        wcycle_set_reset_counters(wcycle, reset_counters);
    }

    snew(nrnb, 1);
    if (cr->duty & DUTY_PP)
    {
        bcast_state(cr, state);

        /* Initiate forcerecord */
        fr          = mk_forcerec();
        fr->hwinfo  = hwinfo;
        fr->gpu_opt = &hw_opt->gpu_opt;
        init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box,
                      opt2fn("-table", nfile, fnm),
                      opt2fn("-tabletf", nfile, fnm),
                      opt2fn("-tablep", nfile, fnm),
                      opt2fn("-tableb", nfile, fnm),
                      nbpu_opt,
                      FALSE,
                      pforce);

        /* version for PCA_NOT_READ_NODE (see md.c) */
        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
           "nofile","nofile","nofile","nofile",FALSE,pforce);
         */

        /* Initialize QM-MM */
        if (fr->bQMMM)
        {
            init_QMMMrec(cr, mtop, inputrec, fr);
        }

        /* Initialize the mdatoms structure.
         * mdatoms is not filled with atom data,
         * as this can not be done now with domain decomposition.
         */
        mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO);

        /* Initialize the virtual site communication */
        vsite = init_vsite(mtop, cr, FALSE);

        calc_shifts(box, fr->shift_vec);

        /* With periodic molecules the charge groups should be whole at start up
         * and the virtual sites should not be far from their proper positions.
         */
        if (!inputrec->bContinuation && MASTER(cr) &&
            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
        {
            /* Make molecules whole at start of run */
            if (fr->ePBC != epbcNONE)
            {
                do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x);
            }
            if (vsite)
            {
                /* Correct initial vsite positions are required
                 * for the initial distribution in the domain decomposition
                 * and for the initial shell prediction.
                 */
                construct_vsites_mtop(vsite, mtop, state->x);
            }
        }

        if (EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))
        {
            ewaldcoeff_q  = fr->ewaldcoeff_q;
            ewaldcoeff_lj = fr->ewaldcoeff_lj;
            pmedata       = &fr->pmedata;
        }
        else
        {
            pmedata = NULL;
        }
    }
    else
    {
        /* This is a PME only node */

        /* We don't need the state */
        done_state(state);

        ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
        ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
        snew(pmedata, 1);
    }

    if (hw_opt->thread_affinity != threadaffOFF)
    {
        /* Before setting affinity, check whether the affinity has changed
         * - which indicates that probably the OpenMP library has changed it
         * since we first checked).
         */
        gmx_check_thread_affinity_set(fplog, cr,
                                      hw_opt, hwinfo->nthreads_hw_avail, TRUE);

        /* Set the CPU affinity */
        gmx_set_thread_affinity(fplog, cr, hw_opt, hwinfo);
    }

    /* Initiate PME if necessary,
     * either on all nodes or on dedicated PME nodes only. */
    if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
    {
        if (mdatoms)
        {
            nChargePerturbed = mdatoms->nChargePerturbed;
            if (EVDW_PME(inputrec->vdwtype))
            {
                nTypePerturbed   = mdatoms->nTypePerturbed;
            }
        }
        if (cr->npmenodes > 0)
        {
            /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
        }

        if (cr->duty & DUTY_PME)
        {
            status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec,
                                  mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed,
                                  (Flags & MD_REPRODUCIBLE), nthreads_pme);
            if (status != 0)
            {
                gmx_fatal(FARGS, "Error %d initializing PME", status);
            }
        }
    }


    if (integrator[inputrec->eI].func == do_md)
    {
        /* Turn on signal handling on all nodes */
        /*
         * (A user signal from the PME nodes (if any)
         * is communicated to the PP nodes.
         */
        signal_handler_install();
    }

    if (cr->duty & DUTY_PP)
    {
        /* Assumes uniform use of the number of OpenMP threads */
        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));

        if (inputrec->bPull)
        {
            /* Initialize pull code */
            inputrec->pull_work =
                init_pull(fplog, inputrec->pull, inputrec, nfile, fnm,
                          mtop, cr, oenv, inputrec->fepvals->init_lambda,
                          EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags);
        }

        if (inputrec->bRot)
        {
            /* Initialize enforced rotation code */
            init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv,
                     bVerbose, Flags);
        }

        if (inputrec->eSwapCoords != eswapNO)
        {
            /* Initialize ion swapping code */
            init_swapcoords(fplog, bVerbose, inputrec, opt2fn_master("-swap", nfile, fnm, cr),
                            mtop, state->x, state->box, &state->swapstate, cr, oenv, Flags);
        }

        constr = init_constraints(fplog, mtop, inputrec, ed, state, cr);

        if (DOMAINDECOMP(cr))
        {
            GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
            dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,
                            Flags & MD_DDBONDCHECK, fr->cginfo_mb);

            set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, &ddbox);

            setup_dd_grid(fplog, cr->dd);
        }

        /* Now do whatever the user wants us to do (how flexible...) */
        integrator[inputrec->eI].func(fplog, cr, nfile, fnm,
                                      oenv, bVerbose, bCompact,
                                      nstglobalcomm,
                                      vsite, constr,
                                      nstepout, inputrec, mtop,
                                      fcd, state,
                                      mdatoms, nrnb, wcycle, ed, fr,
                                      repl_ex_nst, repl_ex_nex, repl_ex_seed,
                                      membed,
                                      cpt_period, max_hours,
                                      imdport,
                                      Flags,
                                      walltime_accounting);

        if (inputrec->bPull)
        {
            finish_pull(inputrec->pull_work);
        }

        if (inputrec->bRot)
        {
            finish_rot(inputrec->rot);
        }

    }
    else
    {
        GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
        /* do PME only */
        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
        gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec);
    }

    wallcycle_stop(wcycle, ewcRUN);

    /* Finish up, write some stuff
     * if rerunMD, don't write last frame again
     */
    finish_run(fplog, cr,
               inputrec, nrnb, wcycle, walltime_accounting,
               fr ? fr->nbv : NULL,
               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));


    /* Free GPU memory and context */
    free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL);

    if (opt2bSet("-membed", nfile, fnm))
    {
        sfree(membed);
    }

    gmx_hardware_info_free(hwinfo);

    /* Does what it says */
    print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
    walltime_accounting_destroy(walltime_accounting);

    /* PLUMED */
    if(plumedswitch){
      plumed_finalize(plumedmain);
    }
    /* END PLUMED */

    /* Close logfile already here if we were appending to it */
    if (MASTER(cr) && (Flags & MD_APPENDFILES))
    {
        gmx_log_close(fplog);
    }

    rc = (int)gmx_get_stop_condition();

    done_ed(&ed);

#ifdef GMX_THREAD_MPI
    /* we need to join all threads. The sub-threads join when they
       exit this function, but the master thread needs to be told to
       wait for that. */
    if (PAR(cr) && MASTER(cr))
    {
        tMPI_Finalize();
    }
#endif

    return rc;
}
/*! \brief Picks the OpenCL kernel flavors for electrostatics and VdW.
 *
 * The VdW flavor (a value from enum evdwOcl) is stored in \p gpu_vdwtype
 * and the electrostatics flavor (a value from enum eelOcl) is stored in
 * \p gpu_eeltype. The choice is driven by the interaction settings in
 * \p ic and, for plain cut-off LJ, by the combination rule \p combRule.
 * Settings without a matching kernel flavor are reported via gmx_incons().
 */
static void
map_interaction_types_to_gpu_kernel_flavors(const interaction_const_t *ic,
                                            int                        combRule,
                                            int                       *gpu_eeltype,
                                            int                       *gpu_vdwtype)
{
    /* --- Van der Waals flavor --- */
    if (ic->vdwtype == evdwCUT)
    {
        if (ic->vdw_modifier == eintmodNONE ||
            ic->vdw_modifier == eintmodPOTSHIFT)
        {
            /* Plain (or potential-shifted) cut-off: the flavor depends
             * on the LJ combination rule only. */
            if (combRule == ljcrNONE)
            {
                *gpu_vdwtype = evdwOclCUT;
            }
            else if (combRule == ljcrGEOM)
            {
                *gpu_vdwtype = evdwOclCUTCOMBGEOM;
            }
            else if (combRule == ljcrLB)
            {
                *gpu_vdwtype = evdwOclCUTCOMBLB;
            }
            else
            {
                gmx_incons("The requested LJ combination rule is not implemented in the OpenCL GPU accelerated kernels!");
            }
        }
        else if (ic->vdw_modifier == eintmodFORCESWITCH)
        {
            *gpu_vdwtype = evdwOclFSWITCH;
        }
        else if (ic->vdw_modifier == eintmodPOTSWITCH)
        {
            *gpu_vdwtype = evdwOclPSWITCH;
        }
        else
        {
            gmx_incons("The requested VdW interaction modifier is not implemented in the GPU accelerated kernels!");
        }
    }
    else if (ic->vdwtype == evdwPME)
    {
        /* LJ-PME: geometric rule gets its own flavor, anything else
         * is treated as Lorentz-Berthelot. */
        *gpu_vdwtype = (ic->ljpme_comb_rule == ljcrGEOM)
            ? evdwOclEWALDGEOM
            : evdwOclEWALDLB;
    }
    else
    {
        gmx_incons("The requested VdW type is not implemented in the GPU accelerated kernels!");
    }

    /* --- Electrostatics flavor --- */
    if (ic->eeltype == eelCUT)
    {
        *gpu_eeltype = eelOclCUT;
    }
    else if (EEL_RF(ic->eeltype))
    {
        *gpu_eeltype = eelOclRF;
    }
    else if (EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD)
    {
        /* At this point rcoulomb == rvdw, so the twin cut-off variant
         * is never needed; hence the 'false' argument. */
        *gpu_eeltype = nbnxn_gpu_pick_ewald_kernel_type(false);
    }
    else
    {
        /* Not expected to be reached: the Verlet-scheme selection
         * already rejects unsupported electrostatics types. */
        gmx_incons("The requested electrostatics type is not implemented in the GPU accelerated kernels!");
    }
}