Example #1
0
/*! \brief Estimate the fraction of the computational cost spent in the PME mesh part
 *
 * Adds up cost estimates for bonded interactions, particle-particle
 * non-bonded interactions and the PME mesh work (redistribution, spreading,
 * FFT and solve) and returns the PME share of that total.
 *
 * \param mtop  Topology, passed on to the bonded and non-bonded counting helpers
 * \param ir    Input record; the cut-off scheme, Coulomb and Van der Waals
 *              types, PME grid dimensions, PME order, LJ-PME combination
 *              rule and free-energy setting are read from it
 * \param box   Box matrix, passed on to the non-bonded load helpers
 * \returns cost_pme/(cost_bond + cost_pp + cost_pme)
 */
float pme_load_estimate(gmx_mtop_t *mtop, t_inputrec *ir, matrix box)
{
    int            nq_tot, nlj_tot, f;
    gmx_bool       bChargePerturbed, bTypePerturbed;
    double         ndistance_c, ndistance_simd;
    double         cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme;
    float          ratio;

    /* Computational cost of bonded, non-bonded and PME calculations.
     * This will be machine dependent.
     * The numbers here are accurate for Intel Core2 and AMD Athlon 64
     * in single precision. In double precision PME mesh is slightly cheaper,
     * although not so much that the numbers need to be adjusted.
     */

    count_bonded_distances(mtop, ir, &ndistance_c, &ndistance_simd);
    /* C_BOND is the cost for bonded interactions with SIMD implementations,
     * so we need to scale the number of bonded interactions for which there
     * are only C implementations to the number of SIMD equivalents.
     */
    cost_bond = c_bond*(ndistance_c   *simd_cycle_factor(FALSE) +
                        ndistance_simd*simd_cycle_factor(bHaveSIMD));

    /* The particle-particle cost and the charge/LJ particle counts come
     * from the helper that matches the cut-off scheme.
     */
    if (ir->cutoff_scheme == ecutsGROUP)
    {
        pp_group_load(mtop, ir, box,
                      &nq_tot, &nlj_tot, &cost_pp,
                      &bChargePerturbed, &bTypePerturbed);
    }
    else
    {
        pp_verlet_load(mtop, ir, box,
                       &nq_tot, &nlj_tot, &cost_pp,
                       &bChargePerturbed, &bTypePerturbed);
    }

    cost_redist = 0;
    cost_spread = 0;
    cost_fft    = 0;
    cost_solve  = 0;

    if (EEL_PME(ir->coulombtype))
    {
        /* Multiply in double: the product of three grid dimensions can
         * exceed the range of int for very large grids.
         */
        double grid = (double)ir->nkx*ir->nky*((ir->nkz + 1)/2);

        /* Perturbed charges with free-energy enabled double the mesh work */
        f            = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1);
        cost_redist +=   c_pme_redist*nq_tot;
        cost_spread += f*c_pme_spread*nq_tot*pow(ir->pme_order, 3);
        cost_fft    += f*c_pme_fft*grid*log(grid)/log(2);
        cost_solve  += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD);
    }

    if (EVDW_PME(ir->vdwtype))
    {
        /* Multiply in double, see the Coulomb branch above */
        double grid = (double)ir->nkx*ir->nky*((ir->nkz + 1)/2);

        /* Perturbed atom types with free-energy enabled double the mesh work */
        f            = ((ir->efep != efepNO && bTypePerturbed) ? 2 : 1);
        if (ir->ljpme_combination_rule == eljpmeLB)
        {
            /* LB combination rule: we have 7 mesh terms */
            f       *= 7;
        }
        cost_redist +=   c_pme_redist*nlj_tot;
        cost_spread += f*c_pme_spread*nlj_tot*pow(ir->pme_order, 3);
        cost_fft    += f*c_pme_fft*2*grid*log(grid)/log(2);
        cost_solve  += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD);
    }

    cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;

    ratio = cost_pme/(cost_bond + cost_pp + cost_pme);

    if (debug)
    {
        fprintf(debug,
                "cost_bond   %f\n"
                "cost_pp     %f\n"
                "cost_redist %f\n"
                "cost_spread %f\n"
                "cost_fft    %f\n"
                "cost_solve  %f\n",
                cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve);

        fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio);
    }

    return ratio;
}
Example #2
0
/*! \brief Determine the optimal distribution of DD cells for the simulation system and number of MPI ranks
 *
 * Resets dd->nc to one cell per dimension, derives the minimum initial
 * cell size from \p cellsize_limit plus a margin for dynamic load balancing
 * or pressure scaling, factorizes the number of particle-particle ranks and
 * hands the factors to assign_factors() to choose the grid.
 *
 * \param fplog            Log file, may be NULL; only used for reporting
 * \param nnodes_tot       Total number of ranks
 * \param npme_only        Number of ranks subtracted from \p nnodes_tot to
 *                         obtain the particle-particle rank count
 * \param bDynLoadBal      Whether to add the dynamic load balancing margin
 * \param dlb_scale        Scaling factor for the DLB margin (option -dds),
 *                         a value >= 1 is a fatal error when \p bDynLoadBal is set
 * \param mtop             Topology; the atom count is read from it
 * \param box              Box matrix, passed on to assign_factors()
 * \param ddbox            DD box description, used for reporting the maximum
 *                         number of cells and passed on to assign_factors()
 * \param ir               Input record
 * \param dd               Domain decomposition struct; dd->nc is set to 1,1,1
 * \param cellsize_limit   Minimum cell size before margins are applied
 * \param cutoff           Cut-off, passed on to assign_factors()
 * \param bInterCGBondeds  Whether bonded interactions between charge groups
 *                         are present; when FALSE no PBC distance calls are counted
 * \param nc               Cleared here and then passed to assign_factors(),
 *                         which presumably stores the chosen grid in it
 * \returns the minimum cell size limit including the applied margin
 */
static real optimize_ncells(FILE *fplog,
                            int nnodes_tot, int npme_only,
                            gmx_bool bDynLoadBal, real dlb_scale,
                            gmx_mtop_t *mtop, matrix box, gmx_ddbox_t *ddbox,
                            t_inputrec *ir,
                            gmx_domdec_t *dd,
                            real cellsize_limit, real cutoff,
                            gmx_bool bInterCGBondeds,
                            ivec nc)
{
    int      npp, npme, ndiv, *div, *mdiv, d, nmax;
    double   pbcdxr;
    real     limit;
    ivec     itry;

    limit  = cellsize_limit;

    dd->nc[XX] = 1;
    dd->nc[YY] = 1;
    dd->nc[ZZ] = 1;

    npp = nnodes_tot - npme_only;
    if (EEL_PME(ir->coulombtype))
    {
        /* Without separate PME ranks all PP ranks also do PME */
        npme = (npme_only > 0 ? npme_only : npp);
    }
    else
    {
        npme = 0;
    }

    if (bInterCGBondeds)
    {
        /* If we can skip PBC for distance calculations in plain-C bondeds,
         * we can save some time (e.g. 3D DD with pbc=xyz).
         * Here we ignore SIMD bondeds as they always do (fast) PBC.
         */
        count_bonded_distances(mtop, ir, &pbcdxr, NULL);
        pbcdxr /= (double)mtop->natoms;
    }
    else
    {
        /* Every molecule is a single charge group: no pbc required */
        pbcdxr = 0;
    }
    /* Add a margin for DLB and/or pressure scaling */
    if (bDynLoadBal)
    {
        if (dlb_scale >= 1.0)
        {
            gmx_fatal(FARGS, "The value for option -dds should be smaller than 1");
        }
        if (fplog)
        {
            fprintf(fplog, "Scaling the initial minimum size with 1/%g (option -dds) = %g\n", dlb_scale, 1/dlb_scale);
        }
        limit /= dlb_scale;
    }
    else if (ir->epc != epcNO)
    {
        if (fplog)
        {
            fprintf(fplog, "To account for pressure scaling, scaling the initial minimum size with %g\n", DD_GRID_MARGIN_PRES_SCALE);
        }
        /* The margin must be applied whether or not a log file is open,
         * otherwise the resulting grid would depend on logging.
         */
        limit *= DD_GRID_MARGIN_PRES_SCALE;
    }

    if (fplog)
    {
        fprintf(fplog, "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm\n", npp, limit);

        if (inhomogeneous_z(ir))
        {
            fprintf(fplog, "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.\n", eewg_names[ir->ewald_geometry]);
        }

        if (limit > 0)
        {
            fprintf(fplog, "The maximum allowed number of cells is:");
            for (d = 0; d < DIM; d++)
            {
                nmax = (int)(ddbox->box_size[d]*ddbox->skew_fac[d]/limit);
                /* Dimensions at or beyond npbcdim are reported with at least 2 cells */
                if (d >= ddbox->npbcdim && nmax < 2)
                {
                    nmax = 2;
                }
                if (d == ZZ && inhomogeneous_z(ir))
                {
                    nmax = 1;
                }
                fprintf(fplog, " %c %d", 'X' + d, nmax);
            }
            fprintf(fplog, "\n");
        }
    }

    if (debug)
    {
        fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr);
    }

    /* Decompose npp in factors */
    ndiv = factorize(npp, &div, &mdiv);

    itry[XX] = 1;
    itry[YY] = 1;
    itry[ZZ] = 1;
    clear_ivec(nc);
    assign_factors(dd, limit, cutoff, box, ddbox, mtop->natoms, ir, pbcdxr,
                   npme, ndiv, div, mdiv, itry, nc);

    /* The factor arrays were allocated by factorize() and are released here */
    sfree(div);
    sfree(mdiv);

    return limit;
}