/*! \brief Estimate the fraction of the computational cost that is spent on PME mesh work
 *
 * The result is cost_pme/(cost_bond + cost_pp + cost_pme), where cost_pme is
 * the sum of the charge/LJ redistribution, spreading, FFT and solve costs.
 * The bonded cost is derived from count_bonded_distances() and the
 * particle-particle cost from pp_group_load() or pp_verlet_load(),
 * depending on the cut-off scheme.
 *
 * \param[in] mtop  Topology, passed on to the bonded and particle-particle cost helpers
 * \param[in] ir    Input record; the cut-off scheme, electrostatics and VdW types,
 *                  free-energy setting, LJ-PME combination rule, PME order and
 *                  PME grid dimensions (nkx, nky, nkz) are read from it
 * \param[in] box   Box, passed on to the particle-particle cost helper
 * \returns the estimated relative PME load
 */
float pme_load_estimate(gmx_mtop_t *mtop, t_inputrec *ir, matrix box)
{
    int      nq_tot, nlj_tot, f;
    gmx_bool bChargePerturbed, bTypePerturbed;
    double   ndistance_c, ndistance_simd;
    double   cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme;
    double   grid;
    float    ratio;

    /* Computational cost of bonded, non-bonded and PME calculations.
     * This will be machine dependent.
     * The numbers here are accurate for Intel Core2 and AMD Athlon 64
     * in single precision. In double precision PME mesh is slightly cheaper,
     * although not so much that the numbers need to be adjusted.
     */

    count_bonded_distances(mtop, ir, &ndistance_c, &ndistance_simd);
    /* C_BOND is the cost for bonded interactions with SIMD implementations,
     * so we need to scale the number of bonded interactions for which there
     * are only C implementations to the number of SIMD equivalents.
     */
    cost_bond = c_bond*(ndistance_c   *simd_cycle_factor(FALSE) +
                        ndistance_simd*simd_cycle_factor(bHaveSIMD));

    if (ir->cutoff_scheme == ecutsGROUP)
    {
        pp_group_load(mtop, ir, box,
                      &nq_tot, &nlj_tot, &cost_pp,
                      &bChargePerturbed, &bTypePerturbed);
    }
    else
    {
        pp_verlet_load(mtop, ir, box,
                       &nq_tot, &nlj_tot, &cost_pp,
                       &bChargePerturbed, &bTypePerturbed);
    }

    cost_redist = 0;
    cost_spread = 0;
    cost_fft    = 0;
    cost_solve  = 0;

    /* Number of grid points used in the cost formulas below.
     * The product is evaluated in double precision: multiplying the three
     * int dimensions directly could overflow int for large grids.
     * The halving of nkz intentionally remains an integer division.
     */
    grid = (double)ir->nkx*(double)ir->nky*(double)((ir->nkz + 1)/2);

    if (EEL_PME(ir->coulombtype))
    {
        /* With perturbed charges the mesh work is counted twice */
        f            = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1);
        cost_redist +=   c_pme_redist*nq_tot;
        cost_spread += f*c_pme_spread*nq_tot*pow(ir->pme_order, 3);
        cost_fft    += f*c_pme_fft*grid*log(grid)/log(2);
        cost_solve  += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD);
    }

    if (EVDW_PME(ir->vdwtype))
    {
        /* With perturbed atom types the mesh work is counted twice */
        f            = ((ir->efep != efepNO && bTypePerturbed) ? 2 : 1);
        if (ir->ljpme_combination_rule == eljpmeLB)
        {
            /* LB combination rule: we have 7 mesh terms */
            f       *= 7;
        }
        cost_redist +=   c_pme_redist*nlj_tot;
        cost_spread += f*c_pme_spread*nlj_tot*pow(ir->pme_order, 3);
        cost_fft    += f*c_pme_fft*2*grid*log(grid)/log(2);
        cost_solve  += f*c_pme_solve*grid*simd_cycle_factor(bHaveSIMD);
    }

    cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;

    ratio = cost_pme/(cost_bond + cost_pp + cost_pme);

    if (debug)
    {
        fprintf(debug,
                "cost_bond %f\n"
                "cost_pp %f\n"
                "cost_redist %f\n"
                "cost_spread %f\n"
                "cost_fft %f\n"
                "cost_solve %f\n",
                cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve);

        fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio);
    }

    return ratio;
}
/*! \brief Determine the optimal distribution of DD cells for the simulation system and number of MPI ranks */ static real optimize_ncells(FILE *fplog, int nnodes_tot, int npme_only, gmx_bool bDynLoadBal, real dlb_scale, gmx_mtop_t *mtop, matrix box, gmx_ddbox_t *ddbox, t_inputrec *ir, gmx_domdec_t *dd, real cellsize_limit, real cutoff, gmx_bool bInterCGBondeds, ivec nc) { int npp, npme, ndiv, *div, *mdiv, d, nmax; double pbcdxr; real limit; ivec itry; limit = cellsize_limit; dd->nc[XX] = 1; dd->nc[YY] = 1; dd->nc[ZZ] = 1; npp = nnodes_tot - npme_only; if (EEL_PME(ir->coulombtype)) { npme = (npme_only > 0 ? npme_only : npp); } else { npme = 0; } if (bInterCGBondeds) { /* If we can skip PBC for distance calculations in plain-C bondeds, * we can save some time (e.g. 3D DD with pbc=xyz). * Here we ignore SIMD bondeds as they always do (fast) PBC. */ count_bonded_distances(mtop, ir, &pbcdxr, NULL); pbcdxr /= (double)mtop->natoms; } else { /* Every molecule is a single charge group: no pbc required */ pbcdxr = 0; } /* Add a margin for DLB and/or pressure scaling */ if (bDynLoadBal) { if (dlb_scale >= 1.0) { gmx_fatal(FARGS, "The value for option -dds should be smaller than 1"); } if (fplog) { fprintf(fplog, "Scaling the initial minimum size with 1/%g (option -dds) = %g\n", dlb_scale, 1/dlb_scale); } limit /= dlb_scale; } else if (ir->epc != epcNO) { if (fplog) { fprintf(fplog, "To account for pressure scaling, scaling the initial minimum size with %g\n", DD_GRID_MARGIN_PRES_SCALE); limit *= DD_GRID_MARGIN_PRES_SCALE; } } if (fplog) { fprintf(fplog, "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm\n", npp, limit); if (inhomogeneous_z(ir)) { fprintf(fplog, "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.\n", eewg_names[ir->ewald_geometry]); } if (limit > 0) { fprintf(fplog, "The maximum allowed number of cells is:"); for (d = 0; d < DIM; d++) { nmax = 
(int)(ddbox->box_size[d]*ddbox->skew_fac[d]/limit); if (d >= ddbox->npbcdim && nmax < 2) { nmax = 2; } if (d == ZZ && inhomogeneous_z(ir)) { nmax = 1; } fprintf(fplog, " %c %d", 'X' + d, nmax); } fprintf(fplog, "\n"); } } if (debug) { fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr); } /* Decompose npp in factors */ ndiv = factorize(npp, &div, &mdiv); itry[XX] = 1; itry[YY] = 1; itry[ZZ] = 1; clear_ivec(nc); assign_factors(dd, limit, cutoff, box, ddbox, mtop->natoms, ir, pbcdxr, npme, ndiv, div, mdiv, itry, nc); sfree(div); sfree(mdiv); return limit; }