float pme_load_estimate(gmx_mtop_t *mtop,t_inputrec *ir,matrix box) { t_atom *atom; int mb,nmol,atnr,cg,a,a0,nq_tot; gmx_bool bBHAM,bLJcut,bChargePerturbed,bWater,bQ,bLJ; double cost_bond,cost_pp,cost_redist,cost_spread,cost_fft,cost_solve,cost_pme; float ratio; t_iparams *iparams; gmx_moltype_t *molt; /* Computational cost of bonded, non-bonded and PME calculations. * This will be machine dependent. * The numbers here are accurate for Intel Core2 and AMD Athlon 64 * in single precision. In double precision PME mesh is slightly cheaper, * although not so much that the numbers need to be adjusted. */ iparams = mtop->ffparams.iparams; atnr = mtop->ffparams.atnr; cost_bond = C_BOND*n_bonded_dx(mtop,TRUE); if (ir->cutoff_scheme == ecutsGROUP) { pp_group_load(mtop,ir,box,&nq_tot,&cost_pp,&bChargePerturbed); } else { pp_verlet_load(mtop,ir,box,&nq_tot,&cost_pp,&bChargePerturbed); } cost_redist = C_PME_REDIST*nq_tot; cost_spread = C_PME_SPREAD*nq_tot*pow(ir->pme_order,3); cost_fft = C_PME_FFT*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz); cost_solve = C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz; if (ir->efep != efepNO && bChargePerturbed) { /* All PME work, except redist & spline coefficient calculation, doubles */ cost_spread *= 2; cost_fft *= 2; cost_solve *= 2; } cost_pme = cost_redist + cost_spread + cost_fft + cost_solve; ratio = cost_pme/(cost_bond + cost_pp + cost_pme); if (debug) { fprintf(debug, "cost_bond %f\n" "cost_pp %f\n" "cost_redist %f\n" "cost_spread %f\n" "cost_fft %f\n" "cost_solve %f\n", cost_bond,cost_pp,cost_redist,cost_spread,cost_fft,cost_solve); fprintf(debug,"Estimate for relative PME load: %.3f\n",ratio); } return ratio; }
float pme_load_estimate(gmx_mtop_t *mtop,t_inputrec *ir,matrix box) { t_atom *atom; int mb,nmol,atnr,cg,a,a0,ncqlj,ncq,nclj; bool bBHAM,bLJcut,bWater,bQ,bLJ; double nw,nqlj,nq,nlj,cost_bond,cost_pp,cost_spread,cost_fft; float fq,fqlj,flj,fljtab,fqljw,fqw,fqspread,ffft,fbond; float ratio; t_iparams *iparams; gmx_moltype_t *molt; bBHAM = (mtop->ffparams.functype[0] == F_BHAM); bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM); /* Computational cost relative to a tabulated q-q interaction. * This will be machine dependent. * The numbers here are accurate for Intel Core2 and AMD Athlon 64 * in single precision. In double precision PME mesh is slightly cheaper, * although not so much that the numbers need to be adjusted. */ fq = 1.0; fqlj = (bLJcut ? 1.5 : 2.0 ); flj = (bLJcut ? 0.5 : 1.5 ); /* Cost of 1 water with one Q/LJ atom */ fqljw = (bLJcut ? 1.75 : 2.25); /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */ fqw = 1.5; /* Cost of q spreading and force interpolation per charge */ fqspread = 25.0; /* Cost of fft's + pme_solve, will be multiplied with N log(N) */ ffft = 0.4; /* Cost of a bonded interaction divided by the number of (pbc_)dx required */ fbond = 5.0; iparams = mtop->ffparams.iparams; atnr = mtop->ffparams.atnr; nw = 0; nqlj = 0; nq = 0; nlj = 0; for(mb=0; mb<mtop->nmolblock; mb++) { molt = &mtop->moltype[mtop->molblock[mb].type]; atom = molt->atoms.atom; nmol = mtop->molblock[mb].nmol; a = 0; for(cg=0; cg<molt->cgs.nr; cg++) { bWater = !bBHAM; ncqlj = 0; ncq = 0; nclj = 0; a0 = a; while (a < molt->cgs.index[cg+1]) { bQ = (atom[a].q != 0 || atom[a].qB != 0); bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 || iparams[(atnr+1)*atom[a].type].lj.c12 != 0); /* This if this atom fits into water optimization */ if (!((a == a0 && bQ && bLJ) || (a == a0+1 && bQ && !bLJ) || (a == a0+2 && bQ && !bLJ && atom[a].q == atom[a-1].q) || (a == a0+3 && !bQ && bLJ))) bWater = FALSE; if (bQ && bLJ) { ncqlj++; } else { if (bQ) ncq++; if (bLJ) nclj++; } a++; } 
if (bWater) { nw += nmol; } else { nqlj += nmol*ncqlj; nq += nmol*ncq; nlj += nmol*nclj; } } } if (debug) fprintf(debug,"nw %g nqlj %g nq %g nlj %g\n",nw,nqlj,nq,nlj); cost_bond = fbond*n_bonded_dx(mtop,TRUE); /* For the PP non-bonded cost it is (unrealistically) assumed * that all atoms are distributed homogeneously in space. */ cost_pp = 0.5*(fqljw*nw*nqlj + fqw *nw*(3*nw + nq) + fqlj *nqlj*nqlj + fq *nq*(3*nw + nqlj + nq) + flj *nlj*(nw + nqlj + nlj)) *4/3*M_PI*ir->rlist*ir->rlist*ir->rlist/det(box); cost_spread = fqspread*(3*nw + nqlj + nq); cost_fft = ffft*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz); ratio = (cost_spread + cost_fft)/(cost_bond + cost_pp + cost_spread + cost_fft); if (debug) { fprintf(debug, "cost_bond %f\n" "cost_pp %f\n" "cost_spread %f\n" "cost_fft %f\n", cost_bond,cost_pp,cost_spread,cost_fft); fprintf(debug,"Estimate for relative PME load: %.3f\n",ratio); } return ratio; }
/* Choose a domain decomposition grid for the particle-particle ranks.
 *
 * The minimum cell size starts at cellsize_limit and is enlarged by a margin
 * for dynamic load balancing (divided by dlb_scale) or, failing that, for
 * pressure coupling (multiplied by DD_GRID_MARGIN_PRES_SCALE). The number of
 * PP ranks (nnodes_tot - npme_only) is factorized and the factors, together
 * with the resulting limit, are handed to assign_factors(), which receives nc
 * as its output grid.
 *
 * fplog may be NULL; it only controls logging, not the computed limit.
 * dd->nc is reset to 1 in every dimension.
 * A dlb_scale >= 1 with bDynLoadBal set is reported through gmx_fatal().
 *
 * Returns the minimum cell size limit including the applied margin.
 */
static real optimize_ncells(FILE *fplog,
                            int nnodes_tot,int npme_only,
                            bool bDynLoadBal,real dlb_scale,
                            gmx_mtop_t *mtop,matrix box,gmx_ddbox_t *ddbox,
                            t_inputrec *ir,
                            gmx_domdec_t *dd,
                            real cellsize_limit,real cutoff,
                            bool bInterCGBondeds,bool bInterCGMultiBody,
                            ivec nc)
{
    int   npp,npme,ndiv,*div,*mdiv,d,nmax;
    bool  bExcl_pbcdx;
    float pbcdxr;
    real  limit;
    ivec  itry;

    limit = cellsize_limit;

    dd->nc[XX] = 1;
    dd->nc[YY] = 1;
    dd->nc[ZZ] = 1;

    npp = nnodes_tot - npme_only;
    if (EEL_PME(ir->coulombtype))
    {
        /* Without separate PME ranks the PP ranks also do the PME work */
        npme = (npme_only > 0 ? npme_only : npp);
    }
    else
    {
        npme = 0;
    }

    if (bInterCGBondeds)
    {
        /* For Ewald exclusions pbc_dx is not called */
        bExcl_pbcdx =
            (EEL_EXCL_FORCES(ir->coulombtype) && !EEL_FULL(ir->coulombtype));
        pbcdxr = (double)n_bonded_dx(mtop,bExcl_pbcdx)/(double)mtop->natoms;
    }
    else
    {
        /* Every molecule is a single charge group: no pbc required */
        pbcdxr = 0;
    }

    /* Add a margin for DLB and/or pressure scaling */
    if (bDynLoadBal)
    {
        if (dlb_scale >= 1.0)
        {
            gmx_fatal(FARGS,"The value for option -dds should be smaller than 1");
        }
        if (fplog)
        {
            fprintf(fplog,"Scaling the initial minimum size with 1/%g (option -dds) = %g\n",dlb_scale,1/dlb_scale);
        }
        limit /= dlb_scale;
    }
    else if (ir->epc != epcNO)
    {
        if (fplog)
        {
            fprintf(fplog,"To account for pressure scaling, scaling the initial minimum size with %g\n",DD_GRID_MARGIN_PRES_SCALE);
        }
        /* Apply the margin whether or not a log file is open, so that the
         * resulting grid does not depend on logging (as in the DLB branch).
         */
        limit *= DD_GRID_MARGIN_PRES_SCALE;
    }

    if (fplog)
    {
        fprintf(fplog,"Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm\n",npp,limit);

        if (limit > 0)
        {
            fprintf(fplog,"The maximum allowed number of cells is:");
            for(d=0; d<DIM; d++)
            {
                nmax = (int)(ddbox->box_size[d]*ddbox->skew_fac[d]/limit);
                /* Dimensions at or beyond npbcdim are reported with at least 2 cells */
                if (d >= ddbox->npbcdim && nmax < 2)
                {
                    nmax = 2;
                }
                fprintf(fplog," %c %d",'X' + d,nmax);
            }
            fprintf(fplog,"\n");
        }
    }

    if (debug)
    {
        fprintf(debug,"Average nr of pbc_dx calls per atom %.2f\n",pbcdxr);
    }

    /* Decompose npp in factors */
    ndiv = factorize(npp,&div,&mdiv);

    itry[XX] = 1;
    itry[YY] = 1;
    itry[ZZ] = 1;
    clear_ivec(nc);
    assign_factors(dd,limit,cutoff,box,ddbox,ir,pbcdxr,
                   npme,ndiv,div,mdiv,itry,nc);

    sfree(div);
    sfree(mdiv);

    return limit;
}
float pme_load_estimate(const gmx_mtop_t *mtop, const t_inputrec *ir, matrix box) { int nq_tot, nlj_tot, f; gmx_bool bChargePerturbed, bTypePerturbed; double cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme; float ratio; /* Computational cost of bonded, non-bonded and PME calculations. * This will be machine dependent. * The numbers here are accurate for Intel Core2 and AMD Athlon 64 * in single precision. In double precision PME mesh is slightly cheaper, * although not so much that the numbers need to be adjusted. */ cost_bond = C_BOND*n_bonded_dx(mtop, TRUE); if (ir->cutoff_scheme == ecutsGROUP) { pp_group_load(mtop, ir, box, &nq_tot, &nlj_tot, &cost_pp, &bChargePerturbed, &bTypePerturbed); } else { pp_verlet_load(mtop, ir, box, &nq_tot, &nlj_tot, &cost_pp, &bChargePerturbed, &bTypePerturbed); } cost_redist = 0; cost_spread = 0; cost_fft = 0; cost_solve = 0; if (EEL_PME(ir->coulombtype)) { f = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1); cost_redist += C_PME_REDIST*nq_tot; cost_spread += f*C_PME_SPREAD*nq_tot*std::pow(static_cast<real>(ir->pme_order), static_cast<real>(3.0)); cost_fft += f*C_PME_FFT*ir->nkx*ir->nky*ir->nkz*std::log(static_cast<real>(ir->nkx*ir->nky*ir->nkz)); cost_solve += f*C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz; } if (EVDW_PME(ir->vdwtype)) { f = ((ir->efep != efepNO && bTypePerturbed) ? 
2 : 1); if (ir->ljpme_combination_rule == eljpmeLB) { /* LB combination rule: we have 7 mesh terms */ f *= 7; } cost_redist += C_PME_REDIST*nlj_tot; cost_spread += f*C_PME_SPREAD*nlj_tot*std::pow(static_cast<real>(ir->pme_order), static_cast<real>(3.0)); cost_fft += f*C_PME_FFT*ir->nkx*ir->nky*ir->nkz*std::log(static_cast<real>(ir->nkx*ir->nky*ir->nkz)); cost_solve += f*C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz; } cost_pme = cost_redist + cost_spread + cost_fft + cost_solve; ratio = cost_pme/(cost_bond + cost_pp + cost_pme); if (debug) { fprintf(debug, "cost_bond %f\n" "cost_pp %f\n" "cost_redist %f\n" "cost_spread %f\n" "cost_fft %f\n" "cost_solve %f\n", cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve); fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio); } return ratio; }