/*! \brief Recursively try every way of distributing the factors of the cell
 * count over the three dimensions and keep the cheapest grid in \p opt.
 *
 * Each call handles a single factor, div[0], which is applied mdiv[0] times
 * in total: x times to XX, y times to YY and the remaining mdiv[0]-x-y times
 * to ZZ, for every combination of x and y that fits. For each combination
 * the function recurses on the remaining ndiv-1 factors. When no factors
 * are left (ndiv == 0) the grid in \p ir_try is complete and its cost is
 * obtained from comm_cost_est().
 *
 * A complete grid replaces \p opt only when its cost estimate is
 * non-negative and either \p opt holds no grid yet (opt[XX] == 0) or the
 * new estimate is lower than the estimate for \p opt.
 * NOTE(review): presumably comm_cost_est() returns a negative value for
 * grids it rejects - confirm against its definition.
 *
 * \param[in]     dd      Passed unchanged to comm_cost_est()
 * \param[in]     limit   Passed unchanged to comm_cost_est()
 * \param[in]     cutoff  Passed unchanged to comm_cost_est()
 * \param[in]     box     Passed unchanged to comm_cost_est()
 * \param[in]     ddbox   Passed unchanged to comm_cost_est()
 * \param[in]     ir      Passed unchanged to comm_cost_est()
 * \param[in]     pbcdxr  Passed unchanged to comm_cost_est()
 * \param[in]     npme    Passed unchanged to comm_cost_est()
 * \param[in]     ndiv    Number of factors still to be distributed
 * \param[in]     div     Remaining factors; only div[0] is used at this level
 * \param[in]     mdiv    Multiplicity of each remaining factor
 * \param[in,out] ir_try  Grid under construction; it is scaled in place and
 *                        every multiplication is undone by a matching
 *                        division before the function returns
 * \param[in,out] opt     Best grid found so far
 */
static void assign_factors(gmx_domdec_t *dd,
                           real limit, real cutoff,
                           matrix box, gmx_ddbox_t *ddbox,
                           t_inputrec *ir,
                           float pbcdxr, int npme,
                           int ndiv, int *div, int *mdiv,
                           ivec ir_try, ivec opt)
{
    int   x, y, nz, i;
    float ce;

    if (ndiv == 0)
    {
        /* All factors have been assigned: cost this grid and keep it
         * when it beats the best one stored so far.
         */
        ce = comm_cost_est(dd, limit, cutoff, box, ddbox, ir, pbcdxr,
                           npme, ir_try);
        if (ce >= 0 && (opt[XX] == 0 ||
                        ce < comm_cost_est(dd, limit, cutoff, box, ddbox,
                                           ir, pbcdxr, npme, opt)))
        {
            copy_ivec(ir_try, opt);
        }

        return;
    }

    for (x = mdiv[0]; x >= 0; x--)
    {
        for (i = 0; i < x; i++)
        {
            ir_try[XX] *= div[0];
        }
        for (y = mdiv[0] - x; y >= 0; y--)
        {
            /* Whatever is not given to XX or YY goes to ZZ */
            nz = mdiv[0] - x - y;

            for (i = 0; i < y; i++)
            {
                ir_try[YY] *= div[0];
            }
            for (i = 0; i < nz; i++)
            {
                ir_try[ZZ] *= div[0];
            }

            /* recurse */
            assign_factors(dd, limit, cutoff, box, ddbox, ir, pbcdxr, npme,
                           ndiv - 1, div + 1, mdiv + 1, ir_try, opt);

            /* Undo the scaling so the next combination starts clean */
            for (i = 0; i < nz; i++)
            {
                ir_try[ZZ] /= div[0];
            }
            for (i = 0; i < y; i++)
            {
                ir_try[YY] /= div[0];
            }
        }
        for (i = 0; i < x; i++)
        {
            ir_try[XX] /= div[0];
        }
    }
}
/*! \brief Determine the optimal distribution of DD cells for the simulation system and number of MPI ranks */ static real optimize_ncells(FILE *fplog, int nnodes_tot, int npme_only, gmx_bool bDynLoadBal, real dlb_scale, gmx_mtop_t *mtop, matrix box, gmx_ddbox_t *ddbox, t_inputrec *ir, gmx_domdec_t *dd, real cellsize_limit, real cutoff, gmx_bool bInterCGBondeds, ivec nc) { int npp, npme, ndiv, *div, *mdiv, d, nmax; double pbcdxr; real limit; ivec itry; limit = cellsize_limit; dd->nc[XX] = 1; dd->nc[YY] = 1; dd->nc[ZZ] = 1; npp = nnodes_tot - npme_only; if (EEL_PME(ir->coulombtype)) { npme = (npme_only > 0 ? npme_only : npp); } else { npme = 0; } if (bInterCGBondeds) { /* If we can skip PBC for distance calculations in plain-C bondeds, * we can save some time (e.g. 3D DD with pbc=xyz). * Here we ignore SIMD bondeds as they always do (fast) PBC. */ count_bonded_distances(mtop, ir, &pbcdxr, NULL); pbcdxr /= (double)mtop->natoms; } else { /* Every molecule is a single charge group: no pbc required */ pbcdxr = 0; } /* Add a margin for DLB and/or pressure scaling */ if (bDynLoadBal) { if (dlb_scale >= 1.0) { gmx_fatal(FARGS, "The value for option -dds should be smaller than 1"); } if (fplog) { fprintf(fplog, "Scaling the initial minimum size with 1/%g (option -dds) = %g\n", dlb_scale, 1/dlb_scale); } limit /= dlb_scale; } else if (ir->epc != epcNO) { if (fplog) { fprintf(fplog, "To account for pressure scaling, scaling the initial minimum size with %g\n", DD_GRID_MARGIN_PRES_SCALE); limit *= DD_GRID_MARGIN_PRES_SCALE; } } if (fplog) { fprintf(fplog, "Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm\n", npp, limit); if (inhomogeneous_z(ir)) { fprintf(fplog, "Ewald_geometry=%s: assuming inhomogeneous particle distribution in z, will not decompose in z.\n", eewg_names[ir->ewald_geometry]); } if (limit > 0) { fprintf(fplog, "The maximum allowed number of cells is:"); for (d = 0; d < DIM; d++) { nmax = 
(int)(ddbox->box_size[d]*ddbox->skew_fac[d]/limit); if (d >= ddbox->npbcdim && nmax < 2) { nmax = 2; } if (d == ZZ && inhomogeneous_z(ir)) { nmax = 1; } fprintf(fplog, " %c %d", 'X' + d, nmax); } fprintf(fplog, "\n"); } } if (debug) { fprintf(debug, "Average nr of pbc_dx calls per atom %.2f\n", pbcdxr); } /* Decompose npp in factors */ ndiv = factorize(npp, &div, &mdiv); itry[XX] = 1; itry[YY] = 1; itry[ZZ] = 1; clear_ivec(nc); assign_factors(dd, limit, cutoff, box, ddbox, mtop->natoms, ir, pbcdxr, npme, ndiv, div, mdiv, itry, nc); sfree(div); sfree(mdiv); return limit; }
static real optimize_ncells(FILE *fplog, int nnodes_tot,int npme_only, bool bDynLoadBal,real dlb_scale, gmx_mtop_t *mtop,matrix box,gmx_ddbox_t *ddbox, t_inputrec *ir, gmx_domdec_t *dd, real cellsize_limit,real cutoff, bool bInterCGBondeds,bool bInterCGMultiBody, ivec nc) { int npp,npme,ndiv,*div,*mdiv,d,nmax; bool bExcl_pbcdx; float pbcdxr; real limit; ivec itry; limit = cellsize_limit; dd->nc[XX] = 1; dd->nc[YY] = 1; dd->nc[ZZ] = 1; npp = nnodes_tot - npme_only; if (EEL_PME(ir->coulombtype)) { npme = (npme_only > 0 ? npme_only : npp); } else { npme = 0; } if (bInterCGBondeds) { /* For Ewald exclusions pbc_dx is not called */ bExcl_pbcdx = (EEL_EXCL_FORCES(ir->coulombtype) && !EEL_FULL(ir->coulombtype)); pbcdxr = (double)n_bonded_dx(mtop,bExcl_pbcdx)/(double)mtop->natoms; } else { /* Every molecule is a single charge group: no pbc required */ pbcdxr = 0; } /* Add a margin for DLB and/or pressure scaling */ if (bDynLoadBal) { if (dlb_scale >= 1.0) { gmx_fatal(FARGS,"The value for option -dds should be smaller than 1"); } if (fplog) { fprintf(fplog,"Scaling the initial minimum size with 1/%g (option -dds) = %g\n",dlb_scale,1/dlb_scale); } limit /= dlb_scale; } else if (ir->epc != epcNO) { if (fplog) { fprintf(fplog,"To account for pressure scaling, scaling the initial minimum size with %g\n",DD_GRID_MARGIN_PRES_SCALE); limit *= DD_GRID_MARGIN_PRES_SCALE; } } if (fplog) { fprintf(fplog,"Optimizing the DD grid for %d cells with a minimum initial size of %.3f nm\n",npp,limit); if (limit > 0) { fprintf(fplog,"The maximum allowed number of cells is:"); for(d=0; d<DIM; d++) { nmax = (int)(ddbox->box_size[d]*ddbox->skew_fac[d]/limit); if (d >= ddbox->npbcdim && nmax < 2) { nmax = 2; } fprintf(fplog," %c %d",'X' + d,nmax); } fprintf(fplog,"\n"); } } if (debug) { fprintf(debug,"Average nr of pbc_dx calls per atom %.2f\n",pbcdxr); } /* Decompose npp in factors */ ndiv = factorize(npp,&div,&mdiv); itry[XX] = 1; itry[YY] = 1; itry[ZZ] = 1; clear_ivec(nc); 
assign_factors(dd,limit,cutoff,box,ddbox,ir,pbcdxr, npme,ndiv,div,mdiv,itry,nc); sfree(div); sfree(mdiv); return limit; }