/* Tells whether using npme of the nnodes nodes as PME nodes is acceptable.
 *
 * The split is accepted only when both of these hold:
 *  - fits_pme_ratio() accepts nnodes/npme for the relative PME load ratio,
 *  - pme_inconvenient_nnodes() reports at most 1 for the PME grid sizes
 *    ir->nkx and ir->nky combined with npme.
 *
 * fplog, box and mtop are part of the signature but are not used here.
 */
static bool fits_pme_perf(FILE *fplog,
                          t_inputrec *ir, matrix box, gmx_mtop_t *mtop,
                          int nnodes, int npme, float ratio)
{
    /* Does this division give a reasonable PME load?
     * When it does not, the grid check below is not evaluated at all.
     */
    if (!fits_pme_ratio(nnodes, npme, ratio))
    {
        return 0;
    }

    return (pme_inconvenient_nnodes(ir->nkx, ir->nky, npme) <= 1);
}
/* Returns TRUE when using npme of the nnodes nodes as PME-only nodes is
 * expected to give reasonable performance for the relative PME load ratio.
 *
 * Three checks are applied, in this order:
 *  - the last factor reported by factorize() for the PP node count must not
 *    exceed 3 + round(cbrt(#PP nodes)),
 *  - lcd(#PP nodes, npme)*2 must be at least round(sqrt(npme)),
 *  - fits_pme_ratio() must accept the split.
 *
 * fplog, ir, box and mtop are part of the signature but are not used here.
 */
static gmx_bool fits_pp_pme_perf(FILE *fplog,
                                 t_inputrec *ir, matrix box, gmx_mtop_t *mtop,
                                 int nnodes, int npme, float ratio)
{
    int ndiv, *div, *mdiv, ldiv;
    int npp_root3, npme_root2;

    ndiv = factorize(nnodes - npme, &div, &mdiv);
    /* NOTE(review): the last entry of div is used as the largest factor,
     * which presumes factorize() lists factors in ascending order - confirm.
     * When no factors are reported (presumably a single PP node),
     * div[ndiv-1] would read before the start of the array, so fall back
     * to the trivial factor 1 in that case.
     */
    ldiv = (ndiv > 0) ? div[ndiv - 1] : 1;
    sfree(div);
    sfree(mdiv);

    npp_root3  = (int)(pow(nnodes - npme, 1.0/3.0) + 0.5);
    npme_root2 = (int)(sqrt(npme) + 0.5);

    /* The check below gives a reasonable division:
     * factor 5 allowed at 5 or more PP nodes,
     * factor 7 allowed at 49 or more PP nodes.
     */
    if (ldiv > 3 + npp_root3)
    {
        return FALSE;
    }

    /* Check if the numbers of PP and PME nodes have a reasonably sized
     * divisor in common, such that we can use 2D PME decomposition
     * when required (which requires nx_pp == nx_pme).
     * The factor of 2 allows for a maximum ratio of 2^2=4
     * between nx_pme and ny_pme.
     */
    if (lcd(nnodes - npme, npme)*2 < npme_root2)
    {
        return FALSE;
    }

    /* Does this division give a reasonable PME load? */
    return fits_pme_ratio(nnodes, npme, ratio);
}
/*! \brief Returns TRUE when npme out of ntot ranks doing PME is expected to give reasonable performance
 *
 * Three checks are applied, in this order:
 *  - the last factor reported by factorize() for the PP rank count must not
 *    exceed 3 + round(cbrt(#PP ranks)),
 *  - lcd(#PP ranks, npme)*2 must be at least round(sqrt(npme)),
 *  - fits_pme_ratio() must accept the split.
 *
 * \param[in] ntot   Total number of ranks
 * \param[in] npme   Candidate number of PME-only ranks
 * \param[in] ratio  Relative PME load, passed on to fits_pme_ratio()
 * \returns TRUE when all three checks pass, FALSE otherwise
 */
static gmx_bool fits_pp_pme_perf(int ntot, int npme, float ratio)
{
    int ndiv, *div, *mdiv, ldiv;
    int npp_root3, npme_root2;

    ndiv = factorize(ntot - npme, &div, &mdiv);
    /* NOTE(review): the last entry of div is used as the largest factor,
     * which presumes factorize() lists factors in ascending order - confirm.
     * When no factors are reported (presumably a single PP rank),
     * div[ndiv-1] would read before the start of the array, so fall back
     * to the trivial factor 1 in that case.
     */
    ldiv = (ndiv > 0) ? div[ndiv - 1] : 1;
    sfree(div);
    sfree(mdiv);

    npp_root3  = static_cast<int>(std::pow(ntot - npme, 1.0/3.0) + 0.5);
    npme_root2 = static_cast<int>(std::sqrt(static_cast<double>(npme)) + 0.5);

    /* The check below gives a reasonable division:
     * factor 5 allowed at 5 or more PP ranks,
     * factor 7 allowed at 49 or more PP ranks.
     */
    if (ldiv > 3 + npp_root3)
    {
        return FALSE;
    }

    /* Check if the numbers of PP and PME ranks have a reasonably sized
     * divisor in common, such that we can use 2D PME decomposition
     * when required (which requires nx_pp == nx_pme).
     * The factor of 2 allows for a maximum ratio of 2^2=4
     * between nx_pme and ny_pme.
     */
    if (lcd(ntot - npme, npme)*2 < npme_root2)
    {
        return FALSE;
    }

    /* Does this division give a reasonable PME load? */
    return fits_pme_ratio(ntot, npme, ratio);
}
/*! \brief Make a guess for the number of PME ranks to use.
 *
 * The relative PME load is obtained from pme_load_estimate() and, when
 * \p fplog is non-NULL, written to the log. Candidate PME rank counts are
 * then tried in two passes: first divisors of \p nrank_tot between
 * ceil(nrank_tot/16) and nrank_tot/3, then every count from 1 up to
 * nrank_tot/2. The first count accepted by fits_pp_pme_perf() is used.
 *
 * \param[in] fplog      Log file, may be NULL
 * \param[in] mtop       Topology, passed on to pme_load_estimate()
 * \param[in] ir         Input record; nkx and nky are reported on failure
 * \param[in] box        Box, passed on to pme_load_estimate()
 * \param[in] nrank_tot  Total number of ranks
 * \returns the number of PME-only ranks, or 0 when even nrank_tot/2 PME
 *          ranks does not satisfy fits_pme_ratio()
 *
 * When no count is accepted, gmx_fatal() is called
 * (NOTE(review): presumably it does not return - confirm).
 */
static int guess_npme(FILE *fplog, gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
                      int nrank_tot)
{
    float ratio;
    int   npme;

    ratio = pme_load_estimate(mtop, ir, box);

    if (fplog)
    {
        fprintf(fplog, "Guess for relative PME load: %.2f\n", ratio);
    }

    /* We assume the optimal rank ratio is close to the load ratio.
     * The communication load is neglected,
     * but (hopefully) this will balance out between PP and PME.
     */

    if (!fits_pme_ratio(nrank_tot, nrank_tot/2, ratio))
    {
        /* We would need more than nrank_tot/2 PME only ranks,
         * which is not possible. Since the PME load is very high,
         * we will not lose much performance when all ranks do PME.
         */
        return 0;
    }

    /* First try to find npme as a factor of nrank_tot up to nrank_tot/3.
     * We start with a minimum PME rank fraction of 1/16
     * and avoid ratios which lead to large prime factors
     * in nrank_tot - npme.
     */
    for (npme = (nrank_tot + 15)/16; npme <= nrank_tot/3; npme++)
    {
        if (nrank_tot % npme == 0 &&
            fits_pp_pme_perf(nrank_tot, npme, ratio))
        {
            break;
        }
    }

    if (npme > nrank_tot/3)
    {
        /* No divisor was accepted: try any possible number for npme */
        for (npme = 1; npme <= nrank_tot/2; npme++)
        {
            if (fits_pp_pme_perf(nrank_tot, npme, ratio))
            {
                break;
            }
        }
    }

    if (npme > nrank_tot/2)
    {
        /* The format string is assembled from adjacent literals;
         * a string literal itself may not span a line break.
         */
        gmx_fatal(FARGS,
                  "Could not find an appropriate number of separate PME ranks. i.e. "
                  ">= %5f*#ranks (%d) and <= #ranks/2 (%d) and reasonable performance wise (grid_x=%d, grid_y=%d).\n"
                  "Use the -npme option of mdrun or change the number of ranks or the PME grid dimensions, see the manual for details.",
                  ratio, (int)(0.95*ratio*nrank_tot + 0.5), nrank_tot/2, ir->nkx, ir->nky);
        /* Keep the compiler happy */
        npme = 0;
    }
    else
    {
        if (fplog)
        {
            fprintf(fplog,
                    "Will use %d particle-particle and %d PME only ranks\n"
                    "This is a guess, check the performance at the end of the log file\n",
                    nrank_tot - npme, npme);
        }
        fprintf(stderr, "\n"
                "Will use %d particle-particle and %d PME only ranks\n"
                "This is a guess, check the performance at the end of the log file\n",
                nrank_tot - npme, npme);
    }

    return npme;
}
/* Make a guess for the number of PME-only nodes to use.
 *
 * The relative PME load is obtained from pme_load_estimate() and, when
 * fplog is non-NULL, written to the log. Candidate PME node counts are
 * then tried in two passes: first divisors of nnodes up to nnodes/3,
 * then every count from 1 up to nnodes/2 for which the PP node count
 * does not have a large last factor. The first count accepted by
 * fits_pme_perf() is used.
 *
 * Returns the number of PME-only nodes, or 0 when even nnodes/2 PME nodes
 * does not satisfy fits_pme_ratio().
 * When no count is accepted, gmx_fatal() is called
 * (NOTE(review): presumably it does not return - confirm).
 */
static int guess_npme(FILE *fplog, gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
                      int nnodes)
{
    float ratio;
    int   npme, ndiv, *div, *mdiv, ldiv;

    ratio = pme_load_estimate(mtop, ir, box);

    if (fplog)
    {
        fprintf(fplog, "Guess for relative PME load: %.2f\n", ratio);
    }

    /* We assume the optimal node ratio is close to the load ratio.
     * The communication load is neglected,
     * but (hopefully) this will balance out between PP and PME.
     */

    if (!fits_pme_ratio(nnodes, nnodes/2, ratio))
    {
        /* We would need more than nnodes/2 PME only nodes,
         * which is not possible. Since the PME load is very high,
         * we will not lose much performance when all nodes do PME.
         */
        return 0;
    }

    /* First try to find npme as a factor of nnodes up to nnodes/3 */
    for (npme = 1; npme <= nnodes/3; npme++)
    {
        /* Note that fits_pme_perf receives ir and so might change
         * the PME grid; in the current implementation it does not.
         */
        if (nnodes % npme == 0 &&
            fits_pme_perf(fplog, ir, box, mtop, nnodes, npme, ratio))
        {
            break;
        }
    }

    if (npme > nnodes/3)
    {
        /* No divisor was accepted: try any possible number for npme */
        for (npme = 1; npme <= nnodes/2; npme++)
        {
            ndiv = factorize(nnodes - npme, &div, &mdiv);
            /* NOTE(review): the last entry of div is used as the largest
             * factor, which presumes factorize() lists factors in
             * ascending order - confirm.
             * When no factors are reported (presumably a single PP node),
             * div[ndiv-1] would read before the start of the array,
             * so fall back to the trivial factor 1 in that case.
             */
            ldiv = (ndiv > 0) ? div[ndiv - 1] : 1;
            sfree(div);
            sfree(mdiv);

            /* Only use this value if nnodes-npme does not have
             * a large prime factor (5 y, 7 n, 14 n, 15 y).
             */
            if (ldiv <= 3 + (int)(pow(nnodes - npme, 1.0/3.0) + 0.5) &&
                fits_pme_perf(fplog, ir, box, mtop, nnodes, npme, ratio))
            {
                break;
            }
        }
    }

    if (npme > nnodes/2)
    {
        /* The format string is assembled from adjacent literals;
         * a string literal itself may not span a line break.
         */
        gmx_fatal(FARGS,
                  "Could not find an appropriate number of separate PME nodes. i.e. "
                  ">= %5f*#nodes (%d) and <= #nodes/2 (%d) and reasonable performance wise (grid_x=%d, grid_y=%d).\n"
                  "Use the -npme option of mdrun or change the number of processors or the PME grid dimensions, see the manual for details.",
                  ratio, (int)(0.95*ratio*nnodes + 0.5), nnodes/2, ir->nkx, ir->nky);
        /* Keep the compiler happy */
        npme = 0;
    }
    else
    {
        if (fplog)
        {
            fprintf(fplog,
                    "Will use %d particle-particle and %d PME only nodes\n"
                    "This is a guess, check the performance at the end of the log file\n",
                    nnodes - npme, npme);
        }
        fprintf(stderr, "\n"
                "Will use %d particle-particle and %d PME only nodes\n"
                "This is a guess, check the performance at the end of the log file\n",
                nnodes - npme, npme);
    }

    return npme;
}