コード例 #1
0
/* Tell whether dedicating npme out of nnodes nodes to PME is acceptable.
 *
 * The split is accepted only when fits_pme_ratio() approves it for the
 * given ratio and pme_inconvenient_nnodes(), evaluated for the PME grid
 * sizes ir->nkx and ir->nky, returns at most 1.
 * fplog, box and mtop are not used by this function.
 */
static bool fits_pme_perf(FILE *fplog,
                          t_inputrec *ir,matrix box,gmx_mtop_t *mtop,
                          int nnodes,int npme,float ratio)
{
    bool fits;

    fits = fits_pme_ratio(nnodes,npme,ratio);
    if (fits)
    {
        /* The grid check is only consulted once the ratio check passed */
        fits = (pme_inconvenient_nnodes(ir->nkx,ir->nky,npme) <= 1);
    }

    return fits;
}
コード例 #2
0
ファイル: domdec_setup.c プロジェクト: nrego/indus
/* Returns TRUE when using npme out of nnodes nodes for PME is expected
 * to give reasonable performance.
 *
 * Three checks are applied in order:
 *  - the last factor reported by factorize() for the PP node count
 *    (nnodes-npme) may not exceed 3 plus the rounded cube root of that count;
 *  - twice lcd(nnodes-npme,npme) may not be smaller than the rounded
 *    square root of npme;
 *  - fits_pme_ratio() must accept the division for the given ratio.
 *
 * fplog, ir, box and mtop are not used by this function.
 */
static gmx_bool fits_pp_pme_perf(FILE *fplog,
                                 t_inputrec *ir,matrix box,gmx_mtop_t *mtop,
                                 int nnodes,int npme,float ratio)
{
    int ndiv,*div,*mdiv,ldiv;
    int npp_root3,npme_root2;

    ndiv = factorize(nnodes-npme,&div,&mdiv);
    /* NOTE(review): factorize() is defined elsewhere; its last entry is
     * presumably the largest prime factor. If it reports no factors at all
     * (e.g. for a single PP node), div[ndiv-1] would read before the start
     * of the array, so fall back to the trivial factor 1.
     */
    ldiv = (ndiv > 0) ? div[ndiv-1] : 1;
    sfree(div);
    sfree(mdiv);

    npp_root3  = (int)(pow(nnodes-npme,1.0/3.0) + 0.5);
    npme_root2 = (int)(sqrt(npme) + 0.5);

    /* The check below gives a reasonable division:
     * factor 5 allowed at 5 or more PP nodes,
     * factor 7 allowed at 49 or more PP nodes.
     */
    if (ldiv > 3 + npp_root3)
    {
        return FALSE;
    }

    /* Check if the numbers of PP and PME nodes have a reasonably sized
     * denominator in common, such that we can use 2D PME decomposition
     * when required (which requires nx_pp == nx_pme).
     * The factor of 2 allows for a maximum ratio of 2^2=4
     * between nx_pme and ny_pme.
     */
    if (lcd(nnodes-npme,npme)*2 < npme_root2)
    {
        return FALSE;
    }

    /* Does this division give a reasonable PME load? */
    return fits_pme_ratio(nnodes,npme,ratio);
}
コード例 #3
0
/*! \brief Returns TRUE when npme out of ntot ranks doing PME is expected to give reasonable performance
 *
 * Three checks are applied in order:
 *  - the last factor reported by factorize() for the PP rank count
 *    (ntot - npme) may not exceed 3 plus the rounded cube root of that count;
 *  - twice lcd(ntot - npme, npme) may not be smaller than the rounded
 *    square root of npme;
 *  - fits_pme_ratio() must accept the division for the given ratio.
 */
static gmx_bool fits_pp_pme_perf(int ntot, int npme, float ratio)
{
    int ndiv, *div, *mdiv, ldiv;
    int npp_root3, npme_root2;

    ndiv = factorize(ntot - npme, &div, &mdiv);
    /* NOTE(review): factorize() is defined elsewhere; its last entry is
     * presumably the largest prime factor. If it reports no factors at all
     * (e.g. for a single PP rank), div[ndiv-1] would read before the start
     * of the array, so fall back to the trivial factor 1.
     */
    ldiv = (ndiv > 0) ? div[ndiv-1] : 1;
    sfree(div);
    sfree(mdiv);

    npp_root3  = static_cast<int>(std::pow(ntot - npme, 1.0/3.0) + 0.5);
    npme_root2 = static_cast<int>(std::sqrt(static_cast<double>(npme)) + 0.5);

    /* The check below gives a reasonable division:
     * factor 5 allowed at 5 or more PP ranks,
     * factor 7 allowed at 49 or more PP ranks.
     */
    if (ldiv > 3 + npp_root3)
    {
        return FALSE;
    }

    /* Check if the numbers of PP and PME ranks have a reasonably sized
     * denominator in common, such that we can use 2D PME decomposition
     * when required (which requires nx_pp == nx_pme).
     * The factor of 2 allows for a maximum ratio of 2^2=4
     * between nx_pme and ny_pme.
     */
    if (lcd(ntot - npme, npme)*2 < npme_root2)
    {
        return FALSE;
    }

    /* Does this division give a reasonable PME load? */
    return fits_pme_ratio(ntot, npme, ratio);
}
コード例 #4
0
/*! \brief Make a guess for the number of PME ranks to use.
 *
 * The relative PME load is taken from pme_load_estimate() and written to
 * fplog when a log file is given.
 *
 * Returns 0 when fits_pme_ratio() rejects even nrank_tot/2 PME ranks for
 * that load; otherwise returns the first rank count accepted by
 * fits_pp_pme_perf(). The chosen split is reported to fplog (when non-NULL)
 * and to stderr. When no candidate up to nrank_tot/2 is accepted,
 * gmx_fatal() is called.
 */
static int guess_npme(FILE *fplog, gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
                      int nrank_tot)
{
    int         npme;
    const float ratio = pme_load_estimate(mtop, ir, box);

    if (fplog)
    {
        fprintf(fplog, "Guess for relative PME load: %.2f\n", ratio);
    }

    /* The optimal rank ratio is assumed to be close to the load ratio;
     * communication cost is ignored in the hope that it evens out
     * between PP and PME.
     */
    if (!fits_pme_ratio(nrank_tot, nrank_tot/2, ratio))
    {
        /* More than half of the ranks would have to be PME-only, which
         * is not possible. With such a high PME load, little performance
         * is lost by letting all ranks do PME.
         */
        return 0;
    }

    /* First pass: only consider exact divisors of nrank_tot, starting at a
     * PME fraction of 1/16 (rounded up) and going up to nrank_tot/3.
     */
    for (npme = (nrank_tot + 15)/16; npme <= nrank_tot/3; npme++)
    {
        /* fits_pp_pme_perf() is only called for divisors of nrank_tot */
        if (nrank_tot % npme == 0 &&
            fits_pp_pme_perf(nrank_tot, npme, ratio))
        {
            break;
        }
    }

    if (npme > nrank_tot/3)
    {
        /* Second pass: no divisor was accepted, so try every count from 1
         * up to nrank_tot/2.
         */
        for (npme = 1; npme <= nrank_tot/2; npme++)
        {
            if (fits_pp_pme_perf(nrank_tot, npme, ratio))
            {
                break;
            }
        }
    }

    if (npme > nrank_tot/2)
    {
        gmx_fatal(FARGS, "Could not find an appropriate number of separate PME ranks. i.e. >= %5f*#ranks (%d) and <= #ranks/2 (%d) and reasonable performance wise (grid_x=%d, grid_y=%d).\n"
                  "Use the -npme option of mdrun or change the number of ranks or the PME grid dimensions, see the manual for details.",
                  ratio, (int)(0.95*ratio*nrank_tot + 0.5), nrank_tot/2, ir->nkx, ir->nky);
        /* Presumably not reached; the return only keeps the compiler happy */
        return 0;
    }

    if (fplog)
    {
        fprintf(fplog,
                "Will use %d particle-particle and %d PME only ranks\n"
                "This is a guess, check the performance at the end of the log file\n",
                nrank_tot - npme, npme);
    }
    fprintf(stderr, "\n"
            "Will use %d particle-particle and %d PME only ranks\n"
            "This is a guess, check the performance at the end of the log file\n",
            nrank_tot - npme, npme);

    return npme;
}
コード例 #5
0
/* Guess the number of separate PME nodes to use out of nnodes.
 *
 * The relative PME load is taken from pme_load_estimate() and written to
 * fplog when a log file is given.
 *
 * Returns 0 when fits_pme_ratio() rejects even nnodes/2 PME nodes for that
 * load; otherwise returns the first node count accepted by fits_pme_perf().
 * The chosen split is reported to fplog (when non-NULL) and to stderr.
 * When no candidate up to nnodes/2 is accepted, gmx_fatal() is called.
 */
static int guess_npme(FILE *fplog,gmx_mtop_t *mtop,t_inputrec *ir,matrix box,
                      int nnodes)
{
    float ratio;
    int   npme,ndiv,*div,*mdiv,ldiv;

    ratio = pme_load_estimate(mtop,ir,box);

    if (fplog)
    {
        fprintf(fplog,"Guess for relative PME load: %.2f\n",ratio);
    }

    /* We assume the optimal node ratio is close to the load ratio.
     * The communication load is neglected,
     * but (hopefully) this will balance out between PP and PME.
     */

    if (!fits_pme_ratio(nnodes,nnodes/2,ratio))
    {
        /* We would need more than nnodes/2 PME only nodes,
         * which is not possible. Since the PME load is very high,
         * we will not lose much performance when all nodes do PME.
         */

        return 0;
    }

    /* First try to find npme as a factor of nnodes up to nnodes/3 */
    npme = 1;
    while (npme <= nnodes/3)
    {
        if (nnodes % npme == 0)
        {
            /* Note that fits_perf might change the PME grid,
             * in the current implementation it does not.
             */
            if (fits_pme_perf(fplog,ir,box,mtop,nnodes,npme,ratio))
            {
                break;
            }
        }
        npme++;
    }
    if (npme > nnodes/3)
    {
        /* Try any possible number for npme */
        npme = 1;
        while (npme <= nnodes/2)
        {
            ndiv = factorize(nnodes-npme,&div,&mdiv);
            /* NOTE(review): factorize() is defined elsewhere; if it reports
             * no factors at all (e.g. for a single PP node), div[ndiv-1]
             * would read before the start of the array, so fall back to
             * the trivial factor 1.
             */
            ldiv = (ndiv > 0) ? div[ndiv-1] : 1;
            sfree(div);
            sfree(mdiv);
            /* Only use this value if nnodes-npme does not have
             * a large prime factor (5 y, 7 n, 14 n, 15 y).
             */
            if (ldiv <= 3 + (int)(pow(nnodes-npme,1.0/3.0) + 0.5))
            {
                /* Note that fits_perf may change the PME grid */
                if (fits_pme_perf(fplog,ir,box,mtop,nnodes,npme,ratio))
                {
                    break;
                }
            }
            npme++;
        }
    }
    if (npme > nnodes/2)
    {
        gmx_fatal(FARGS,"Could not find an appropriate number of separate PME nodes. i.e. >= %5f*#nodes (%d) and <= #nodes/2 (%d) and reasonable performance wise (grid_x=%d, grid_y=%d).\n"
                  "Use the -npme option of mdrun or change the number of processors or the PME grid dimensions, see the manual for details.",
                  ratio,(int)(0.95*ratio*nnodes+0.5),nnodes/2,ir->nkx,ir->nky);
        /* Keep the compiler happy */
        npme = 0;
    }
    else
    {
        if (fplog)
        {
            fprintf(fplog,
                    "Will use %d particle-particle and %d PME only nodes\n"
                    "This is a guess, check the performance at the end of the log file\n",
                    nnodes-npme,npme);
        }
        fprintf(stderr,"\n"
                "Will use %d particle-particle and %d PME only nodes\n"
                "This is a guess, check the performance at the end of the log file\n",
                nnodes-npme,npme);
    }

    return npme;
}