Ejemplo n.º 1
0
/*! \brief Estimate the communication cost of a candidate domain decomposition.
 *
 * Only one direction of the communication is counted: all PP, PME and
 * PP-PME communication is symmetric, so the "back"-communication cost
 * equals the forward cost.
 *
 * \param[in] limit     Lower bound on the cell size: a dimension is rejected
 *                      when its effective length is below nc[d]*limit.
 * \param[in] cutoff    Cut-off distance; with PBC it has to fit in nc[d]-1
 *                      cells, and it is passed on to comm_box_frac().
 * \param[in] box       Box matrix; only off-diagonal elements are inspected.
 * \param[in] ddbox     Supplies box_size, skew_fac, npbcdim and tric_dir.
 * \param[in] natoms    Atom count used to scale the per-atom cost terms.
 * \param[in] ir        Input record; ePBC, nwall, deform, epc, compress,
 *                      nkx/nky/nkz and pme_order are read.
 * \param[in] pbcdxr    Weight of the pbc_dx term for bondeds; it is multiplied
 *                      by a machine dependent factor.
 * \param[in] npme_tot  Total PME count that is distributed over x and y;
 *                      a PME split is only set up when this is larger than 1.
 * \param[in] nc        Number of cells along x, y and z.
 * \returns -1 when the decomposition is rejected, otherwise
 *          3*natoms*(PP volume + pbc_dx cost) + PME cost.
 */
static float comm_cost_est(real limit, real cutoff,
                           matrix box, gmx_ddbox_t *ddbox,
                           int natoms, t_inputrec *ir,
                           float pbcdxr,
                           int npme_tot, ivec nc)
{
    /* Cost of a pbc_dx call relative to the cost of communicating
     * the coordinate and force of one atom. This is machine dependent;
     * the values below are for x86 with SMP or Infiniband.
     */
    const float rect_pbcdx_weight = 0.1;
    const float tric_pbcdx_weight = 0.2;
    ivec        pme_cells         = {1, 1, 1};
    rvec        eff_len;
    int         d, e, nkd, halo;
    float       pp_vol, xf_vol, pme_cost, pbcdx_cost, grid_cost;

    /* DD algorithm restriction: with xy-PBC and fewer than two walls
     * z can not be decomposed.
     */
    if (ir->ePBC == epbcXY && ir->nwall < 2 && nc[ZZ] > 1)
    {
        return -1;
    }
    /* DD algorithm restriction: with screw PBC, x must not consist of
     * a single cell and neither y nor z may be decomposed.
     */
    if (ir->ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1))
    {
        return -1;
    }

    if (inhomogeneous_z(ir) && nc[ZZ] > 1)
    {
        return -1;
    }

    assert(ddbox->npbcdim <= DIM);

    /* Triclinic requirements: when box vector e has (or can acquire)
     * a component along d, decomposing e while d has one cell is rejected.
     */
    for (d = 0; d < DIM; d++)
    {
        for (e = d+1; e < ddbox->npbcdim; e++)
        {
            /* Only a decomposed e paired with a single-cell d can fail */
            if (nc[e] <= 1 || nc[d] != 1)
            {
                continue;
            }
            if (box[e][d] != 0 || ir->deform[e][d] != 0 ||
                (ir->epc != epcNO && ir->compress[e][d] != 0))
            {
                return -1;
            }
        }
    }

    for (d = 0; d < DIM; d++)
    {
        int has_pbc = (d < ddbox->npbcdim);

        eff_len[d] = ddbox->box_size[d]*ddbox->skew_fac[d];

        /* The lower limit on the cell size applies, except for
         * a dimension without PBC that has at most 2 cells.
         */
        if ((has_pbc || nc[d] > 2) && eff_len[d] < nc[d]*limit)
        {
            return -1;
        }
        /* With PBC the cut-off should fit in nc[d]-1 cells */
        if (has_pbc && nc[d] > 1 && (nc[d] - 1)*eff_len[d] < nc[d]*cutoff)
        {
            return -1;
        }
    }

    if (npme_tot > 1)
    {
        /* These choices should match those
         * in init_domain_decomposition in domdec.c.
         */
        if (nc[YY] == 1)
        {
            pme_cells[XX] = npme_tot;
            pme_cells[YY] = 1;
        }
        else if (nc[XX] == 1 && nc[YY] > 1)
        {
            pme_cells[XX] = 1;
            pme_cells[YY] = npme_tot;
        }
        else
        {
            /* 1D or 2D PME decomposition: follow the x cell count
             * when it divides the PME count, otherwise put all in x.
             */
            if (npme_tot % nc[XX] == 0)
            {
                pme_cells[XX] = nc[XX];
            }
            else
            {
                pme_cells[XX] = npme_tot;
            }
            pme_cells[YY] = npme_tot/pme_cells[XX];
        }
    }

    /* For two (nearly) equal dimensions the lower index should get
     * the larger cell count (nx > ny and ny > nz are preferred), so the
     * result does not depend sensitively on rounding of the box elements.
     */
    for (d = 0; d < DIM; d++)
    {
        for (e = d+1; e < DIM; e++)
        {
            int xy_not_swappable, yz_not_swappable;

            if (!(fabs(eff_len[e] - eff_len[d]) < 0.01*eff_len[d] && nc[e] > nc[d]))
            {
                continue;
            }

            /* Dimensions d and e might not be equivalent for PME.
             * x/y: when nc[YY] differs from the PME y count, x and y
             *      can not be swapped (when they are equal, the swapped
             *      nc matches the PME x count and PME can be swapped too).
             * y/z: there is no PME decomposition along z.
             */
            xy_not_swappable = (d == XX && e == YY && nc[YY] != pme_cells[YY]);
            yz_not_swappable = (d == YY && e == ZZ && pme_cells[YY] > 1);

            if (npme_tot > 1 && (xy_not_swappable || yz_not_swappable))
            {
                continue;
            }

            return -1;
        }
    }

    pp_vol = comm_box_frac(nc, cutoff, ddbox);

    pme_cost = 0;
    for (d = 0; d < 2; d++)
    {
        /* Largest volume for the PME x/f redistribution, which only
         * contributes when the PME count does not divide the cell count.
         */
        if (nc[d] % pme_cells[d] != 0)
        {
            if (nc[d] <= pme_cells[d])
            {
                xf_vol = 1.0 - lcd(nc[d], pme_cells[d])/(double)pme_cells[d];
            }
            else if (pme_cells[d] == 2)
            {
                xf_vol = 1.0/3.0;
            }
            else
            {
                xf_vol = 0.5;
            }
            pme_cost += 3*natoms*xf_vol;
        }

        /* Communication of the grid overlap */
        if (pme_cells[d] > 1)
        {
            nkd  = (d == 0 ? ir->nkx : ir->nky);
            halo = ir->pme_order;
            if (nkd % pme_cells[d] == 0)
            {
                halo -= 1;
            }
            /* The product is accumulated factor by factor in a float
             * instead of as one integer expression (presumably to avoid
             * integer overflow for large grids).
             */
            grid_cost  = pme_cells[d];
            grid_cost *= halo;
            grid_cost *= ir->nkx;
            grid_cost *= ir->nky;
            grid_cost *= ir->nkz;
            grid_cost /= nkd;
            pme_cost  += grid_cost;
        }
    }

    pme_cost += comm_pme_cost_vol(pme_cells[YY], ir->nky, ir->nkz, ir->nkx);
    pme_cost += comm_pme_cost_vol(pme_cells[XX], ir->nkx, ir->nky, ir->nkz);

    /* Cost of pbc_dx for bondeds, charged when x or y has a single cell,
     * or when z has a single cell and the PBC is not xy.
     */
    if (nc[XX] == 1 || nc[YY] == 1 || (nc[ZZ] == 1 && ir->ePBC != epbcXY))
    {
        int tric_single_cell = ((ddbox->tric_dir[XX] && nc[XX] == 1) ||
                                (ddbox->tric_dir[YY] && nc[YY] == 1));

        pbcdx_cost = pbcdxr*(tric_single_cell ? tric_pbcdx_weight : rect_pbcdx_weight);
    }
    else
    {
        pbcdx_cost = 0;
    }

    if (debug)
    {
        fprintf(debug,
                "nc %2d %2d %2d %2d %2d vol pp %6.4f pbcdx %6.4f pme %9.3e tot %9.3e\n",
                nc[XX], nc[YY], nc[ZZ], pme_cells[XX], pme_cells[YY],
                pp_vol, pbcdx_cost, pme_cost/(3*natoms),
                pp_vol + pbcdx_cost + pme_cost/(3*natoms));
    }

    return 3*natoms*(pp_vol + pbcdx_cost) + pme_cost;
}
Ejemplo n.º 2
0
/* Estimate the relative communication cost of a candidate domain
 * decomposition.
 *
 * dd      is not used by this function.
 * limit   lower bound on the cell size: a dimension is rejected when its
 *         effective length is below nc[d]*limit.
 * cutoff  cut-off distance, passed on to comm_box_frac().
 * box     box matrix; only off-diagonal elements are inspected.
 * ddbox   supplies box_size, skew_fac, npbcdim and tric_dir.
 * ir      input record; ePBC, nwall, deform, epc and compress are read.
 * pbcdxr  weight of the pbc_dx term for bondeds; it is multiplied by
 *         a machine dependent factor.
 * npme    PME-only node count; 0 switches off the PME specific parts.
 * nc      number of cells along x, y and z.
 *
 * Returns -1 when the decomposition is rejected, otherwise the sum of
 * the comm_box_frac() value, the pbc_dx cost and the PME volume term.
 */
static float comm_cost_est(gmx_domdec_t *dd, real limit, real cutoff,
                           matrix box, gmx_ddbox_t *ddbox, t_inputrec *ir,
                           float pbcdxr,
                           int npme, ivec nc)
{
    /* Cost of a pbc_dx call relative to the cost of communicating
     * the coordinate and force of one atom. This is machine dependent;
     * the values below are for x86 with SMP or Infiniband.
     */
    const float rect_pbcdx_weight = 0.1;
    const float tric_pbcdx_weight = 0.2;
    rvec        eff_len;
    int         d, e, npp;
    float       pp_vol, pme_vol, pbcdx_cost;

    /* DD algorithm restriction: with xy-PBC and fewer than two walls
     * z can not be decomposed.
     */
    if (ir->ePBC == epbcXY && ir->nwall < 2 && nc[ZZ] > 1)
    {
        return -1;
    }
    /* DD algorithm restriction: with screw PBC, x must not consist of
     * a single cell and neither y nor z may be decomposed.
     */
    if (ir->ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1))
    {
        return -1;
    }

    /* Triclinic requirements: when box vector e has (or can acquire)
     * a component along d, decomposing e while d has one cell is rejected.
     */
    for (d = 0; d < DIM; d++)
    {
        for (e = d+1; e < ddbox->npbcdim; e++)
        {
            /* Only a decomposed e paired with a single-cell d can fail */
            if (nc[e] <= 1 || nc[d] != 1)
            {
                continue;
            }
            if (box[e][d] != 0 || ir->deform[e][d] != 0 ||
                (ir->epc != epcNO && ir->compress[e][d] != 0))
            {
                return -1;
            }
        }
    }

    /* Accumulate the total cell count and check the cell size limits */
    npp = 1;
    for (d = 0; d < DIM; d++)
    {
        npp       *= nc[d];
        eff_len[d] = ddbox->box_size[d]*ddbox->skew_fac[d];

        /* The lower limit on the cell size applies, except for
         * a dimension without PBC that has at most 2 cells.
         */
        if ((d < ddbox->npbcdim || nc[d] > 2) && eff_len[d] < nc[d]*limit)
        {
            return -1;
        }
    }

    /* For two (nearly) equal dimensions the lower index should get
     * the larger cell count, so the decomposition does not depend
     * sensitively on the rounding of the box elements.
     */
    for (d = 0; d < DIM; d++)
    {
        /* With PME nodes, x is not compared against y and z */
        if (npme != 0 && d == XX)
        {
            continue;
        }
        for (e = d+1; e < DIM; e++)
        {
            if (nc[e] > nc[d] &&
                fabs(eff_len[e] - eff_len[d]) < 0.01*eff_len[d])
            {
                return -1;
            }
        }
    }

    pp_vol = comm_box_frac(nc, cutoff, ddbox);

    /* Largest volume that a PME-only node needs to communicate; this
     * only contributes when npme does not divide the x cell count.
     */
    pme_vol = 0;
    if (npme > 0 && nc[XX] % npme != 0)
    {
        if (nc[XX] <= npme)
        {
            pme_vol = 1.0 - lcd(nc[XX], npme)/(double)npme;
        }
        else if (npme == 2)
        {
            pme_vol = 1.0/3.0;
        }
        else
        {
            pme_vol = 0.5;
        }
        /* Normalize by the number of PME-only nodes */
        pme_vol /= npme;
    }

    /* Cost of pbc_dx for bondeds, charged when x or y has a single cell,
     * or when z has a single cell and the PBC is not xy; the cost is
     * divided by the total cell count.
     */
    if (nc[XX] == 1 || nc[YY] == 1 || (nc[ZZ] == 1 && ir->ePBC != epbcXY))
    {
        int tric_single_cell = ((ddbox->tric_dir[XX] && nc[XX] == 1) ||
                                (ddbox->tric_dir[YY] && nc[YY] == 1));

        pbcdx_cost = pbcdxr*(tric_single_cell ? tric_pbcdx_weight : rect_pbcdx_weight)/npp;
    }
    else
    {
        pbcdx_cost = 0;
    }

    if (debug)
    {
        fprintf(debug,
                "nc %2d %2d %2d vol pp %6.4f pbcdx %6.4f pme %6.4f tot %6.4f\n",
                nc[XX], nc[YY], nc[ZZ],
                pp_vol, pbcdx_cost, pme_vol,
                pp_vol + pbcdx_cost + pme_vol);
    }

    return pp_vol + pbcdx_cost + pme_vol;
}