/*! \brief Estimate cost of communication for a possible domain decomposition. */ static float comm_cost_est(real limit, real cutoff, matrix box, gmx_ddbox_t *ddbox, int natoms, t_inputrec *ir, float pbcdxr, int npme_tot, ivec nc) { ivec npme = {1, 1, 1}; int i, j, nk, overlap; rvec bt; float comm_vol, comm_vol_xf, comm_pme, cost_pbcdx; /* This is the cost of a pbc_dx call relative to the cost * of communicating the coordinate and force of an atom. * This will be machine dependent. * These factors are for x86 with SMP or Infiniband. */ float pbcdx_rect_fac = 0.1; float pbcdx_tric_fac = 0.2; float temp; /* Check the DD algorithm restrictions */ if ((ir->ePBC == epbcXY && ir->nwall < 2 && nc[ZZ] > 1) || (ir->ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1))) { return -1; } if (inhomogeneous_z(ir) && nc[ZZ] > 1) { return -1; } assert(ddbox->npbcdim <= DIM); /* Check if the triclinic requirements are met */ for (i = 0; i < DIM; i++) { for (j = i+1; j < ddbox->npbcdim; j++) { if (box[j][i] != 0 || ir->deform[j][i] != 0 || (ir->epc != epcNO && ir->compress[j][i] != 0)) { if (nc[j] > 1 && nc[i] == 1) { return -1; } } } } for (i = 0; i < DIM; i++) { bt[i] = ddbox->box_size[i]*ddbox->skew_fac[i]; /* Without PBC and with 2 cells, there are no lower limits on the cell size */ if (!(i >= ddbox->npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit) { return -1; } /* With PBC, check if the cut-off fits in nc[i]-1 cells */ if (i < ddbox->npbcdim && nc[i] > 1 && (nc[i] - 1)*bt[i] < nc[i]*cutoff) { return -1; } } if (npme_tot > 1) { /* The following choices should match those * in init_domain_decomposition in domdec.c. */ if (nc[XX] == 1 && nc[YY] > 1) { npme[XX] = 1; npme[YY] = npme_tot; } else if (nc[YY] == 1) { npme[XX] = npme_tot; npme[YY] = 1; } else { /* Will we use 1D or 2D PME decomposition? */ npme[XX] = (npme_tot % nc[XX] == 0) ? nc[XX] : npme_tot; npme[YY] = npme_tot/npme[XX]; } } /* When two dimensions are (nearly) equal, use more cells * for the smallest index, so the decomposition does not * depend sensitively on the rounding of the box elements. */ for (i = 0; i < DIM; i++) { for (j = i+1; j < DIM; j++) { /* Check if the box size is nearly identical, * in that case we prefer nx > ny and ny > nz. */ if (fabs(bt[j] - bt[i]) < 0.01*bt[i] && nc[j] > nc[i]) { /* The XX/YY check is a bit compact. If nc[YY]==npme[YY] * this means the swapped nc has nc[XX]==npme[XX], * and we can also swap X and Y for PME. */ /* Check if dimension i and j are equivalent for PME. * For x/y: if nc[YY]!=npme[YY], we can not swap x/y * For y/z: we can not have PME decomposition in z */ if (npme_tot <= 1 || !((i == XX && j == YY && nc[YY] != npme[YY]) || (i == YY && j == ZZ && npme[YY] > 1))) { return -1; } } } } /* This function determines only half of the communication cost. * All PP, PME and PP-PME communication is symmetric * and the "back"-communication cost is identical to the forward cost. */ comm_vol = comm_box_frac(nc, cutoff, ddbox); comm_pme = 0; for (i = 0; i < 2; i++) { /* Determine the largest volume for PME x/f redistribution */ if (nc[i] % npme[i] != 0) { if (nc[i] > npme[i]) { comm_vol_xf = (npme[i] == 2 ? 1.0/3.0 : 0.5); } else { comm_vol_xf = 1.0 - lcd(nc[i], npme[i])/(double)npme[i]; } comm_pme += 3*natoms*comm_vol_xf; } /* Grid overlap communication */ if (npme[i] > 1) { nk = (i == 0 ? ir->nkx : ir->nky); overlap = (nk % npme[i] == 0 ? ir->pme_order-1 : ir->pme_order); temp = npme[i]; temp *= overlap; temp *= ir->nkx; temp *= ir->nky; temp *= ir->nkz; temp /= nk; comm_pme += temp; /* Old line comm_pme += npme[i]*overlap*ir->nkx*ir->nky*ir->nkz/nk; */ } } comm_pme += comm_pme_cost_vol(npme[YY], ir->nky, ir->nkz, ir->nkx); comm_pme += comm_pme_cost_vol(npme[XX], ir->nkx, ir->nky, ir->nkz); /* Add cost of pbc_dx for bondeds */ cost_pbcdx = 0; if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir->ePBC != epbcXY)) { if ((ddbox->tric_dir[XX] && nc[XX] == 1) || (ddbox->tric_dir[YY] && nc[YY] == 1)) { cost_pbcdx = pbcdxr*pbcdx_tric_fac; } else { cost_pbcdx = pbcdxr*pbcdx_rect_fac; } } if (debug) { fprintf(debug, "nc %2d %2d %2d %2d %2d vol pp %6.4f pbcdx %6.4f pme %9.3e tot %9.3e\n", nc[XX], nc[YY], nc[ZZ], npme[XX], npme[YY], comm_vol, cost_pbcdx, comm_pme/(3*natoms), comm_vol + cost_pbcdx + comm_pme/(3*natoms)); } return 3*natoms*(comm_vol + cost_pbcdx) + comm_pme; }
static float comm_cost_est(gmx_domdec_t *dd,real limit,real cutoff, matrix box,gmx_ddbox_t *ddbox,t_inputrec *ir, float pbcdxr, int npme,ivec nc) { int i,j,k,npp; rvec bt; float comm_vol,comm_vol_pme,cost_pbcdx; /* This is the cost of a pbc_dx call relative to the cost * of communicating the coordinate and force of an atom. * This will be machine dependent. * These factors are for x86 with SMP or Infiniband. */ float pbcdx_rect_fac = 0.1; float pbcdx_tric_fac = 0.2; /* Check the DD algorithm restrictions */ if ((ir->ePBC == epbcXY && ir->nwall < 2 && nc[ZZ] > 1) || (ir->ePBC == epbcSCREW && (nc[XX] == 1 || nc[YY] > 1 || nc[ZZ] > 1))) { return -1; } /* Check if the triclinic requirements are met */ for(i=0; i<DIM; i++) { for(j=i+1; j<ddbox->npbcdim; j++) { if (box[j][i] != 0 || ir->deform[j][i] != 0 || (ir->epc != epcNO && ir->compress[j][i] != 0)) { if (nc[j] > 1 && nc[i] == 1) { return -1; } } } } npp = 1; for(i=0; i<DIM; i++) { npp *= nc[i]; bt[i] = ddbox->box_size[i]*ddbox->skew_fac[i]; /* Without PBC there are no cell size limits with 2 cells */ if (!(i >= ddbox->npbcdim && nc[i] <= 2) && bt[i] < nc[i]*limit) { return -1; } } /* When two dimensions are (nearly) equal, use more cells * for the smallest index, so the decomposition does not * depend sensitively on the rounding of the box elements. */ for(i=0; i<DIM; i++) { if (npme == 0 || i != XX) { for(j=i+1; j<DIM; j++) { if (fabs(bt[j] - bt[i]) < 0.01*bt[i] && nc[j] > nc[i]) { return -1; } } } } comm_vol = comm_box_frac(nc,cutoff,ddbox); /* Determine the largest volume that a PME only needs to communicate */ comm_vol_pme = 0; if ((npme > 0) && (nc[XX] % npme != 0)) { if (nc[XX] > npme) { comm_vol_pme = (npme==2 ? 1.0/3.0 : 0.5); } else { comm_vol_pme = 1.0 - lcd(nc[XX],npme)/(double)npme; } /* Normalize by the number of PME only nodes */ comm_vol_pme /= npme; } /* Add cost of pbc_dx for bondeds */ cost_pbcdx = 0; if ((nc[XX] == 1 || nc[YY] == 1) || (nc[ZZ] == 1 && ir->ePBC != epbcXY)) { if ((ddbox->tric_dir[XX] && nc[XX] == 1) || (ddbox->tric_dir[YY] && nc[YY] == 1)) { cost_pbcdx = pbcdxr*pbcdx_tric_fac/npp; } else { cost_pbcdx = pbcdxr*pbcdx_rect_fac/npp; } } if (debug) { fprintf(debug, "nc %2d %2d %2d vol pp %6.4f pbcdx %6.4f pme %6.4f tot %6.4f\n", nc[XX],nc[YY],nc[ZZ], comm_vol,cost_pbcdx,comm_vol_pme, comm_vol + cost_pbcdx + comm_vol_pme); } return comm_vol + cost_pbcdx + comm_vol_pme; }