void pme_loadbal_do(pme_load_balancing_t *pme_lb, t_commrec *cr, FILE *fp_err, FILE *fp_log, t_inputrec *ir, t_forcerec *fr, t_state *state, gmx_wallcycle_t wcycle, gmx_int64_t step, gmx_int64_t step_rel, gmx_bool *bPrinting) { int n_prev; double cycles_prev; assert(pme_lb != NULL); if (!pme_lb->bActive) { return; } n_prev = pme_lb->cycles_n; cycles_prev = pme_lb->cycles_c; wallcycle_get(wcycle, ewcSTEP, &pme_lb->cycles_n, &pme_lb->cycles_c); if (pme_lb->cycles_n == 0) { /* Before the first step we haven't done any steps yet */ return; } /* Sanity check, we expect nstlist cycle counts */ if (pme_lb->cycles_n - n_prev != ir->nstlist) { /* We could return here, but it's safer to issue and error and quit */ gmx_incons("pme_loadbal_do called at an interval != nstlist"); } /* PME grid + cut-off optimization with GPUs or PME ranks */ if (!pme_lb->bBalance && pme_lb->bSepPMERanks) { if (pme_lb->bTriggerOnDLB) { pme_lb->bBalance = dd_dlb_is_on(cr->dd); } /* We should ignore the first timing to avoid timing allocation * overhead. And since the PME load balancing is called just * before DD repartitioning, the ratio returned by dd_pme_f_ratio * is not over the last nstlist steps, but the nstlist steps before * that. So the first useful ratio is available at step_rel=3*nstlist. */ else if (step_rel >= 3*ir->nstlist) { if (DDMASTER(cr->dd)) { /* If PME rank load is too high, start tuning */ pme_lb->bBalance = (dd_pme_f_ratio(cr->dd) >= loadBalanceTriggerFactor); } dd_bcast(cr->dd, sizeof(gmx_bool), &pme_lb->bBalance); } pme_lb->bActive = (pme_lb->bBalance || step_rel <= pme_lb->step_rel_stop); } /* The location in the code of this balancing termination is strange. * You would expect to have it after the call to pme_load_balance() * below, since there pme_lb->stage is updated. * But when terminating directly after deciding on and selecting the * optimal setup, DLB will turn on right away if it was locked before. * This might be due to PME reinitialization. So we check stage here * to allow for another nstlist steps with DLB locked to stabilize * the performance. */ if (pme_lb->bBalance && pme_lb->stage == pme_lb->nstage) { pme_lb->bBalance = FALSE; if (DOMAINDECOMP(cr) && dd_dlb_is_locked(cr->dd)) { /* Unlock the DLB=auto, DLB is allowed to activate */ dd_dlb_unlock(cr->dd); md_print_warn(cr, fp_log, "NOTE: DLB can now turn on, when beneficial\n"); /* We don't deactivate the tuning yet, since we will balance again * after DLB gets turned on, if it does within PMETune_period. */ continue_pme_loadbal(pme_lb, TRUE); pme_lb->bTriggerOnDLB = TRUE; pme_lb->step_rel_stop = step_rel + PMETunePeriod*ir->nstlist; } else { /* We're completely done with PME tuning */ pme_lb->bActive = FALSE; } if (DOMAINDECOMP(cr)) { /* Set the cut-off limit to the final selected cut-off, * so we don't have artificial DLB limits. * This also ensures that we won't disable the currently * optimal setting during a second round of PME balancing. */ set_dd_dlb_max_cutoff(cr, fr->ic->rlistlong); } } if (pme_lb->bBalance) { /* We might not have collected nstlist steps in cycles yet, * since init_step might not be a multiple of nstlist, * but the first data collected is skipped anyhow. */ pme_load_balance(pme_lb, cr, fp_err, fp_log, ir, state, pme_lb->cycles_c - cycles_prev, fr->ic, fr->nbv, &fr->pmedata, step); /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */ fr->ewaldcoeff_q = fr->ic->ewaldcoeff_q; fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj; fr->rlist = fr->ic->rlist; fr->rlistlong = fr->ic->rlistlong; fr->rcoulomb = fr->ic->rcoulomb; fr->rvdw = fr->ic->rvdw; if (ir->eDispCorr != edispcNO) { calc_enervirdiff(NULL, ir->eDispCorr, fr); } } if (!pme_lb->bBalance && (!pme_lb->bSepPMERanks || step_rel > pme_lb->step_rel_stop)) { /* We have just deactivated the balancing and we're not measuring PP/PME * imbalance during the first steps of the run: deactivate the tuning. */ pme_lb->bActive = FALSE; } if (!(pme_lb->bActive) && DOMAINDECOMP(cr) && dd_dlb_is_locked(cr->dd)) { /* Make sure DLB is allowed when we deactivate PME tuning */ dd_dlb_unlock(cr->dd); md_print_warn(cr, fp_log, "NOTE: DLB can now turn on, when beneficial\n"); } *bPrinting = pme_lb->bBalance; }
void pme_loadbal_do(pme_load_balancing_t *pme_lb, t_commrec *cr, FILE *fp_err, FILE *fp_log, t_inputrec *ir, t_forcerec *fr, t_state *state, gmx_wallcycle_t wcycle, gmx_int64_t step, gmx_int64_t step_rel, gmx_bool *bPrinting) { int n_prev; double cycles_prev; assert(pme_lb != NULL); if (!pme_lb->bActive) { return; } n_prev = pme_lb->cycles_n; cycles_prev = pme_lb->cycles_c; wallcycle_get(wcycle, ewcSTEP, &pme_lb->cycles_n, &pme_lb->cycles_c); if (pme_lb->cycles_n == 0) { /* Before the first step we haven't done any steps yet */ return; } /* Sanity check, we expect nstlist cycle counts */ if (pme_lb->cycles_n - n_prev != ir->nstlist) { /* We could return here, but it's safer to issue and error and quit */ gmx_incons("pme_loadbal_do called at an interval != nstlist"); } /* PME grid + cut-off optimization with GPUs or PME ranks */ if (!pme_lb->bBalance && pme_lb->bSepPMERanks) { if (DDMASTER(cr->dd)) { /* PME rank load is too high, start tuning */ pme_lb->bBalance = (dd_pme_f_ratio(cr->dd) >= loadBalanceTriggerFactor); } dd_bcast(cr->dd, sizeof(gmx_bool), &pme_lb->bBalance); if (pme_lb->bBalance && use_GPU(fr->nbv) && DOMAINDECOMP(cr) && pme_lb->bSepPMERanks) { /* Lock DLB=auto to off (does nothing when DLB=yes/no). * With GPUs + separate PME ranks, we don't want DLB. * This could happen when we scan coarse grids and * it would then never be turned off again. * This would hurt performance at the final, optimal * grid spacing, where DLB almost never helps. * Also, DLB can limit the cut-off for PME tuning. */ dd_dlb_set_lock(cr->dd, TRUE); } } if (pme_lb->bBalance) { /* init_step might not be a multiple of nstlist, * but the first cycle is always skipped anyhow. */ pme_lb->bBalance = pme_load_balance(pme_lb, cr, fp_err, fp_log, ir, state, pme_lb->cycles_c - cycles_prev, fr->ic, fr->nbv, &fr->pmedata, step); /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */ fr->ewaldcoeff_q = fr->ic->ewaldcoeff_q; fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj; fr->rlist = fr->ic->rlist; fr->rlistlong = fr->ic->rlistlong; fr->rcoulomb = fr->ic->rcoulomb; fr->rvdw = fr->ic->rvdw; if (ir->eDispCorr != edispcNO) { calc_enervirdiff(NULL, ir->eDispCorr, fr); } if (!pme_lb->bBalance && DOMAINDECOMP(cr) && dd_dlb_is_locked(cr->dd)) { /* Unlock the DLB=auto, DLB is allowed to activate * (but we don't expect it to activate in most cases). */ dd_dlb_set_lock(cr->dd, FALSE); } } if (!pme_lb->bBalance && (!pme_lb->bSepPMERanks || (step_rel <= PMETunePeriod*ir->nstlist))) { /* We have just deactivated the balancing and we're not measuring PP/PME * imbalance during the first 50*nstlist steps: deactivate the tuning. */ pme_lb->bActive = FALSE; } *bPrinting = pme_lb->bBalance; }