static void prepare_verlet_scheme(FILE *fplog, t_commrec *cr, t_inputrec *ir, int nstlist_cmdline, const gmx_mtop_t *mtop, matrix box, gmx_bool bUseGPU) { /* For NVE simulations, we will retain the initial list buffer */ if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0 && !(EI_MD(ir->eI) && ir->etc == etcNO)) { /* Update the Verlet buffer size for the current run setup */ verletbuf_list_setup_t ls; real rlist_new; /* Here we assume SIMD-enabled kernels are being used. But as currently * calc_verlet_buffer_size gives the same results for 4x8 and 4x4 * and 4x2 gives a larger buffer than 4x4, this is ok. */ verletbuf_get_list_setup(TRUE, bUseGPU, &ls); calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new); if (rlist_new != ir->rlist) { if (fplog != NULL) { fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n", ir->rlist, rlist_new, ls.cluster_size_i, ls.cluster_size_j); } ir->rlist = rlist_new; ir->rlistlong = rlist_new; } } if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0)) { gmx_fatal(FARGS, "Can not set nstlist without %s", !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance"); } if (EI_DYNAMICS(ir->eI)) { /* Set or try nstlist values */ increase_nstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, bUseGPU); } }
void calc_verlet_buffer_size(const gmx_mtop_t *mtop, real boxvol, const t_inputrec *ir, real reference_temperature, const verletbuf_list_setup_t *list_setup, int *n_nonlin_vsite, real *rlist) { double resolution; char *env; real particle_distance; real nb_clust_frac_pairs_not_in_list_at_cutoff; verletbuf_atomtype_t *att = NULL; int natt = -1, i; double reppow; real md1_ljd, d2_ljd, md3_ljd; real md1_ljr, d2_ljr, md3_ljr; real md1_el, d2_el; real elfac; real kT_fac, mass_min; int ib0, ib1, ib; real rb, rl; real drift; if (reference_temperature < 0) { if (EI_MD(ir->eI) && ir->etc == etcNO) { /* This case should be handled outside calc_verlet_buffer_size */ gmx_incons("calc_verlet_buffer_size called with an NVE ensemble and reference_temperature < 0"); } /* We use the maximum temperature with multiple T-coupl groups. * We could use a per particle temperature, but since particles * interact, this might underestimate the buffer size. */ reference_temperature = 0; for (i = 0; i < ir->opts.ngtc; i++) { if (ir->opts.tau_t[i] >= 0) { reference_temperature = max(reference_temperature, ir->opts.ref_t[i]); } } } /* Resolution of the buffer size */ resolution = 0.001; env = getenv("GMX_VERLET_BUFFER_RES"); if (env != NULL) { sscanf(env, "%lf", &resolution); } /* In an atom wise pair-list there would be no pairs in the list * beyond the pair-list cut-off. * However, we use a pair-list of groups vs groups of atoms. * For groups of 4 atoms, the parallelism of SSE instructions, only * 10% of the atoms pairs are not in the list just beyond the cut-off. * As this percentage increases slowly compared to the decrease of the * Gaussian displacement distribution over this range, we can simply * reduce the drift by this fraction. * For larger groups, e.g. of 8 atoms, this fraction will be lower, * so then buffer size will be on the conservative (large) side. * * Note that the formulas used here do not take into account * cancellation of errors which could occur by missing both * attractive and repulsive interactions. * * The only major assumption is homogeneous particle distribution. * For an inhomogeneous system, such as a liquid-vapor system, * the buffer will be underestimated. The actual energy drift * will be higher by the factor: local/homogeneous particle density. * * The results of this estimate have been checked againt simulations. * In most cases the real drift differs by less than a factor 2. */ /* Worst case assumption: HCP packing of particles gives largest distance */ particle_distance = pow(boxvol*sqrt(2)/mtop->natoms, 1.0/3.0); get_verlet_buffer_atomtypes(mtop, &att, &natt, n_nonlin_vsite); assert(att != NULL && natt >= 0); if (debug) { fprintf(debug, "particle distance assuming HCP packing: %f nm\n", particle_distance); fprintf(debug, "energy drift atom types: %d\n", natt); } reppow = mtop->ffparams.reppow; md1_ljd = 0; d2_ljd = 0; md3_ljd = 0; md1_ljr = 0; d2_ljr = 0; md3_ljr = 0; if (ir->vdwtype == evdwCUT) { real sw_range, md3_pswf; switch (ir->vdw_modifier) { case eintmodNONE: case eintmodPOTSHIFT: /* -dV/dr of -r^-6 and r^-reppow */ md1_ljd = -6*pow(ir->rvdw, -7.0); md1_ljr = reppow*pow(ir->rvdw, -(reppow+1)); /* The contribution of the higher derivatives is negligible */ break; case eintmodFORCESWITCH: /* At the cut-off: V=V'=V''=0, so we use only V''' */ md3_ljd = -md3_force_switch(6.0, ir->rvdw_switch, ir->rvdw); md3_ljr = md3_force_switch(reppow, ir->rvdw_switch, ir->rvdw); break; case eintmodPOTSWITCH: /* At the cut-off: V=V'=V''=0. * V''' is given by the original potential times * the third derivative of the switch function. */ sw_range = ir->rvdw - ir->rvdw_switch; md3_pswf = 60.0*pow(sw_range, -3.0); md3_ljd = -pow(ir->rvdw, -6.0 )*md3_pswf; md3_ljr = pow(ir->rvdw, -reppow)*md3_pswf; break; default: gmx_incons("Unimplemented VdW modifier"); } } else if (EVDW_PME(ir->vdwtype)) { real b, r, br, br2, br4, br6; b = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj); r = ir->rvdw; br = b*r; br2 = br*br; br4 = br2*br2; br6 = br4*br2; /* -dV/dr of g(br)*r^-6 [where g(x) = exp(-x^2)(1+x^2+x^4/2), see LJ-PME equations in manual] and r^-reppow */ md1_ljd = -exp(-br2)*(br6 + 3.0*br4 + 6.0*br2 + 6.0)*pow(r, -7.0); md1_ljr = reppow*pow(r, -(reppow+1)); /* The contribution of the higher derivatives is negligible */ } else { gmx_fatal(FARGS, "Energy drift calculation is only implemented for plain cut-off Lennard-Jones interactions"); } elfac = ONE_4PI_EPS0/ir->epsilon_r; /* Determine md=-dV/dr and dd=d^2V/dr^2 */ md1_el = 0; d2_el = 0; if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype)) { real eps_rf, k_rf; if (ir->coulombtype == eelCUT) { eps_rf = 1; k_rf = 0; } else { eps_rf = ir->epsilon_rf/ir->epsilon_r; if (eps_rf != 0) { k_rf = pow(ir->rcoulomb, -3.0)*(eps_rf - ir->epsilon_r)/(2*eps_rf + ir->epsilon_r); } else { /* epsilon_rf = infinity */ k_rf = 0.5*pow(ir->rcoulomb, -3.0); } } if (eps_rf > 0) { md1_el = elfac*(pow(ir->rcoulomb, -2.0) - 2*k_rf*ir->rcoulomb); } d2_el = elfac*(2*pow(ir->rcoulomb, -3.0) + 2*k_rf); } else if (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) { real b, rc, br; b = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol); rc = ir->rcoulomb; br = b*rc; md1_el = elfac*(b*exp(-br*br)*M_2_SQRTPI/rc + gmx_erfc(br)/(rc*rc)); d2_el = elfac/(rc*rc)*(2*b*(1 + br*br)*exp(-br*br)*M_2_SQRTPI + 2*gmx_erfc(br)/rc); } else { gmx_fatal(FARGS, "Energy drift calculation is only implemented for Reaction-Field and Ewald electrostatics"); } /* Determine the variance of the atomic displacement * over nstlist-1 steps: kT_fac * For inertial dynamics (not Brownian dynamics) the mass factor * is not included in kT_fac, it is added later. */ if (ir->eI == eiBD) { /* Get the displacement distribution from the random component only. * With accurate integration the systematic (force) displacement * should be negligible (unless nstlist is extremely large, which * you wouldn't do anyhow). */ kT_fac = 2*BOLTZ*reference_temperature*(ir->nstlist-1)*ir->delta_t; if (ir->bd_fric > 0) { /* This is directly sigma^2 of the displacement */ kT_fac /= ir->bd_fric; /* Set the masses to 1 as kT_fac is the full sigma^2, * but we divide by m in ener_drift(). */ for (i = 0; i < natt; i++) { att[i].prop.mass = 1; } } else { real tau_t; /* Per group tau_t is not implemented yet, use the maximum */ tau_t = ir->opts.tau_t[0]; for (i = 1; i < ir->opts.ngtc; i++) { tau_t = max(tau_t, ir->opts.tau_t[i]); } kT_fac *= tau_t; /* This kT_fac needs to be divided by the mass to get sigma^2 */ } } else { kT_fac = BOLTZ*reference_temperature*sqr((ir->nstlist-1)*ir->delta_t); } mass_min = att[0].prop.mass; for (i = 1; i < natt; i++) { mass_min = min(mass_min, att[i].prop.mass); } if (debug) { fprintf(debug, "md1_ljd %9.2e d2_ljd %9.2e md3_ljd %9.2e\n", md1_ljd, d2_ljd, md3_ljd); fprintf(debug, "md1_ljr %9.2e d2_ljr %9.2e md3_ljr %9.2e\n", md1_ljr, d2_ljr, md3_ljr); fprintf(debug, "md1_el %9.2e d2_el %9.2e\n", md1_el, d2_el); fprintf(debug, "sqrt(kT_fac) %f\n", sqrt(kT_fac)); fprintf(debug, "mass_min %f\n", mass_min); } /* Search using bisection */ ib0 = -1; /* The drift will be neglible at 5 times the max sigma */ ib1 = (int)(5*2*sqrt(kT_fac/mass_min)/resolution) + 1; while (ib1 - ib0 > 1) { ib = (ib0 + ib1)/2; rb = ib*resolution; rl = max(ir->rvdw, ir->rcoulomb) + rb; /* Calculate the average energy drift at the last step * of the nstlist steps at which the pair-list is used. */ drift = ener_drift(att, natt, &mtop->ffparams, kT_fac, md1_ljd, d2_ljd, md3_ljd, md1_ljr, d2_ljr, md3_ljr, md1_el, d2_el, rb, rl, boxvol); /* Correct for the fact that we are using a Ni x Nj particle pair list * and not a 1 x 1 particle pair list. This reduces the drift. */ /* We don't have a formula for 8 (yet), use 4 which is conservative */ nb_clust_frac_pairs_not_in_list_at_cutoff = surface_frac(min(list_setup->cluster_size_i, 4), particle_distance, rl)* surface_frac(min(list_setup->cluster_size_j, 4), particle_distance, rl); drift *= nb_clust_frac_pairs_not_in_list_at_cutoff; /* Convert the drift to drift per unit time per atom */ drift /= ir->nstlist*ir->delta_t*mtop->natoms; if (debug) { fprintf(debug, "ib %3d %3d %3d rb %.3f %dx%d fac %.3f drift %.1e\n", ib0, ib, ib1, rb, list_setup->cluster_size_i, list_setup->cluster_size_j, nb_clust_frac_pairs_not_in_list_at_cutoff, drift); } if (fabs(drift) > ir->verletbuf_tol) { ib0 = ib; } else { ib1 = ib; } } sfree(att); *rlist = max(ir->rvdw, ir->rcoulomb) + ib1*resolution; }
/* Try to increase nstlist when using the Verlet cut-off scheme */ static void increase_nstlist(FILE *fp, t_commrec *cr, t_inputrec *ir, int nstlist_cmdline, const gmx_mtop_t *mtop, matrix box, gmx_bool bGPU) { float listfac_ok, listfac_max; int nstlist_orig, nstlist_prev; verletbuf_list_setup_t ls; real rlistWithReferenceNstlist, rlist_inc, rlist_ok, rlist_max; real rlist_new, rlist_prev; size_t nstlist_ind = 0; t_state state_tmp; gmx_bool bBox, bDD, bCont; const char *nstl_gpu = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n"; const char *nve_err = "Can not increase nstlist because an NVE ensemble is used"; const char *vbd_err = "Can not increase nstlist because verlet-buffer-tolerance is not set or used"; const char *box_err = "Can not increase nstlist because the box is too small"; const char *dd_err = "Can not increase nstlist because of domain decomposition limitations"; char buf[STRLEN]; const float oneThird = 1.0f / 3.0f; if (nstlist_cmdline <= 0) { if (ir->nstlist == 1) { /* The user probably set nstlist=1 for a reason, * don't mess with the settings. */ return; } if (fp != NULL && bGPU && ir->nstlist < nstlist_try[0]) { fprintf(fp, nstl_gpu, ir->nstlist); } nstlist_ind = 0; while (nstlist_ind < NNSTL && ir->nstlist >= nstlist_try[nstlist_ind]) { nstlist_ind++; } if (nstlist_ind == NNSTL) { /* There are no larger nstlist value to try */ return; } } if (EI_MD(ir->eI) && ir->etc == etcNO) { if (MASTER(cr)) { fprintf(stderr, "%s\n", nve_err); } if (fp != NULL) { fprintf(fp, "%s\n", nve_err); } return; } if (ir->verletbuf_tol == 0 && bGPU) { gmx_fatal(FARGS, "You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp"); } if (ir->verletbuf_tol < 0) { if (MASTER(cr)) { fprintf(stderr, "%s\n", vbd_err); } if (fp != NULL) { fprintf(fp, "%s\n", vbd_err); } return; } if (bGPU) { listfac_ok = nbnxn_gpu_listfac_ok; listfac_max = nbnxn_gpu_listfac_max; } else { listfac_ok = nbnxn_cpu_listfac_ok; listfac_max = nbnxn_cpu_listfac_max; } nstlist_orig = ir->nstlist; if (nstlist_cmdline > 0) { if (fp) { sprintf(buf, "Getting nstlist=%d from command line option", nstlist_cmdline); } ir->nstlist = nstlist_cmdline; } verletbuf_get_list_setup(TRUE, bGPU, &ls); /* Allow rlist to make the list a given factor larger than the list * would be with the reference value for nstlist (10). */ nstlist_prev = ir->nstlist; ir->nstlist = nbnxnReferenceNstlist; calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlistWithReferenceNstlist); ir->nstlist = nstlist_prev; /* Determine the pair list size increase due to zero interactions */ rlist_inc = nbnxn_get_rlist_effective_inc(ls.cluster_size_j, mtop->natoms/det(box)); rlist_ok = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_ok, oneThird) - rlist_inc; rlist_max = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_max, oneThird) - rlist_inc; if (debug) { fprintf(debug, "nstlist tuning: rlist_inc %.3f rlist_ok %.3f rlist_max %.3f\n", rlist_inc, rlist_ok, rlist_max); } nstlist_prev = nstlist_orig; rlist_prev = ir->rlist; do { if (nstlist_cmdline <= 0) { ir->nstlist = nstlist_try[nstlist_ind]; } /* Set the pair-list buffer size in ir */ calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new); /* Does rlist fit in the box? */ bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC, box)); bDD = TRUE; if (bBox && DOMAINDECOMP(cr)) { /* Check if rlist fits in the domain decomposition */ if (inputrec2nboundeddim(ir) < DIM) { gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet"); } copy_mat(box, state_tmp.box); bDD = change_dd_cutoff(cr, &state_tmp, ir, rlist_new); } if (debug) { fprintf(debug, "nstlist %d rlist %.3f bBox %d bDD %d\n", ir->nstlist, rlist_new, bBox, bDD); } bCont = FALSE; if (nstlist_cmdline <= 0) { if (bBox && bDD && rlist_new <= rlist_max) { /* Increase nstlist */ nstlist_prev = ir->nstlist; rlist_prev = rlist_new; bCont = (nstlist_ind+1 < NNSTL && rlist_new < rlist_ok); } else { /* Stick with the previous nstlist */ ir->nstlist = nstlist_prev; rlist_new = rlist_prev; bBox = TRUE; bDD = TRUE; } } nstlist_ind++; } while (bCont); if (!bBox || !bDD) { gmx_warning(!bBox ? box_err : dd_err); if (fp != NULL) { fprintf(fp, "\n%s\n", bBox ? box_err : dd_err); } ir->nstlist = nstlist_orig; } else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist) { sprintf(buf, "Changing nstlist from %d to %d, rlist from %g to %g", nstlist_orig, ir->nstlist, ir->rlist, rlist_new); if (MASTER(cr)) { fprintf(stderr, "%s\n\n", buf); } if (fp != NULL) { fprintf(fp, "%s\n\n", buf); } ir->rlist = rlist_new; ir->rlistlong = rlist_new; } }