static void pull_set_pbcatom(t_commrec *cr, t_pull_group *pgrp,
                             t_mdatoms *md, rvec *x,
                             rvec x_pbc)
{
    int a, m;

    if (cr && PAR(cr))
    {
        if (DOMAINDECOMP(cr))
        {
            if (!ga2la_get_home(cr->dd->ga2la, pgrp->pbcatom, &a))
            {
                a = -1;
            }
        }
        else
        {
            a = pgrp->pbcatom;
        }

        if (a >= 0 && a < md->homenr)
        {
            copy_rvec(x[a], x_pbc);
        }
        else
        {
            clear_rvec(x_pbc);
        }
    }
    else
    {
        copy_rvec(x[pgrp->pbcatom], x_pbc);
    }
}
static void write_constr_pdb(const char *fn, const char *title,
                             gmx_mtop_t *mtop,
                             int start, int homenr, t_commrec *cr,
                             rvec x[], matrix box)
{
    char          fname[STRLEN], format[STRLEN];
    FILE         *out;
    int           dd_ac0 = 0, dd_ac1 = 0, i, ii, resnr;
    gmx_domdec_t *dd;
    char         *anm, *resnm;

    dd = NULL;
    if (DOMAINDECOMP(cr))
    {
        dd = cr->dd;
        dd_get_constraint_range(dd, &dd_ac0, &dd_ac1);
        start  = 0;
        homenr = dd_ac1;
    }

    if (PAR(cr))
    {
        sprintf(fname, "%s_n%d.pdb", fn, cr->sim_nodeid);
    }
    else
    {
        sprintf(fname, "%s.pdb", fn);
    }
    sprintf(format, "%s\n", get_pdbformat());

    out = gmx_fio_fopen(fname, "w");

    fprintf(out, "TITLE %s\n", title);
    gmx_write_pdb_box(out, -1, box);
    for (i = start; i < start+homenr; i++)
    {
        if (dd != NULL)
        {
            if (i >= dd->nat_home && i < dd_ac0)
            {
                continue;
            }
            ii = dd->gatindex[i];
        }
        else
        {
            ii = i;
        }
        gmx_mtop_atominfo_global(mtop, ii, &anm, &resnr, &resnm);
        fprintf(out, format, "ATOM", (ii+1)%100000, anm, resnm, ' ',
                resnr%10000, ' ', 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ]);
    }
    fprintf(out, "TER\n");

    gmx_fio_fclose(out);
}
void make_local_shells(t_commrec *cr, t_mdatoms *md,
                       struct gmx_shellfc *shfc)
{
    t_shell      *shell;
    int           a0, a1, *ind, nshell, i;
    gmx_domdec_t *dd = NULL;

    if (PAR(cr))
    {
        if (DOMAINDECOMP(cr))
        {
            dd = cr->dd;
            a0 = 0;
            a1 = dd->nat_home;
        }
        else
        {
            pd_at_range(cr, &a0, &a1);
        }
    }
    else
    {
        /* Single node: we need all shells, just copy the pointer */
        shfc->nshell = shfc->nshell_gl;
        shfc->shell  = shfc->shell_gl;

        return;
    }

    ind = shfc->shell_index_gl;

    nshell = 0;
    shell  = shfc->shell;
    for (i = a0; i < a1; i++)
    {
        if (md->ptype[i] == eptShell)
        {
            if (nshell+1 > shfc->shell_nalloc)
            {
                shfc->shell_nalloc = over_alloc_dd(nshell+1);
                srenew(shell, shfc->shell_nalloc);
            }
            if (dd)
            {
                shell[nshell] = shfc->shell_gl[ind[dd->gatindex[i]]];
            }
            else
            {
                shell[nshell] = shfc->shell_gl[ind[i]];
            }
            /* With inter-cg shells we can not do shell prediction,
             * so we do not need the nuclei numbers.
             */
            if (!shfc->bInterCG)
            {
                shell[nshell].nucl1 = i + shell[nshell].nucl1 - shell[nshell].shell;
                if (shell[nshell].nnucl > 1)
                {
                    shell[nshell].nucl2 = i + shell[nshell].nucl2 - shell[nshell].shell;
                }
                if (shell[nshell].nnucl > 2)
                {
                    shell[nshell].nucl3 = i + shell[nshell].nucl3 - shell[nshell].shell;
                }
            }
            shell[nshell].shell = i;
            nshell++;
        }
    }

    shfc->nshell = nshell;
    shfc->shell  = shell;
}
void do_redist_pos_coeffs(struct gmx_pme_t *pme, t_commrec *cr,
                          int start, int homenr,
                          gmx_bool bFirst, rvec x[], real *data)
{
    int             d;
    pme_atomcomm_t *atc;

    atc = &pme->atc[0];

    for (d = pme->ndecompdim - 1; d >= 0; d--)
    {
        int   n_d;
        rvec *x_d;
        real *param_d;

        if (d == pme->ndecompdim - 1)
        {
            n_d     = homenr;
            x_d     = x + start;
            param_d = data;
        }
        else
        {
            n_d     = pme->atc[d + 1].n;
            x_d     = atc->x;
            param_d = atc->coefficient;
        }
        atc      = &pme->atc[d];
        atc->npd = n_d;
        if (atc->npd > atc->pd_nalloc)
        {
            atc->pd_nalloc = over_alloc_dd(atc->npd);
            srenew(atc->pd, atc->pd_nalloc);
        }
        pme_calc_pidx_wrapper(n_d, pme->recipbox, x_d, atc);
        where();
        /* Redistribute x (only once) and qA/c6A or qB/c6B */
        if (DOMAINDECOMP(cr))
        {
            dd_pmeredist_pos_coeffs(pme, n_d, bFirst, x_d, param_d, atc);
        }
    }
}
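Several routines in this section (make_local_shells above, do_redist_pos_coeffs here, and make_cyl_refgrps further down) grow their per-atom buffers through over_alloc_dd() or over_alloc_large() before calling srenew(). The helper itself is not part of this excerpt; the sketch below only illustrates the presumed idea, over-allocating so that the frequently changing local atom counts under domain decomposition rarely force a reallocation. The growth factor and offset are illustrative assumptions, not the real constants.

/* Hypothetical sketch of an over-allocation helper in the spirit of
 * over_alloc_dd(): return a capacity comfortably above the request so
 * that repeated small growths do not trigger srenew() every step.
 * The constants 1.2 and 100 are assumptions for illustration. */
static int over_alloc_sketch(int n)
{
    return (int)(1.2*n) + 100;
}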
real pull_potential(int ePull, t_pull *pull, t_mdatoms *md, t_pbc *pbc,
                    t_commrec *cr, double t, real lambda,
                    rvec *x, rvec *f, tensor vir, real *dvdlambda)
{
    real V, dVdl;

    pull_calc_coms(cr, pull, md, pbc, t, x, NULL);

    do_pull_pot(ePull, pull, pbc, t, lambda,
                &V, pull->bVirial && MASTER(cr) ? vir : NULL, &dVdl);

    /* Distribute forces over pulled groups */
    apply_forces(pull, md, DOMAINDECOMP(cr) ? cr->dd->ga2la : NULL, f);

    if (MASTER(cr))
    {
        *dvdlambda += dVdl;
    }

    return (MASTER(cr) ? V : 0.0);
}
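pull_potential() delegates the mass-weighted spreading of the COM force to apply_forces(), which is not part of this excerpt. As a hedged sketch of what such a per-group loop looks like, using the t_pull_group fields seen elsewhere in this section (treat it as an illustration, not the verbatim GROMACS routine):

/* Sketch: spread a force f_pull acting on a group's COM over the local
 * atoms, weighting each atom by its mass (times an optional user weight)
 * and normalizing with the group's wscale*invtm factor. */
static void apply_forces_grp_sketch(t_pull_group *pgrp, t_mdatoms *md,
                                    dvec f_pull, int sign, rvec *f)
{
    int    i, ii, m;
    double wmass, inv_wm;

    inv_wm = pgrp->wscale*pgrp->invtm;

    for (i = 0; i < pgrp->nat_loc; i++)
    {
        ii    = pgrp->ind_loc[i];
        wmass = md->massT[ii];
        if (pgrp->weight_loc)
        {
            wmass *= pgrp->weight_loc[i];
        }
        for (m = 0; m < DIM; m++)
        {
            f[ii][m] += sign*wmass*f_pull[m]*inv_wm;
        }
    }
}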
static void pull_set_pbcatom(t_commrec *cr, pull_group_work_t *pgrp,
                             rvec *x,
                             rvec x_pbc)
{
    int a;

    if (cr != NULL && DOMAINDECOMP(cr))
    {
        if (ga2la_get_home(cr->dd->ga2la, pgrp->params.pbcatom, &a))
        {
            copy_rvec(x[a], x_pbc);
        }
        else
        {
            clear_rvec(x_pbc);
        }
    }
    else
    {
        copy_rvec(x[pgrp->params.pbcatom], x_pbc);
    }
}
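Both versions of pull_set_pbcatom() rely on the same ga2la contract: under domain decomposition, ga2la_get_home() translates a global atom index to a local index and succeeds only on the rank that homes the atom, so every other rank contributes a zero vector and a subsequent global sum delivers the coordinate exactly once. A small illustration of that lookup-and-reduce pattern (domain-decomposition path only; the reduction call is indicated in a comment):

/* Illustration (sketch): each rank writes either the home atom's local
 * coordinate or zero into buf; summing buf over all ranks afterwards
 * yields the coordinate exactly once. */
static void collect_global_atom_sketch(t_commrec *cr, int global_index,
                                       rvec *x, rvec buf)
{
    int a;

    if (ga2la_get_home(cr->dd->ga2la, global_index, &a))
    {
        copy_rvec(x[a], buf);  /* home rank: copy the local coordinate */
    }
    else
    {
        clear_rvec(buf);       /* all other ranks contribute zero */
    }
    /* ...followed by a global reduction such as gmx_sum(DIM, buf, cr) */
}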
void do_force_lowlevel(FILE *fplog, gmx_large_int_t step,
                       t_forcerec *fr, t_inputrec *ir,
                       t_idef *idef, t_commrec *cr,
                       t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                       t_mdatoms *md,
                       t_grpopts *opts,
                       rvec x[], history_t *hist,
                       rvec f[],
                       rvec f_longrange[],
                       gmx_enerdata_t *enerd,
                       t_fcdata *fcd,
                       gmx_mtop_t *mtop,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       t_atomtypes *atype,
                       gmx_bool bBornRadii,
                       matrix box,
                       t_lambda *fepvals,
                       real *lambda,
                       t_graph *graph,
                       t_blocka *excl,
                       rvec mu_tot[],
                       int flags,
                       float *cycles_pme)
{
    int      i, j, status;
    int      donb_flags;
    gmx_bool bDoEpot, bSepDVDL, bSB;
    int      pme_flags;
    matrix   boxs;
    rvec     box_size;
    real     Vsr, Vlr, Vcorr = 0;
    t_pbc    pbc;
    real     dvdgb;
    char     buf[22];
    double   clam_i, vlam_i;
    real     dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
    real     dvdlsum;

#ifdef GMX_MPI
    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
#endif

#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) { fprintf(fplog, sepdvdlformat, s, v, dvdlambda); }

    GMX_MPE_LOG(ev_force_start);
    set_pbc(&pbc, fr->ePBC, box);

    /* reset free energy components */
    for (i = 0; i < efptNR; i++)
    {
        dvdl_nb[i]  = 0;
        dvdl_dum[i] = 0;
    }

    /* Reset box */
    for (i = 0; (i < DIM); i++)
    {
        box_size[i] = box[i][i];
    }

    bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog));
    debug_gmx();

    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md);
    }

    if (bSepDVDL)
    {
        fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n",
                gmx_step_str(step, buf), cr->nodeid);
    }

    /* Call the short range functions all in one go. */
    GMX_MPE_LOG(ev_do_fnbf_start);

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0 = MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        /* foreign lambda component for walls */
        dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
                        enerd->grpp.ener[egLJSR], nrnb);
        PRINT_SEPDVDL("Walls", 0.0, dvdl);
        enerd->dvdl_lin[efptVDW] += dvdl;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsNONBONDED);

        for (i = 0; i < born->nr; i++)
        {
            fr->dvda[i] = 0;
        }

        if (bBornRadii)
        {
            calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb);
        }

        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
    }

    where();
    /* We only do non-bonded calculation with group scheme here, the verlet
     * calls are done from do_force_cutsVERLET().
     */
    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
    {
        donb_flags = 0;
        /* Add short-range interactions */
        donb_flags |= GMX_NONBONDED_DO_SR;

        if (flags & GMX_FORCE_FORCES)
        {
            donb_flags |= GMX_NONBONDED_DO_FORCE;
        }
        if (flags & GMX_FORCE_ENERGY)
        {
            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
        }
        if (flags & GMX_FORCE_DO_LR)
        {
            donb_flags |= GMX_NONBONDED_DO_LR;
        }

        wallcycle_sub_start(wcycle, ewcsNONBONDED);
        do_nonbonded(cr, fr, x, f, f_longrange, md, excl,
                     &enerd->grpp, box_size, nrnb,
                     lambda, dvdl_nb, -1, -1, donb_flags);

        /* If we do foreign lambda and we have soft-core interactions
         * we have to recalculate the (non-linear) energies contributions.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
        {
            for (i = 0; i < enerd->n_lambda; i++)
            {
                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                reset_foreign_enerdata(enerd);
                do_nonbonded(cr, fr, x, f, f_longrange, md, excl,
                             &(enerd->foreign_grpp), box_size, nrnb,
                             lam_i, dvdl_dum, -1, -1,
                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
                sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
        where();
    }

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    /* MRS: Eventually, may need to include free energy contribution here! */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsBONDED);
        calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef,
                       ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii,
                       &pbc, graph, enerd);
        wallcycle_sub_stop(wcycle, ewcsBONDED);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1          = MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    if (fepvals->sc_alpha != 0)
    {
        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
    }
    else
    {
        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
    }

    if (fepvals->sc_alpha != 0)
    /* even though coulomb part is linear, we already added it, because
       we need to go through the vdw calculation anyway */
    {
        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
    }
    else
    {
        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
    }

    Vsr = 0;
    if (bSepDVDL)
    {
        for (i = 0; i < enerd->grpp.nener; i++)
        {
            Vsr +=
                (fr->bBHAM ?
                 enerd->grpp.ener[egBHAMSR][i] :
                 enerd->grpp.ener[egLJSR][i])
                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
        }
        dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
        PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum);
    }
    debug_gmx();

    GMX_MPE_LOG(ev_do_fnbf_finish);

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
    }

    /* Shift the coordinates. Must be done before bonded forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no bonded forces have to be evaluated.
     */

    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }
    /* Check whether we need to do bondeds or correct for exclusions */
    if (fr->bMolPBC &&
        ((flags & GMX_FORCE_BONDED)
         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
    {
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
    }
    debug_gmx();

    if (flags & GMX_FORCE_BONDED)
    {
        GMX_MPE_LOG(ev_calc_bonds_start);

        wallcycle_sub_start(wcycle, ewcsBONDED);
        calc_bonds(fplog, cr->ms,
                   idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
                   flags,
                   fr->bSepDVDL && do_per_step(step, ir->nstlog), step);

        /* Check if we have to determine energy differences
         * at foreign lambda's.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
            idef->ilsort != ilsortNO_FE)
        {
            if (idef->ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
            for (i = 0; i < enerd->n_lambda; i++)
            {
                reset_foreign_enerdata(enerd);
                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph,
                                  &(enerd->foreign_grpp), enerd->foreign_term,
                                  nrnb, lam_i, md, fcd,
                                  DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
                sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        debug_gmx();
        GMX_MPE_LOG(ev_calc_bonds_finish);
        wallcycle_sub_stop(wcycle, ewcsBONDED);
    }

    where();

    *cycles_pme = 0;
    if (EEL_FULL(fr->eeltype))
    {
        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box, boxs);
            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        clear_mat(fr->vir_el_recip);

        if (fr->bEwald)
        {
            Vcorr = 0;
            dvdl  = 0;

            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            /* The TPI molecule does not have exclusions with the rest
             * of the system and no intra-molecular PME grid contributions
             * will be calculated in gmx_pme_calc_energy.
             */
            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
                ir->ewald_geometry != eewg3D ||
                ir->epsilon_surface != 0)
            {
                int nthreads, t;

                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                if (fr->n_tpi > 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }

                nthreads = gmx_omp_nthreads_get(emntBonded);
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (t = 0; t < nthreads; t++)
                {
                    int     s, e, i;
                    rvec   *fnv;
                    tensor *vir;
                    real   *Vcorrt, *dvdlt;
                    if (t == 0)
                    {
                        fnv    = fr->f_novirsum;
                        vir    = &fr->vir_el_recip;
                        Vcorrt = &Vcorr;
                        dvdlt  = &dvdl;
                    }
                    else
                    {
                        fnv    = fr->f_t[t].f;
                        vir    = &fr->f_t[t].vir;
                        Vcorrt = &fr->f_t[t].Vcorr;
                        dvdlt  = &fr->f_t[t].dvdl[efptCOUL];
                        for (i = 0; i < fr->natoms_force; i++)
                        {
                            clear_rvec(fnv[i]);
                        }
                        clear_mat(*vir);
                    }
                    *dvdlt  = 0;
                    *Vcorrt =
                        ewald_LRcorrection(fplog,
                                           fr->excl_load[t], fr->excl_load[t+1],
                                           cr, t, fr,
                                           md->chargeA,
                                           md->nChargePerturbed ? md->chargeB : NULL,
                                           ir->cutoff_scheme != ecutsVERLET,
                                           excl, x, bSB ? boxs : box, mu_tot,
                                           ir->ewald_geometry,
                                           ir->epsilon_surface,
                                           fnv, *vir,
                                           lambda[efptCOUL], dvdlt);
                }
                if (nthreads > 1)
                {
                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
                                         fr->vir_el_recip,
                                         &Vcorr, efptCOUL, &dvdl,
                                         nthreads, fr->f_t);
                }

                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }

            if (fr->n_tpi == 0)
            {
                Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
                                                 &dvdl, fr->vir_el_recip);
            }

            PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl);
            enerd->dvdl_lin[efptCOUL] += dvdl;
        }

        status = 0;
        Vlr    = 0;
        dvdl   = 0;
        switch (fr->eeltype)
        {
            case eelPME:
            case eelPMESWITCH:
            case eelPMEUSER:
            case eelPMEUSERSWITCH:
            case eelP3M_AD:
                if (cr->duty & DUTY_PME)
                {
                    assert(fr->n_tpi >= 0);
                    if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
                    {
                        pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
                        if (flags & GMX_FORCE_FORCES)
                        {
                            pme_flags |= GMX_PME_CALC_F;
                        }
                        if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
                        {
                            pme_flags |= GMX_PME_CALC_ENER_VIR;
                        }
                        if (fr->n_tpi > 0)
                        {
                            /* We don't calculate f, but we do want the potential */
                            pme_flags |= GMX_PME_CALC_POT;
                        }
                        wallcycle_start(wcycle, ewcPMEMESH);
                        status = gmx_pme_do(fr->pmedata,
                                            md->start, md->homenr - fr->n_tpi,
                                            x, fr->f_novirsum,
                                            md->chargeA, md->chargeB,
                                            bSB ? boxs : box, cr,
                                            DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                            DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                            nrnb, wcycle,
                                            fr->vir_el_recip, fr->ewaldcoeff,
                                            &Vlr, lambda[efptCOUL], &dvdl,
                                            pme_flags);
                        *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);

                        /* We should try to do as little computation after
                         * this as possible, because parallel PME synchronizes
                         * the nodes, so we want all load imbalance of the rest
                         * of the force calculation to be before the PME call.
                         * DD load balancing is done on the whole time of
                         * the force call (without PME).
                         */
                    }
                    if (fr->n_tpi > 0)
                    {
                        /* Determine the PME grid energy of the test molecule
                         * with the PME grid potential of the other charges.
                         */
                        gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
                                            x + md->homenr - fr->n_tpi,
                                            md->chargeA + md->homenr - fr->n_tpi,
                                            &Vlr);
                    }
                    PRINT_SEPDVDL("PME mesh", Vlr, dvdl);
                }
                break;
            case eelEWALD:
                Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum,
                               md->chargeA, md->chargeB,
                               box_size, cr, md->homenr,
                               fr->vir_el_recip, fr->ewaldcoeff,
                               lambda[efptCOUL], &dvdl, fr->ewald_table);
                PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl);
                break;
            default:
                gmx_fatal(FARGS, "No such electrostatics method implemented %s",
                          eel_names[fr->eeltype]);
        }
        if (status != 0)
        {
            gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s",
                      status, EELTYPE(fr->eeltype));
        }
        /* Note that with separate PME nodes we get the real energies later */
        enerd->dvdl_lin[efptCOUL] += dvdl;
        enerd->term[F_COUL_RECIP]  = Vlr + Vcorr;
        if (debug)
        {
            fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
                    Vlr, Vcorr, enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
            pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
        }
    }
    else
    {
        if (EEL_RF(fr->eeltype))
        {
            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
            {
                dvdl                   = 0;
                enerd->term[F_RF_EXCL] =
                    RF_excl_correction(fplog, fr, graph, md, excl, x, f,
                                       fr->fshift, &pbc, lambda[efptCOUL], &dvdl);
            }

            enerd->dvdl_lin[efptCOUL] += dvdl;
            PRINT_SEPDVDL("RF exclusion correction",
                          enerd->term[F_RF_EXCL], dvdl);
        }
    }
    where();
    debug_gmx();

    if (debug)
    {
        print_nrnb(debug, nrnb);
    }
    debug_gmx();

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t2 = MPI_Wtime();
        MPI_Barrier(cr->mpi_comm_mygroup);
        t3 = MPI_Wtime();
        fr->t_wait += t3-t2;
        if (fr->timesteps == 11)
        {
            fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
                    cr->nodeid, gmx_step_str(fr->timesteps, buf),
                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
        }
        fr->timesteps++;
    }
#endif

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
    }

    GMX_MPE_LOG(ev_force_finish);
}
void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir,
                       t_idef *idef, t_commrec *cr,
                       t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                       t_mdatoms *md,
                       rvec x[], history_t *hist,
                       rvec f[],
                       rvec f_longrange[],
                       gmx_enerdata_t *enerd,
                       t_fcdata *fcd,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       gmx_bool bBornRadii,
                       matrix box,
                       t_lambda *fepvals,
                       real *lambda,
                       t_graph *graph,
                       t_blocka *excl,
                       rvec mu_tot[],
                       int flags,
                       float *cycles_pme)
{
    int      i, j;
    int      donb_flags;
    gmx_bool bSB;
    int      pme_flags;
    matrix   boxs;
    rvec     box_size;
    t_pbc    pbc;
    real     dvdl_dum[efptNR], dvdl_nb[efptNR];

#ifdef GMX_MPI
    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
#endif

    set_pbc(&pbc, fr->ePBC, box);

    /* reset free energy components */
    for (i = 0; i < efptNR; i++)
    {
        dvdl_nb[i]  = 0;
        dvdl_dum[i] = 0;
    }

    /* Reset box */
    for (i = 0; (i < DIM); i++)
    {
        box_size[i] = box[i][i];
    }

    debug_gmx();

    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
    }

    /* Call the short range functions all in one go. */

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0 = MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        /* foreign lambda component for walls */
        real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
                                   enerd->grpp.ener[egLJSR], nrnb);
        enerd->dvdl_lin[efptVDW] += dvdl_walls;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsNONBONDED);

        for (i = 0; i < born->nr; i++)
        {
            fr->dvda[i] = 0;
        }

        if (bBornRadii)
        {
            calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
        }

        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
    }

    where();
    /* We only do non-bonded calculation with group scheme here, the verlet
     * calls are done from do_force_cutsVERLET().
     */
    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
    {
        donb_flags = 0;
        /* Add short-range interactions */
        donb_flags |= GMX_NONBONDED_DO_SR;

        /* Currently all group scheme kernels always calculate (shift-)forces */
        if (flags & GMX_FORCE_FORCES)
        {
            donb_flags |= GMX_NONBONDED_DO_FORCE;
        }
        if (flags & GMX_FORCE_VIRIAL)
        {
            donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
        }
        if (flags & GMX_FORCE_ENERGY)
        {
            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
        }
        if (flags & GMX_FORCE_DO_LR)
        {
            donb_flags |= GMX_NONBONDED_DO_LR;
        }

        wallcycle_sub_start(wcycle, ewcsNONBONDED);
        do_nonbonded(fr, x, f, f_longrange, md, excl,
                     &enerd->grpp, nrnb,
                     lambda, dvdl_nb, -1, -1, donb_flags);

        /* If we do foreign lambda and we have soft-core interactions
         * we have to recalculate the (non-linear) energies contributions.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
        {
            for (i = 0; i < enerd->n_lambda; i++)
            {
                real lam_i[efptNR];

                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                reset_foreign_enerdata(enerd);
                do_nonbonded(fr, x, f, f_longrange, md, excl,
                             &(enerd->foreign_grpp), nrnb,
                             lam_i, dvdl_dum, -1, -1,
                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
                sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
        where();
    }

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    /* MRS: Eventually, may need to include free energy contribution here! */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsLISTED);
        calc_gb_forces(cr, md, born, top, x, f, fr, idef,
                       ir->gb_algorithm, ir->sa_algorithm, nrnb,
                       &pbc, graph, enerd);
        wallcycle_sub_stop(wcycle, ewcsLISTED);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1          = MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    if (fepvals->sc_alpha != 0)
    {
        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
    }
    else
    {
        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
    }

    if (fepvals->sc_alpha != 0)
    /* even though coulomb part is linear, we already added it, because
       we need to go through the vdw calculation anyway */
    {
        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
    }
    else
    {
        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
    }

    debug_gmx();

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
    }

    /* Shift the coordinates. Must be done before listed forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no listed forces have to be evaluated.
     *
     * The shifting and PBC code is deliberately not timed, since with
     * the Verlet scheme it only takes non-zero time with triclinic
     * boxes, and even then the time is around a factor of 100 less
     * than the next smallest counter.
     */

    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }
    /* Check whether we need to do listed interactions or correct for exclusions */
    if (fr->bMolPBC &&
        ((flags & GMX_FORCE_LISTED)
         || EEL_RF(fr->eeltype)
         || EEL_FULL(fr->eeltype)
         || EVDW_PME(fr->vdwtype)))
    {
        /* TODO There are no electrostatics methods that require this
           transformation, when using the Verlet scheme, so update the
           above conditional. */
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
    }
    debug_gmx();

    do_force_listed(wcycle, box, ir->fepvals, cr->ms,
                    idef, (const rvec *) x, hist, f, fr,
                    &pbc, graph, enerd, nrnb, lambda, md, fcd,
                    DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
                    flags);

    where();

    *cycles_pme = 0;
    clear_mat(fr->vir_el_recip);
    clear_mat(fr->vir_lj_recip);

    /* Do long-range electrostatics and/or LJ-PME, including related short-range
     * corrections.
     */
    if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
    {
        int  status            = 0;
        real Vlr_q             = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
        real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;

        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box, boxs);
            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
        {
            real dvdl_long_range_correction_q  = 0;
            real dvdl_long_range_correction_lj = 0;
            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            /* The TPI molecule does not have exclusions with the rest
             * of the system and no intra-molecular PME grid
             * contributions will be calculated in
             * gmx_pme_calc_energy.
             */
            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
                ir->ewald_geometry != eewg3D ||
                ir->epsilon_surface != 0)
            {
                int nthreads, t;

                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                if (fr->n_tpi > 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }

                nthreads = fr->nthread_ewc;
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (t = 0; t < nthreads; t++)
                {
                    try
                    {
                        tensor *vir_q, *vir_lj;
                        real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
                        if (t == 0)
                        {
                            vir_q     = &fr->vir_el_recip;
                            vir_lj    = &fr->vir_lj_recip;
                            Vcorrt_q  = &Vcorr_q;
                            Vcorrt_lj = &Vcorr_lj;
                            dvdlt_q   = &dvdl_long_range_correction_q;
                            dvdlt_lj  = &dvdl_long_range_correction_lj;
                        }
                        else
                        {
                            vir_q     = &fr->ewc_t[t].vir_q;
                            vir_lj    = &fr->ewc_t[t].vir_lj;
                            Vcorrt_q  = &fr->ewc_t[t].Vcorr_q;
                            Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj;
                            dvdlt_q   = &fr->ewc_t[t].dvdl[efptCOUL];
                            dvdlt_lj  = &fr->ewc_t[t].dvdl[efptVDW];
                            clear_mat(*vir_q);
                            clear_mat(*vir_lj);
                        }
                        *dvdlt_q  = 0;
                        *dvdlt_lj = 0;

                        /* Threading is only supported with the Verlet cut-off
                         * scheme and then only single particle forces (no
                         * exclusion forces) are calculated, so we can store
                         * the forces in the normal, single fr->f_novirsum array.
                         */
                        ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
                                           cr, t, fr,
                                           md->chargeA, md->chargeB,
                                           md->sqrt_c6A, md->sqrt_c6B,
                                           md->sigmaA, md->sigmaB,
                                           md->sigma3A, md->sigma3B,
                                           md->nChargePerturbed || md->nTypePerturbed,
                                           ir->cutoff_scheme != ecutsVERLET,
                                           excl, x, bSB ? boxs : box, mu_tot,
                                           ir->ewald_geometry,
                                           ir->epsilon_surface,
                                           fr->f_novirsum, *vir_q, *vir_lj,
                                           Vcorrt_q, Vcorrt_lj,
                                           lambda[efptCOUL], lambda[efptVDW],
                                           dvdlt_q, dvdlt_lj);
                    }
                    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
                }
                if (nthreads > 1)
                {
                    reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip,
                                           &Vcorr_q, &Vcorr_lj,
                                           &dvdl_long_range_correction_q,
                                           &dvdl_long_range_correction_lj,
                                           nthreads, fr->ewc_t);
                }
                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }
void mdoutf_write_to_trajectory_files(FILE *fplog, t_commrec *cr,
                                      gmx_mdoutf_t of,
                                      int mdof_flags,
                                      gmx_mtop_t *top_global,
                                      gmx_int64_t step, double t,
                                      t_state *state_local, t_state *state_global,
                                      rvec *f_local, rvec *f_global)
{
    rvec *local_v;
    rvec *global_v;

    /* MRS -- defining these variables is to manage the difference
     * between half step and full step velocities, but there must be a better way . . . */
    local_v  = state_local->v;
    global_v = state_global->v;

    if (DOMAINDECOMP(cr))
    {
        if (mdof_flags & MDOF_CPT)
        {
            dd_collect_state(cr->dd, state_local, state_global);
        }
        else
        {
            if (mdof_flags & (MDOF_X | MDOF_X_COMPRESSED))
            {
                dd_collect_vec(cr->dd, state_local, state_local->x,
                               state_global->x);
            }
            if (mdof_flags & MDOF_V)
            {
                dd_collect_vec(cr->dd, state_local, local_v,
                               global_v);
            }
        }
        if (mdof_flags & MDOF_F)
        {
            dd_collect_vec(cr->dd, state_local, f_local, f_global);
        }
    }
    else
    {
        if (mdof_flags & MDOF_CPT)
        {
            /* All pointers in state_local are equal to state_global,
             * but we need to copy the non-pointer entries.
             */
            state_global->lambda = state_local->lambda;
            state_global->veta   = state_local->veta;
            state_global->vol0   = state_local->vol0;
            copy_mat(state_local->box, state_global->box);
            copy_mat(state_local->boxv, state_global->boxv);
            copy_mat(state_local->svir_prev, state_global->svir_prev);
            copy_mat(state_local->fvir_prev, state_global->fvir_prev);
            copy_mat(state_local->pres_prev, state_global->pres_prev);
        }
    }

    if (MASTER(cr))
    {
        if (mdof_flags & MDOF_CPT)
        {
            fflush_tng(of->tng);
            fflush_tng(of->tng_low_prec);
            write_checkpoint(of->fn_cpt, of->bKeepAndNumCPT,
                             fplog, cr, of->eIntegrator, of->simulation_part,
                             of->bExpanded, of->elamstats, step, t, state_global);
        }

        if (mdof_flags & (MDOF_X | MDOF_V | MDOF_F))
        {
            if (of->fp_trn)
            {
                gmx_trr_write_frame(of->fp_trn, step, t, state_local->lambda[efptFEP],
                                    state_local->box, top_global->natoms,
                                    (mdof_flags & MDOF_X) ? state_global->x : NULL,
                                    (mdof_flags & MDOF_V) ? global_v : NULL,
                                    (mdof_flags & MDOF_F) ? f_global : NULL);
                if (gmx_fio_flush(of->fp_trn) != 0)
                {
                    gmx_file("Cannot write trajectory; maybe you are out of disk space?");
                }
            }

            gmx_fwrite_tng(of->tng, FALSE, step, t, state_local->lambda[efptFEP],
                           state_local->box, top_global->natoms,
                           (mdof_flags & MDOF_X) ? state_global->x : NULL,
                           (mdof_flags & MDOF_V) ? global_v : NULL,
                           (mdof_flags & MDOF_F) ? f_global : NULL);
        }
        if (mdof_flags & MDOF_X_COMPRESSED)
        {
            rvec *xxtc = NULL;

            if (of->natoms_x_compressed == of->natoms_global)
            {
                /* We are writing the positions of all of the atoms to
                   the compressed output */
                xxtc = state_global->x;
            }
            else
            {
                /* We are writing the positions of only a subset of
                   the atoms to the compressed output, so we have to
                   make a copy of the subset of coordinates. */
                int i, j;

                snew(xxtc, of->natoms_x_compressed);
                for (i = 0, j = 0; (i < of->natoms_global); i++)
                {
                    if (ggrpnr(of->groups, egcCompressedX, i) == 0)
                    {
                        copy_rvec(state_global->x[i], xxtc[j++]);
                    }
                }
            }
            if (write_xtc(of->fp_xtc, of->natoms_x_compressed, step, t,
                          state_local->box, xxtc, of->x_compression_precision) == 0)
            {
                gmx_fatal(FARGS, "XTC error - maybe you are out of disk space?");
            }
            gmx_fwrite_tng(of->tng_low_prec, TRUE, step, t,
                           state_local->lambda[efptFEP],
                           state_local->box,
                           of->natoms_x_compressed,
                           xxtc, NULL, NULL);
            if (of->natoms_x_compressed != of->natoms_global)
            {
                sfree(xxtc);
            }
        }
    }
}
void pme_loadbal_do(pme_load_balancing_t *pme_lb,
                    t_commrec            *cr,
                    FILE                 *fp_err,
                    FILE                 *fp_log,
                    t_inputrec           *ir,
                    t_forcerec           *fr,
                    t_state              *state,
                    gmx_wallcycle_t       wcycle,
                    gmx_int64_t           step,
                    gmx_int64_t           step_rel,
                    gmx_bool             *bPrinting)
{
    int    n_prev;
    double cycles_prev;

    assert(pme_lb != NULL);

    if (!pme_lb->bActive)
    {
        return;
    }

    n_prev      = pme_lb->cycles_n;
    cycles_prev = pme_lb->cycles_c;
    wallcycle_get(wcycle, ewcSTEP, &pme_lb->cycles_n, &pme_lb->cycles_c);
    if (pme_lb->cycles_n == 0)
    {
        /* Before the first step we haven't done any steps yet */
        return;
    }
    /* Sanity check, we expect nstlist cycle counts */
    if (pme_lb->cycles_n - n_prev != ir->nstlist)
    {
        /* We could return here, but it's safer to issue an error and quit */
        gmx_incons("pme_loadbal_do called at an interval != nstlist");
    }

    /* PME grid + cut-off optimization with GPUs or PME ranks */
    if (!pme_lb->bBalance && pme_lb->bSepPMERanks)
    {
        if (pme_lb->bTriggerOnDLB)
        {
            pme_lb->bBalance = dd_dlb_is_on(cr->dd);
        }
        /* We should ignore the first timing to avoid timing allocation
         * overhead. And since the PME load balancing is called just
         * before DD repartitioning, the ratio returned by dd_pme_f_ratio
         * is not over the last nstlist steps, but the nstlist steps before
         * that. So the first useful ratio is available at step_rel=3*nstlist.
         */
        else if (step_rel >= 3*ir->nstlist)
        {
            if (DDMASTER(cr->dd))
            {
                /* If PME rank load is too high, start tuning */
                pme_lb->bBalance =
                    (dd_pme_f_ratio(cr->dd) >= loadBalanceTriggerFactor);
            }
            dd_bcast(cr->dd, sizeof(gmx_bool), &pme_lb->bBalance);
        }

        pme_lb->bActive = (pme_lb->bBalance ||
                           step_rel <= pme_lb->step_rel_stop);
    }

    /* The location in the code of this balancing termination is strange.
     * You would expect to have it after the call to pme_load_balance()
     * below, since there pme_lb->stage is updated.
     * But when terminating directly after deciding on and selecting the
     * optimal setup, DLB will turn on right away if it was locked before.
     * This might be due to PME reinitialization. So we check stage here
     * to allow for another nstlist steps with DLB locked to stabilize
     * the performance.
     */
    if (pme_lb->bBalance && pme_lb->stage == pme_lb->nstage)
    {
        pme_lb->bBalance = FALSE;

        if (DOMAINDECOMP(cr) && dd_dlb_is_locked(cr->dd))
        {
            /* Unlock the DLB=auto, DLB is allowed to activate */
            dd_dlb_unlock(cr->dd);
            md_print_warn(cr, fp_log, "NOTE: DLB can now turn on, when beneficial\n");

            /* We don't deactivate the tuning yet, since we will balance again
             * after DLB gets turned on, if it does within PMETune_period.
             */
            continue_pme_loadbal(pme_lb, TRUE);
            pme_lb->bTriggerOnDLB = TRUE;
            pme_lb->step_rel_stop = step_rel + PMETunePeriod*ir->nstlist;
        }
        else
        {
            /* We're completely done with PME tuning */
            pme_lb->bActive = FALSE;
        }

        if (DOMAINDECOMP(cr))
        {
            /* Set the cut-off limit to the final selected cut-off,
             * so we don't have artificial DLB limits.
             * This also ensures that we won't disable the currently
             * optimal setting during a second round of PME balancing.
             */
            set_dd_dlb_max_cutoff(cr, fr->ic->rlistlong);
        }
    }

    if (pme_lb->bBalance)
    {
        /* We might not have collected nstlist steps in cycles yet,
         * since init_step might not be a multiple of nstlist,
         * but the first data collected is skipped anyhow.
         */
        pme_load_balance(pme_lb, cr,
                         fp_err, fp_log,
                         ir, state, pme_lb->cycles_c - cycles_prev,
                         fr->ic, fr->nbv, &fr->pmedata,
                         step);

        /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
        fr->ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
        fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
        fr->rlist         = fr->ic->rlist;
        fr->rlistlong     = fr->ic->rlistlong;
        fr->rcoulomb      = fr->ic->rcoulomb;
        fr->rvdw          = fr->ic->rvdw;

        if (ir->eDispCorr != edispcNO)
        {
            calc_enervirdiff(NULL, ir->eDispCorr, fr);
        }
    }

    if (!pme_lb->bBalance &&
        (!pme_lb->bSepPMERanks || step_rel > pme_lb->step_rel_stop))
    {
        /* We have just deactivated the balancing and we're not measuring PP/PME
         * imbalance during the first steps of the run: deactivate the tuning.
         */
        pme_lb->bActive = FALSE;
    }

    if (!(pme_lb->bActive) && DOMAINDECOMP(cr) && dd_dlb_is_locked(cr->dd))
    {
        /* Make sure DLB is allowed when we deactivate PME tuning */
        dd_dlb_unlock(cr->dd);
        md_print_warn(cr, fp_log, "NOTE: DLB can now turn on, when beneficial\n");
    }

    *bPrinting = pme_lb->bBalance;
}
/* calculates center of mass of selection index from all coordinates x */
void pull_calc_coms(t_commrec *cr,
                    struct pull_t *pull, t_mdatoms *md, t_pbc *pbc, double t,
                    rvec x[], rvec *xp)
{
    int          g;
    real         twopi_box = 0;
    pull_comm_t *comm;

    comm = &pull->comm;

    if (comm->rbuf == NULL)
    {
        snew(comm->rbuf, pull->ngroup);
    }
    if (comm->dbuf == NULL)
    {
        snew(comm->dbuf, 3*pull->ngroup);
    }

    if (pull->bRefAt && pull->bSetPBCatoms)
    {
        pull_set_pbcatoms(cr, pull, x, comm->rbuf);

        if (cr != NULL && DOMAINDECOMP(cr))
        {
            /* We can keep these PBC reference coordinates fixed for nstlist
             * steps, since atoms won't jump over PBC.
             * This avoids a global reduction at the next nstlist-1 steps.
             * Note that the exact values of the pbc reference coordinates
             * are irrelevant, as long as all atoms in the group are within
             * half a box distance of the reference coordinate.
             */
            pull->bSetPBCatoms = FALSE;
        }
    }

    if (pull->cosdim >= 0)
    {
        int m;

        assert(pull->npbcdim <= DIM);

        for (m = pull->cosdim+1; m < pull->npbcdim; m++)
        {
            if (pbc->box[m][pull->cosdim] != 0)
            {
                gmx_fatal(FARGS, "Can not do cosine weighting for triclinic dimensions");
            }
        }
        twopi_box = 2.0*M_PI/pbc->box[pull->cosdim][pull->cosdim];
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        pull_group_work_t *pgrp;

        pgrp = &pull->group[g];

        if (pgrp->bCalcCOM)
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                dvec   com, comp;
                double wmass, wwmass;
                rvec   x_pbc = { 0, 0, 0 };
                int    i;

                clear_dvec(com);
                clear_dvec(comp);
                wmass  = 0;
                wwmass = 0;

                if (pgrp->epgrppbc == epgrppbcREFAT)
                {
                    /* Set the pbc atom */
                    copy_rvec(comm->rbuf[g], x_pbc);
                }

                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    int  ii, m;
                    real mass, wm;

                    ii   = pgrp->ind_loc[i];
                    mass = md->massT[ii];
                    if (pgrp->weight_loc == NULL)
                    {
                        wm     = mass;
                        wmass += wm;
                    }
                    else
                    {
                        real w;

                        w       = pgrp->weight_loc[i];
                        wm      = w*mass;
                        wmass  += wm;
                        wwmass += wm*w;
                    }
                    if (pgrp->epgrppbc == epgrppbcNONE)
                    {
                        /* Plain COM: sum the coordinates */
                        for (m = 0; m < DIM; m++)
                        {
                            com[m] += wm*x[ii][m];
                        }
                        if (xp)
                        {
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*xp[ii][m];
                            }
                        }
                    }
                    else
                    {
                        rvec dx;

                        /* Sum the difference with the reference atom */
                        pbc_dx(pbc, x[ii], x_pbc, dx);
                        for (m = 0; m < DIM; m++)
                        {
                            com[m] += wm*dx[m];
                        }
                        if (xp)
                        {
                            /* For xp add the difference between xp and x to dx,
                             * such that we use the same periodic image,
                             * also when xp has a large displacement.
                             */
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*(dx[m] + xp[ii][m] - x[ii][m]);
                            }
                        }
                    }
                }

                /* We do this check after the loop above to avoid more nesting.
                 * If we have a single-atom group the mass is irrelevant, so
                 * we can remove the mass factor to avoid division by zero.
                 * Note that with constraint pulling the mass does matter, but
                 * in that case a check group mass != 0 has been done before.
                 */
                if (pgrp->params.nat == 1 && pgrp->nat_loc == 1 && wmass == 0)
                {
                    int m;

                    /* Copy the single atom coordinate */
                    for (m = 0; m < DIM; m++)
                    {
                        com[m] = x[pgrp->ind_loc[0]][m];
                    }
                    /* Set all mass factors to 1 to get the correct COM */
                    wmass  = 1;
                    wwmass = 1;
                }

                if (pgrp->weight_loc == NULL)
                {
                    wwmass = wmass;
                }

                /* Copy local sums to a buffer for global summing */
                copy_dvec(com, comm->dbuf[g*3]);
                copy_dvec(comp, comm->dbuf[g*3 + 1]);
                comm->dbuf[g*3 + 2][0] = wmass;
                comm->dbuf[g*3 + 2][1] = wwmass;
                comm->dbuf[g*3 + 2][2] = 0;
            }
            else
            {
                /* Cosine weighting geometry */
                double cm, sm, cmp, smp, ccm, csm, ssm, csw, snw;
                int    i;

                cm  = 0;
                sm  = 0;
                cmp = 0;
                smp = 0;
                ccm = 0;
                csm = 0;
                ssm = 0;

                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    int  ii;
                    real mass;

                    ii   = pgrp->ind_loc[i];
                    mass = md->massT[ii];
                    /* Determine cos and sin sums */
                    csw  = cos(x[ii][pull->cosdim]*twopi_box);
                    snw  = sin(x[ii][pull->cosdim]*twopi_box);
                    cm  += csw*mass;
                    sm  += snw*mass;
                    ccm += csw*csw*mass;
                    csm += csw*snw*mass;
                    ssm += snw*snw*mass;

                    if (xp)
                    {
                        csw  = cos(xp[ii][pull->cosdim]*twopi_box);
                        snw  = sin(xp[ii][pull->cosdim]*twopi_box);
                        cmp += csw*mass;
                        smp += snw*mass;
                    }
                }

                /* Copy local sums to a buffer for global summing */
                comm->dbuf[g*3  ][0] = cm;
                comm->dbuf[g*3  ][1] = sm;
                comm->dbuf[g*3  ][2] = 0;
                comm->dbuf[g*3+1][0] = ccm;
                comm->dbuf[g*3+1][1] = csm;
                comm->dbuf[g*3+1][2] = ssm;
                comm->dbuf[g*3+2][0] = cmp;
                comm->dbuf[g*3+2][1] = smp;
                comm->dbuf[g*3+2][2] = 0;
            }
        }
    }

    pull_reduce_double(cr, comm, pull->ngroup*3*DIM, comm->dbuf[0]);

    for (g = 0; g < pull->ngroup; g++)
    {
        pull_group_work_t *pgrp;

        pgrp = &pull->group[g];
        if (pgrp->params.nat > 0 && pgrp->bCalcCOM)
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                double wmass, wwmass;
                int    m;

                /* Determine the inverse mass */
                wmass         = comm->dbuf[g*3+2][0];
                wwmass        = comm->dbuf[g*3+2][1];
                pgrp->mwscale = 1.0/wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm != 0)
                {
                    pgrp->wscale = wmass/wwmass;
                    pgrp->invtm  = wwmass/(wmass*wmass);
                }
                /* Divide by the total mass */
                for (m = 0; m < DIM; m++)
                {
                    pgrp->x[m] = comm->dbuf[g*3  ][m]*pgrp->mwscale;
                    if (xp)
                    {
                        pgrp->xp[m] = comm->dbuf[g*3+1][m]*pgrp->mwscale;
                    }
                    if (pgrp->epgrppbc == epgrppbcREFAT)
                    {
                        pgrp->x[m] += comm->rbuf[g][m];
                        if (xp)
                        {
                            pgrp->xp[m] += comm->rbuf[g][m];
                        }
                    }
                }
            }
            else
            {
                /* Cosine weighting geometry */
                double csw, snw, wmass, wwmass;
                int    i, ii;

                /* Determine the optimal location of the cosine weight */
                csw                   = comm->dbuf[g*3][0];
                snw                   = comm->dbuf[g*3][1];
                pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                /* Set the weights for the local atoms */
                wmass  = sqrt(csw*csw + snw*snw);
                wwmass = (comm->dbuf[g*3+1][0]*csw*csw +
                          comm->dbuf[g*3+1][1]*csw*snw +
                          comm->dbuf[g*3+1][2]*snw*snw)/(wmass*wmass);

                pgrp->mwscale = 1.0/wmass;
                pgrp->wscale  = wmass/wwmass;
                pgrp->invtm   = wwmass/(wmass*wmass);
                /* Set the weights for the local atoms */
                csw *= pgrp->invtm;
                snw *= pgrp->invtm;
                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    ii                  = pgrp->ind_loc[i];
                    pgrp->weight_loc[i] = csw*cos(twopi_box*x[ii][pull->cosdim]) +
                                          snw*sin(twopi_box*x[ii][pull->cosdim]);
                }
                if (xp)
                {
                    csw                    = comm->dbuf[g*3+2][0];
                    snw                    = comm->dbuf[g*3+2][1];
                    pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                }
            }
            if (debug)
            {
                fprintf(debug, "Pull group %d wmass %f invtm %f\n",
                        g, 1.0/pgrp->mwscale, pgrp->invtm);
            }
        }
    }

    if (pull->bCylinder)
    {
        /* Calculate the COMs for the cylinder reference groups */
        make_cyl_refgrps(cr, pull, md, pbc, t, x);
    }
}
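The cosine-weighting branches above recover the weight position from the summed cosine and sine moments via atan2_0_2pi(), which this excerpt uses but does not define. Presumably it is atan2 remapped from (-pi, pi] onto [0, 2*pi), so that dividing by twopi_box yields a coordinate inside the box rather than a negative offset; a minimal sketch under that assumption:

#include <math.h>

/* Sketch (assumed behavior): atan2 with negative results shifted up by
 * 2*pi so the return value always lies in [0, 2*pi). */
static double atan2_0_2pi_sketch(double y, double x)
{
    double a = atan2(y, x);

    if (a < 0)
    {
        a += 2.0*M_PI;
    }
    return a;
}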
gmx_constr_t init_constraints(FILE *fplog,
                              gmx_mtop_t *mtop, t_inputrec *ir,
                              gmx_edsam_t ed, t_state *state,
                              t_commrec *cr)
{
    int                  ncon, nset, nmol, settle_type, i, natoms, mt, nflexcon;
    struct gmx_constr   *constr;
    char                *env;
    t_ilist             *ilist;
    gmx_mtop_ilistloop_t iloop;

    ncon =
        gmx_mtop_ftype_count(mtop, F_CONSTR) +
        gmx_mtop_ftype_count(mtop, F_CONSTRNC);
    nset = gmx_mtop_ftype_count(mtop, F_SETTLE);

    if (ncon+nset == 0 && ir->ePull != epullCONSTRAINT && ed == NULL)
    {
        return NULL;
    }

    snew(constr, 1);

    constr->ncon_tot = ncon;
    constr->nflexcon = 0;
    if (ncon > 0)
    {
        constr->n_at2con_mt = mtop->nmoltype;
        snew(constr->at2con_mt, constr->n_at2con_mt);
        for (mt = 0; mt < mtop->nmoltype; mt++)
        {
            constr->at2con_mt[mt] = make_at2con(0, mtop->moltype[mt].atoms.nr,
                                                mtop->moltype[mt].ilist,
                                                mtop->ffparams.iparams,
                                                EI_DYNAMICS(ir->eI), &nflexcon);
            for (i = 0; i < mtop->nmolblock; i++)
            {
                if (mtop->molblock[i].type == mt)
                {
                    constr->nflexcon += mtop->molblock[i].nmol*nflexcon;
                }
            }
        }

        if (constr->nflexcon > 0)
        {
            if (fplog)
            {
                fprintf(fplog, "There are %d flexible constraints\n",
                        constr->nflexcon);
                if (ir->fc_stepsize == 0)
                {
                    fprintf(fplog, "\n"
                            "WARNING: step size for flexible constraining = 0\n"
                            "         All flexible constraints will be rigid.\n"
                            "         Will try to keep all flexible constraints at their original length,\n"
                            "         but the lengths may exhibit some drift.\n\n");
                    constr->nflexcon = 0;
                }
            }
            if (constr->nflexcon > 0)
            {
                please_cite(fplog, "Hess2002");
            }
        }

        if (ir->eConstrAlg == econtLINCS)
        {
            constr->lincsd = init_lincs(fplog, mtop,
                                        constr->nflexcon, constr->at2con_mt,
                                        DOMAINDECOMP(cr) && cr->dd->bInterCGcons,
                                        ir->nLincsIter, ir->nProjOrder);
        }

        if (ir->eConstrAlg == econtSHAKE)
        {
            if (DOMAINDECOMP(cr) && cr->dd->bInterCGcons)
            {
                gmx_fatal(FARGS, "SHAKE is not supported with domain decomposition and constraints that cross charge group boundaries, use LINCS");
            }
            if (constr->nflexcon)
            {
                gmx_fatal(FARGS, "For this system also velocities and/or forces need to be constrained, this can not be done with SHAKE, you should select LINCS");
            }
            please_cite(fplog, "Ryckaert77a");
            if (ir->bShakeSOR)
            {
                please_cite(fplog, "Barth95a");
            }

            constr->shaked = shake_init();
        }
    }

    if (nset > 0)
    {
        please_cite(fplog, "Miyamoto92a");

        constr->bInterCGsettles = inter_charge_group_settles(mtop);

        /* Check that we have only one settle type */
        settle_type = -1;
        iloop       = gmx_mtop_ilistloop_init(mtop);
        while (gmx_mtop_ilistloop_next(iloop, &ilist, &nmol))
        {
            for (i = 0; i < ilist[F_SETTLE].nr; i += 4)
            {
                if (settle_type == -1)
                {
                    settle_type = ilist[F_SETTLE].iatoms[i];
                }
                else if (ilist[F_SETTLE].iatoms[i] != settle_type)
                {
                    gmx_fatal(FARGS,
                              "The [molecules] section of your topology specifies more than one block of\n"
                              "a [moleculetype] with a [settles] block. Only one such is allowed. If you\n"
                              "are trying to partition your solvent into different *groups* (e.g. for\n"
                              "freezing, T-coupling, etc.) then you are using the wrong approach. Index\n"
                              "files specify groups. Otherwise, you may wish to change the least-used\n"
                              "block of molecules with SETTLE constraints into 3 normal constraints.");
                }
            }
        }

        constr->n_at2settle_mt = mtop->nmoltype;
        snew(constr->at2settle_mt, constr->n_at2settle_mt);
        for (mt = 0; mt < mtop->nmoltype; mt++)
        {
            constr->at2settle_mt[mt] =
                make_at2settle(mtop->moltype[mt].atoms.nr,
                               &mtop->moltype[mt].ilist[F_SETTLE]);
        }
    }

    constr->maxwarn = 999;
    env             = getenv("GMX_MAXCONSTRWARN");
    if (env)
    {
        constr->maxwarn = 0;
        sscanf(env, "%d", &constr->maxwarn);
        if (fplog)
        {
            fprintf(fplog,
                    "Setting the maximum number of constraint warnings to %d\n",
                    constr->maxwarn);
        }
        if (MASTER(cr))
        {
            fprintf(stderr,
                    "Setting the maximum number of constraint warnings to %d\n",
                    constr->maxwarn);
        }
    }
    if (constr->maxwarn < 0 && fplog)
    {
        fprintf(fplog, "maxwarn < 0, will not stop on constraint errors\n");
    }
    constr->warncount_lincs  = 0;
    constr->warncount_settle = 0;

    /* Initialize the essential dynamics sampling.
     * Put the pointer to the ED struct in constr */
    constr->ed = ed;
    if (ed != NULL || state->edsamstate.nED > 0)
    {
        init_edsam(mtop, ir, cr, ed, state->x, state->box, &state->edsamstate);
    }

    constr->warn_mtop = mtop;

    return constr;
}
static void make_cyl_refgrps(t_commrec *cr, t_pull *pull, t_mdatoms *md,
                             t_pbc *pbc, double t, rvec *x, rvec *xp)
{
    int           c, i, ii, m, start, end;
    rvec          g_x, dx, dir;
    double        r0_2, sum_a, sum_ap, dr2, mass, weight, wmass, wwmass, inp;
    t_pull_coord *pcrd;
    t_pull_group *pref, *pgrp, *pdyna;
    gmx_ga2la_t   ga2la = NULL;

    if (pull->dbuf_cyl == NULL)
    {
        snew(pull->dbuf_cyl, pull->ncoord*4);
    }

    if (cr && DOMAINDECOMP(cr))
    {
        ga2la = cr->dd->ga2la;
    }

    start = 0;
    end   = md->homenr;

    r0_2 = dsqr(pull->cyl_r0);

    /* loop over all groups to make a reference group for each */
    for (c = 0; c < pull->ncoord; c++)
    {
        pcrd  = &pull->coord[c];

        /* pref will be the same group for all pull coordinates */
        pref  = &pull->group[pcrd->group[0]];
        pgrp  = &pull->group[pcrd->group[1]];
        pdyna = &pull->dyna[c];
        copy_rvec(pcrd->vec, dir);
        sum_a          = 0;
        sum_ap         = 0;
        wmass          = 0;
        wwmass         = 0;
        pdyna->nat_loc = 0;

        for (m = 0; m < DIM; m++)
        {
            g_x[m] = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t);
        }

        /* loop over all atoms in the main ref group */
        for (i = 0; i < pref->nat; i++)
        {
            ii = pref->ind[i];
            if (ga2la)
            {
                if (!ga2la_get_home(ga2la, pref->ind[i], &ii))
                {
                    ii = -1;
                }
            }
            if (ii >= start && ii < end)
            {
                pbc_dx_aiuc(pbc, x[ii], g_x, dx);
                inp = iprod(dir, dx);
                dr2 = 0;
                for (m = 0; m < DIM; m++)
                {
                    dr2 += dsqr(dx[m] - inp*dir[m]);
                }

                if (dr2 < r0_2)
                {
                    /* add to index, to sum of COM, to weight array */
                    if (pdyna->nat_loc >= pdyna->nalloc_loc)
                    {
                        pdyna->nalloc_loc = over_alloc_large(pdyna->nat_loc+1);
                        srenew(pdyna->ind_loc, pdyna->nalloc_loc);
                        srenew(pdyna->weight_loc, pdyna->nalloc_loc);
                    }
                    pdyna->ind_loc[pdyna->nat_loc] = ii;
                    mass   = md->massT[ii];
                    weight = get_weight(sqrt(dr2), pull->cyl_r1, pull->cyl_r0);
                    pdyna->weight_loc[pdyna->nat_loc] = weight;
                    sum_a += mass*weight*inp;
                    if (xp)
                    {
                        pbc_dx_aiuc(pbc, xp[ii], g_x, dx);
                        inp     = iprod(dir, dx);
                        sum_ap += mass*weight*inp;
                    }
                    wmass  += mass*weight;
                    wwmass += mass*sqr(weight);
                    pdyna->nat_loc++;
                }
            }
        }
        pull->dbuf_cyl[c*4+0] = wmass;
        pull->dbuf_cyl[c*4+1] = wwmass;
        pull->dbuf_cyl[c*4+2] = sum_a;
        pull->dbuf_cyl[c*4+3] = sum_ap;
    }

    if (cr && PAR(cr))
    {
        /* Sum the contributions over the nodes */
        gmx_sumd(pull->ncoord*4, pull->dbuf_cyl, cr);
    }

    for (c = 0; c < pull->ncoord; c++)
    {
        pcrd  = &pull->coord[c];
        pdyna = &pull->dyna[c];
        pgrp  = &pull->group[pcrd->group[1]];

        wmass         = pull->dbuf_cyl[c*4+0];
        wwmass        = pull->dbuf_cyl[c*4+1];
        pdyna->wscale = wmass/wwmass;
        pdyna->invtm  = 1.0/(pdyna->wscale*wmass);

        for (m = 0; m < DIM; m++)
        {
            g_x[m]      = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t);
            pdyna->x[m] = g_x[m] + pcrd->vec[m]*pull->dbuf_cyl[c*4+2]/wmass;
            if (xp)
            {
                pdyna->xp[m] = g_x[m] + pcrd->vec[m]*pull->dbuf_cyl[c*4+3]/wmass;
            }
        }

        if (debug)
        {
            fprintf(debug, "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n",
                    c, pdyna->x[0], pdyna->x[1], pdyna->x[2],
                    1.0/pdyna->invtm);
        }
    }
}
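make_cyl_refgrps() weights each candidate atom by get_weight(sqrt(dr2), cyl_r1, cyl_r0), a function outside this excerpt. From its use, full weight well inside the cylinder and zero at the outer radius r0, it is presumably a switching function between r1 and r0; a hedged linear sketch (the true functional form in the source may differ):

/* Sketch of a radial switching weight consistent with the call above:
 * 1 inside r1, 0 beyond r0, interpolated linearly in between. */
static real get_weight_sketch(real r, real r1, real r0)
{
    if (r <= r1)
    {
        return 1;
    }
    if (r >= r0)
    {
        return 0;
    }
    return (r0 - r)/(r0 - r1);
}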
static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
                     struct gmx_lincsdata *lincsd, real *invmass,
                     t_commrec *cr,
                     real wangle, int *warn,
                     real invdt, rvec *v,
                     gmx_bool bCalcVir, tensor rmdr)
{
    int     b, i, j, k, n, iter;
    real    tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, len2, dlen2, wfac, lam;
    rvec    dx;
    int     ncons, *bla, *blnr, *blbnb;
    rvec   *r;
    real   *blc, *blmf, *bllen, *blcc, *rhs1, *rhs2, *sol, *lambda;
    int    *nlocat;

    ncons  = lincsd->nc;
    bla    = lincsd->bla;
    r      = lincsd->tmpv;
    blnr   = lincsd->blnr;
    blbnb  = lincsd->blbnb;
    blc    = lincsd->blc;
    blmf   = lincsd->blmf;
    bllen  = lincsd->bllen;
    blcc   = lincsd->tmpncc;
    rhs1   = lincsd->tmp1;
    rhs2   = lincsd->tmp2;
    sol    = lincsd->tmp3;
    lambda = lincsd->lambda;

    if (DOMAINDECOMP(cr) && cr->dd->constraints)
    {
        nlocat = dd_constraints_nlocalatoms(cr->dd);
    }
    else if (PARTDECOMP(cr))
    {
        nlocat = pd_constraints_nlocalatoms(cr->pd);
    }
    else
    {
        nlocat = NULL;
    }

    *warn = 0;

    if (pbc)
    {
        /* Compute normalized i-j vectors */
        for (b = 0; b < ncons; b++)
        {
            pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
            unitv(dx, r[b]);
        }
        for (b = 0; b < ncons; b++)
        {
            for (n = blnr[b]; n < blnr[b+1]; n++)
            {
                blcc[n] = blmf[n]*iprod(r[b], r[blbnb[n]]);
            }
            pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
            mvb     = blc[b]*(iprod(r[b], dx) - bllen[b]);
            rhs1[b] = mvb;
            sol[b]  = mvb;
        }
    }
    else
    {
        /* Compute normalized i-j vectors */
        for (b = 0; b < ncons; b++)
        {
            i       = bla[2*b];
            j       = bla[2*b+1];
            tmp0    = x[i][0] - x[j][0];
            tmp1    = x[i][1] - x[j][1];
            tmp2    = x[i][2] - x[j][2];
            rlen    = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
            r[b][0] = rlen*tmp0;
            r[b][1] = rlen*tmp1;
            r[b][2] = rlen*tmp2;
        } /* 16 ncons flops */

        for (b = 0; b < ncons; b++)
        {
            tmp0 = r[b][0];
            tmp1 = r[b][1];
            tmp2 = r[b][2];
            len  = bllen[b];
            i    = bla[2*b];
            j    = bla[2*b+1];
            for (n = blnr[b]; n < blnr[b+1]; n++)
            {
                k       = blbnb[n];
                blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
            } /* 6 nr flops */
            mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
                          tmp1*(xp[i][1] - xp[j][1]) +
                          tmp2*(xp[i][2] - xp[j][2]) - len);
            rhs1[b] = mvb;
            sol[b]  = mvb;
            /* 10 flops */
        }
        /* Together: 26*ncons + 6*nrtot flops */
    }

    lincs_matrix_expand(lincsd, blcc, rhs1, rhs2, sol);
    /* nrec*(ncons+2*nrtot) flops */

    for (b = 0; b < ncons; b++)
    {
        i         = bla[2*b];
        j         = bla[2*b+1];
        mvb       = blc[b]*sol[b];
        lambda[b] = -mvb;
        im1       = invmass[i];
        im2       = invmass[j];
        tmp0      = r[b][0]*mvb;
        tmp1      = r[b][1]*mvb;
        tmp2      = r[b][2]*mvb;
        xp[i][0] -= tmp0*im1;
        xp[i][1] -= tmp1*im1;
        xp[i][2] -= tmp2*im1;
        xp[j][0] += tmp0*im2;
        xp[j][1] += tmp1*im2;
        xp[j][2] += tmp2*im2;
    } /* 16 ncons flops */

    /* ******** Correction for centripetal effects ******** */

    wfac = cos(DEG2RAD*wangle);
    wfac = wfac*wfac;

    for (iter = 0; iter < lincsd->nIter; iter++)
    {
        if (DOMAINDECOMP(cr) && cr->dd->constraints)
        {
            /* Communicate the corrected non-local coordinates */
            dd_move_x_constraints(cr->dd, box, xp, NULL);
        }
        else if (PARTDECOMP(cr))
        {
            pd_move_x_constraints(cr, xp, NULL);
        }

        for (b = 0; b < ncons; b++)
        {
            len = bllen[b];
            if (pbc)
            {
                pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
            }
            else
            {
                rvec_sub(xp[bla[2*b]], xp[bla[2*b+1]], dx);
            }
            len2  = len*len;
            dlen2 = 2*len2 - norm2(dx);
            if (dlen2 < wfac*len2 && (nlocat == NULL || nlocat[b]))
            {
                *warn = b;
            }
            if (dlen2 > 0)
            {
                mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
            }
            else
            {
                mvb = blc[b]*len;
            }
            rhs1[b] = mvb;
            sol[b]  = mvb;
        } /* 20*ncons flops */

        lincs_matrix_expand(lincsd, blcc, rhs1, rhs2, sol);
        /* nrec*(ncons+2*nrtot) flops */

        for (b = 0; b < ncons; b++)
        {
            i         = bla[2*b];
            j         = bla[2*b+1];
            lam       = lambda[b];
            mvb       = blc[b]*sol[b];
            lambda[b] = lam - mvb;
            im1       = invmass[i];
            im2       = invmass[j];
            tmp0      = r[b][0]*mvb;
            tmp1      = r[b][1]*mvb;
            tmp2      = r[b][2]*mvb;
            xp[i][0] -= tmp0*im1;
            xp[i][1] -= tmp1*im1;
            xp[i][2] -= tmp2*im1;
            xp[j][0] += tmp0*im2;
            xp[j][1] += tmp1*im2;
            xp[j][2] += tmp2*im2;
        } /* 17 ncons flops */
    } /* nit*ncons*(37+9*nrec) flops */

    if (v)
    {
        /* Correct the velocities */
        for (b = 0; b < ncons; b++)
        {
            i        = bla[2*b];
            j        = bla[2*b+1];
            im1      = invmass[i]*lambda[b]*invdt;
            im2      = invmass[j]*lambda[b]*invdt;
            v[i][0] += im1*r[b][0];
            v[i][1] += im1*r[b][1];
            v[i][2] += im1*r[b][2];
            v[j][0] -= im2*r[b][0];
            v[j][1] -= im2*r[b][1];
            v[j][2] -= im2*r[b][2];
        } /* 16 ncons flops */
    }

    if (nlocat)
    {
        /* Only account for local atoms */
        for (b = 0; b < ncons; b++)
        {
            lambda[b] *= 0.5*nlocat[b];
        }
    }

    if (bCalcVir)
    {
        /* Constraint virial */
        for (b = 0; b < ncons; b++)
        {
            tmp0 = bllen[b]*lambda[b];
            for (i = 0; i < DIM; i++)
            {
                tmp1 = tmp0*r[b][i];
                for (j = 0; j < DIM; j++)
                {
                    rmdr[i][j] -= tmp1*r[b][j];
                }
            }
        } /* 22 ncons flops */
    }

    /* Total:
     * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
     * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
     *
     * (26+nrec)*ncons + (6+2*nrec)*nrtot
     * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
     * if nit=1
     * (63+nrec)*ncons + (6+4*nrec)*nrtot
     */
}
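do_lincs() calls lincs_matrix_expand() twice; its flop count nrec*(ncons+2*nrtot) appears in the comments above, but its body is not part of this excerpt. It approximately inverts (I - B), with B the sparse constraint-coupling matrix stored in blcc/blnr/blbnb, by truncating the series (I - B)^-1 = I + B + B^2 + ..., accumulating one power of B per recursion step. A simplified sketch of that recursion, with buffer handling reduced to pointer swaps (an illustration of the scheme, not the source routine):

/* Sketch: on entry sol equals rhs1 (the identity term); each round
 * applies the sparse matrix B once, so sol += B^k * rhs for k = 1..nrec. */
static void lincs_matrix_expand_sketch(int ncons, int nrec,
                                       const int *blnr, const int *blbnb,
                                       const real *blcc,
                                       real *rhs1, real *rhs2, real *sol)
{
    int   rec, b, n;
    real  mvb, *tmp;

    for (rec = 0; rec < nrec; rec++)
    {
        for (b = 0; b < ncons; b++)
        {
            mvb = 0;
            for (n = blnr[b]; n < blnr[b+1]; n++)
            {
                mvb += blcc[n]*rhs1[blbnb[n]];
            }
            rhs2[b] = mvb;
            sol[b] += mvb;
        }
        /* the output becomes the input of the next round */
        tmp  = rhs1;
        rhs1 = rhs2;
        rhs2 = tmp;
    }
}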
gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex *re,
                          t_state *state, real *ener,
                          t_state *state_local,
                          int step, real time)
{
    gmx_multisim_t *ms;
    int             exchange = -1, shift;
    gmx_bool        bExchanged = FALSE;

    ms = cr->ms;

    if (MASTER(cr))
    {
        exchange   = get_replica_exchange(fplog, ms, re, ener, det(state->box),
                                          step, time);
        bExchanged = (exchange >= 0);
    }

    if (PAR(cr))
    {
#ifdef GMX_MPI
        MPI_Bcast(&bExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr),
                  cr->mpi_comm_mygroup);
#endif
    }

    if (bExchanged)
    {
        /* Exchange the states */

        if (PAR(cr))
        {
            /* Collect the global state on the master node */
            if (DOMAINDECOMP(cr))
            {
                dd_collect_state(cr->dd, state_local, state);
            }
            else
            {
                pd_collect_state(cr, state);
            }
        }

        if (MASTER(cr))
        {
            /* Exchange the global states between the master nodes */
            if (debug)
            {
                fprintf(debug, "Exchanging %d with %d\n", ms->sim, exchange);
            }
            exchange_state(ms, exchange, state);

            if (re->type == ereTEMP)
            {
                scale_velocities(state, sqrt(re->q[ms->sim]/re->q[exchange]));
            }
        }

        /* With domain decomposition the global state is distributed later */
        if (!DOMAINDECOMP(cr))
        {
            /* Copy the global state to the local state data structure */
            copy_state_nonatomdata(state, state_local);

            if (PAR(cr))
            {
                bcast_state(cr, state, FALSE);
            }
        }
    }

    return bExchanged;
}
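replica_exchange() defers the accept/reject decision to get_replica_exchange() on the master ranks, which is not shown here. For temperature replica exchange the standard Metropolis criterion accepts a swap of configurations i and j with probability min(1, exp[(beta_i - beta_j)(E_i - E_j)]), where beta = 1/(kB*T) and E_i is the potential energy of the configuration currently at beta_i; the scale_velocities() call above then rescales velocities to the new temperature. A self-contained hedged sketch of that test (names hypothetical; rand() used only for illustration):

#include <math.h>
#include <stdlib.h>

/* Sketch of the temperature-RE Metropolis test. Returns 1 to accept.
 * If the swap does not lower the combined Boltzmann weight it is
 * accepted outright; otherwise it is accepted stochastically. */
static int accept_exchange_sketch(double beta_i, double beta_j,
                                  double E_i, double E_j)
{
    double delta = (beta_i - beta_j)*(E_i - E_j);

    if (delta >= 0)
    {
        return 1;
    }
    return (rand()/(double)RAND_MAX) < exp(delta);
}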
/* calculates center of mass of selection index from all coordinates x */
void pull_calc_coms(t_commrec *cr,
                    t_pull *pull, t_mdatoms *md, t_pbc *pbc, double t,
                    rvec x[], rvec *xp)
{
    int           g, i, ii, m;
    real          mass, w, wm, twopi_box = 0;
    double        wmass, wwmass, invwmass;
    dvec          com, comp;
    double        cm, sm, cmp, smp, ccm, csm, ssm, csw, snw;
    rvec         *xx[2], x_pbc = {0, 0, 0}, dx;
    t_pull_group *pgrp;

    if (pull->rbuf == NULL)
    {
        snew(pull->rbuf, pull->ngroup);
    }
    if (pull->dbuf == NULL)
    {
        snew(pull->dbuf, 3*pull->ngroup);
    }

    if (pull->bRefAt && pull->bSetPBCatoms)
    {
        pull_set_pbcatoms(cr, pull, x, pull->rbuf);

        if (cr != NULL && DOMAINDECOMP(cr))
        {
            /* We can keep these PBC reference coordinates fixed for nstlist
             * steps, since atoms won't jump over PBC.
             * This avoids a global reduction at the next nstlist-1 steps.
             * Note that the exact values of the pbc reference coordinates
             * are irrelevant, as long as all atoms in the group are within
             * half a box distance of the reference coordinate.
             */
            pull->bSetPBCatoms = FALSE;
        }
    }

    if (pull->cosdim >= 0)
    {
        for (m = pull->cosdim+1; m < pull->npbcdim; m++)
        {
            if (pbc->box[m][pull->cosdim] != 0)
            {
                gmx_fatal(FARGS, "Can not do cosine weighting for triclinic dimensions");
            }
        }
        twopi_box = 2.0*M_PI/pbc->box[pull->cosdim][pull->cosdim];
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        pgrp = &pull->group[g];
        clear_dvec(com);
        clear_dvec(comp);
        wmass  = 0;
        wwmass = 0;
        cm     = 0;
        sm     = 0;
        cmp    = 0;
        smp    = 0;
        ccm    = 0;
        csm    = 0;
        ssm    = 0;
        if (!(g == 0 && PULL_CYL(pull)))
        {
            if (pgrp->epgrppbc == epgrppbcREFAT)
            {
                /* Set the pbc atom */
                copy_rvec(pull->rbuf[g], x_pbc);
            }
            w = 1;
            for (i = 0; i < pgrp->nat_loc; i++)
            {
                ii   = pgrp->ind_loc[i];
                mass = md->massT[ii];
                if (pgrp->epgrppbc != epgrppbcCOS)
                {
                    if (pgrp->weight_loc)
                    {
                        w = pgrp->weight_loc[i];
                    }
                    wm      = w*mass;
                    wmass  += wm;
                    wwmass += wm*w;
                    if (pgrp->epgrppbc == epgrppbcNONE)
                    {
                        /* Plain COM: sum the coordinates */
                        for (m = 0; m < DIM; m++)
                        {
                            com[m] += wm*x[ii][m];
                        }
                        if (xp)
                        {
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*xp[ii][m];
                            }
                        }
                    }
                    else
                    {
                        /* Sum the difference with the reference atom */
                        pbc_dx(pbc, x[ii], x_pbc, dx);
                        for (m = 0; m < DIM; m++)
                        {
                            com[m] += wm*dx[m];
                        }
                        if (xp)
                        {
                            /* For xp add the difference between xp and x to dx,
                             * such that we use the same periodic image,
                             * also when xp has a large displacement.
                             */
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*(dx[m] + xp[ii][m] - x[ii][m]);
                            }
                        }
                    }
                }
                else
                {
                    /* Determine cos and sin sums */
                    csw  = cos(x[ii][pull->cosdim]*twopi_box);
                    snw  = sin(x[ii][pull->cosdim]*twopi_box);
                    cm  += csw*mass;
                    sm  += snw*mass;
                    ccm += csw*csw*mass;
                    csm += csw*snw*mass;
                    ssm += snw*snw*mass;

                    if (xp)
                    {
                        csw  = cos(xp[ii][pull->cosdim]*twopi_box);
                        snw  = sin(xp[ii][pull->cosdim]*twopi_box);
                        cmp += csw*mass;
                        smp += snw*mass;
                    }
                }
            }
        }

        /* Copy local sums to a buffer for global summing */
        switch (pgrp->epgrppbc)
        {
            case epgrppbcNONE:
            case epgrppbcREFAT:
                copy_dvec(com, pull->dbuf[g*3]);
                copy_dvec(comp, pull->dbuf[g*3+1]);
                pull->dbuf[g*3+2][0] = wmass;
                pull->dbuf[g*3+2][1] = wwmass;
                pull->dbuf[g*3+2][2] = 0;
                break;
            case epgrppbcCOS:
                pull->dbuf[g*3  ][0] = cm;
                pull->dbuf[g*3  ][1] = sm;
                pull->dbuf[g*3  ][2] = 0;
                pull->dbuf[g*3+1][0] = ccm;
                pull->dbuf[g*3+1][1] = csm;
                pull->dbuf[g*3+1][2] = ssm;
                pull->dbuf[g*3+2][0] = cmp;
                pull->dbuf[g*3+2][1] = smp;
                pull->dbuf[g*3+2][2] = 0;
                break;
        }
    }

    if (cr && PAR(cr))
    {
        /* Sum the contributions over the nodes */
        gmx_sumd(pull->ngroup*3*DIM, pull->dbuf[0], cr);
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        pgrp = &pull->group[g];
        if (pgrp->nat > 0 && !(g == 0 && PULL_CYL(pull)))
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                /* Determine the inverse mass */
                wmass    = pull->dbuf[g*3+2][0];
                wwmass   = pull->dbuf[g*3+2][1];
                invwmass = 1/wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm > 0)
                {
                    pgrp->wscale = wmass/wwmass;
                    pgrp->invtm  = 1.0/(pgrp->wscale*wmass);
                }
                /* Divide by the total mass */
                for (m = 0; m < DIM; m++)
                {
                    pgrp->x[m] = pull->dbuf[g*3  ][m]*invwmass;
                    if (xp)
                    {
                        pgrp->xp[m] = pull->dbuf[g*3+1][m]*invwmass;
                    }
                    if (pgrp->epgrppbc == epgrppbcREFAT)
                    {
                        pgrp->x[m] += pull->rbuf[g][m];
                        if (xp)
                        {
                            pgrp->xp[m] += pull->rbuf[g][m];
                        }
                    }
                }
            }
            else
            {
                /* Determine the optimal location of the cosine weight */
                csw                   = pull->dbuf[g*3][0];
                snw                   = pull->dbuf[g*3][1];
                pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                /* Set the weights for the local atoms */
                wmass  = sqrt(csw*csw + snw*snw);
                wwmass = (pull->dbuf[g*3+1][0]*csw*csw +
                          pull->dbuf[g*3+1][1]*csw*snw +
                          pull->dbuf[g*3+1][2]*snw*snw)/(wmass*wmass);
                pgrp->wscale = wmass/wwmass;
                pgrp->invtm  = 1.0/(pgrp->wscale*wmass);
                /* Set the weights for the local atoms */
                csw *= pgrp->invtm;
                snw *= pgrp->invtm;
                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    ii                  = pgrp->ind_loc[i];
                    pgrp->weight_loc[i] = csw*cos(twopi_box*x[ii][pull->cosdim]) +
                                          snw*sin(twopi_box*x[ii][pull->cosdim]);
                }
                if (xp)
                {
                    csw                    = pull->dbuf[g*3+2][0];
                    snw                    = pull->dbuf[g*3+2][1];
                    pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                }
            }
            if (debug)
            {
                fprintf(debug, "Pull group %d wmass %f wwmass %f invtm %f\n",
                        g, wmass, wwmass, pgrp->invtm);
            }
        }
    }

    if (PULL_CYL(pull))
    {
        /* Calculate the COMs for the cylinder reference groups */
        make_cyl_refgrps(cr, pull, md, pbc, t, x, xp);
    }
}
void do_force_lowlevel(FILE *fplog, gmx_large_int_t step,
                       t_forcerec *fr, t_inputrec *ir,
                       t_idef *idef, t_commrec *cr,
                       t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                       t_mdatoms *md,
                       t_grpopts *opts,
                       rvec x[], history_t *hist,
                       rvec f[],
                       gmx_enerdata_t *enerd,
                       t_fcdata *fcd,
                       gmx_mtop_t *mtop,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       t_atomtypes *atype,
                       gmx_bool bBornRadii,
                       matrix box,
                       real lambda,
                       t_graph *graph,
                       t_blocka *excl,
                       rvec mu_tot[],
                       int flags,
                       float *cycles_pme)
{
    int            i, status;
    int            donb_flags;
    gmx_bool       bDoEpot, bSepDVDL, bSB;
    int            pme_flags;
    matrix         boxs;
    rvec           box_size;
    real           dvdlambda, Vsr, Vlr, Vcorr = 0, vdip, vcharge;
    t_pbc          pbc;
    real           dvdgb;
    char           buf[22];
    gmx_enerdata_t ed_lam;
    double         lam_i;
    real           dvdl_dum;

#ifdef GMX_MPI
    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
#endif

#define PRINT_SEPDVDL(s, v, dvdl) if (bSepDVDL) fprintf(fplog, sepdvdlformat, s, v, dvdl);

    GMX_MPE_LOG(ev_force_start);
    set_pbc(&pbc, fr->ePBC, box);

    /* Reset box */
    for (i = 0; (i < DIM); i++)
    {
        box_size[i] = box[i][i];
    }

    bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog));
    debug_gmx();

    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md);
    }

    if (bSepDVDL)
    {
        fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n",
                gmx_step_str(step, buf), cr->nodeid);
    }

    /* Call the short range functions all in one go. */
    GMX_MPE_LOG(ev_do_fnbf_start);

    dvdlambda = 0;

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0 = MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        dvdlambda = do_walls(ir, fr, box, md, x, f, lambda,
                             enerd->grpp.ener[egLJSR], nrnb);
        PRINT_SEPDVDL("Walls", 0.0, dvdlambda);
        enerd->dvdl_lin += dvdlambda;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        /* wallcycle_start(wcycle, ewcGB); */

        for (i = 0; i < born->nr; i++)
        {
            fr->dvda[i] = 0;
        }

        if (bBornRadii)
        {
            calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb);
        }

        /* wallcycle_stop(wcycle, ewcGB); */
    }

    where();
    donb_flags = 0;
    if (flags & GMX_FORCE_FORCES)
    {
        donb_flags |= GMX_DONB_FORCES;
    }
    do_nonbonded(cr, fr, x, f, md, excl,
                 fr->bBHAM ?
                 enerd->grpp.ener[egBHAMSR] :
                 enerd->grpp.ener[egLJSR],
                 enerd->grpp.ener[egCOULSR],
                 enerd->grpp.ener[egGB], box_size, nrnb,
                 lambda, &dvdlambda, -1, -1, donb_flags);

    /* If we do foreign lambda and we have soft-core interactions
     * we have to recalculate the (non-linear) energies contributions.
     */
    if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) && ir->sc_alpha != 0)
    {
        init_enerdata(mtop->groups.grps[egcENER].nr, ir->n_flambda, &ed_lam);

        for (i = 0; i < enerd->n_lambda; i++)
        {
            lam_i    = (i == 0 ? lambda : ir->flambda[i-1]);
            dvdl_dum = 0;
            reset_enerdata(&ir->opts, fr, TRUE, &ed_lam, FALSE);
            do_nonbonded(cr, fr, x, f, md, excl,
                         fr->bBHAM ?
                         ed_lam.grpp.ener[egBHAMSR] :
                         ed_lam.grpp.ener[egLJSR],
                         ed_lam.grpp.ener[egCOULSR],
                         enerd->grpp.ener[egGB],
                         box_size, nrnb,
                         lam_i, &dvdl_dum, -1, -1,
                         GMX_DONB_FOREIGNLAMBDA);
            sum_epot(&ir->opts, &ed_lam);
            enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
        }
        destroy_enerdata(&ed_lam);
    }
    where();

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    if (ir->implicit_solvent)
    {
        calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef,
                       ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii,
                       &pbc, graph, enerd);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1          = MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    if (ir->sc_alpha != 0)
    {
        enerd->dvdl_nonlin += dvdlambda;
    }
    else
    {
        enerd->dvdl_lin += dvdlambda;
    }
    Vsr = 0;
    if (bSepDVDL)
    {
        for (i = 0; i < enerd->grpp.nener; i++)
        {
            Vsr +=
                (fr->bBHAM ?
                 enerd->grpp.ener[egBHAMSR][i] :
                 enerd->grpp.ener[egLJSR][i])
                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
        }
    }
    PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlambda);
    debug_gmx();

    GMX_MPE_LOG(ev_do_fnbf_finish);

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
    }

    /* Shift the coordinates. Must be done before bonded forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no bonded forces have to be evaluated.
     */

    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }
    /* Check whether we need to do bondeds or correct for exclusions */
    if (fr->bMolPBC &&
        ((flags & GMX_FORCE_BONDED)
         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
    {
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
    }
    debug_gmx();

    if (flags & GMX_FORCE_BONDED)
    {
        GMX_MPE_LOG(ev_calc_bonds_start);
        calc_bonds(fplog, cr->ms,
                   idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
                   fr->bSepDVDL && do_per_step(step, ir->nstlog), step);

        /* Check if we have to determine energy differences
         * at foreign lambda's.
         */
        if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) &&
            idef->ilsort != ilsortNO_FE)
        {
            if (idef->ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
            init_enerdata(mtop->groups.grps[egcENER].nr, ir->n_flambda, &ed_lam);

            for (i = 0; i < enerd->n_lambda; i++)
            {
                lam_i    = (i == 0 ? lambda : ir->flambda[i-1]);
                dvdl_dum = 0;
                reset_enerdata(&ir->opts, fr, TRUE, &ed_lam, FALSE);
                calc_bonds_lambda(fplog,
                                  idef, x, fr, &pbc, graph, &ed_lam, nrnb, lam_i, md,
                                  fcd,
                                  DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
                sum_epot(&ir->opts, &ed_lam);
                enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
            }
            destroy_enerdata(&ed_lam);
        }
        debug_gmx();
        GMX_MPE_LOG(ev_calc_bonds_finish);
    }

    where();
    *cycles_pme = 0;
    if (EEL_FULL(fr->eeltype))
    {
        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box, boxs);
            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        clear_mat(fr->vir_el_recip);

        if (fr->bEwald)
        {
            if (fr->n_tpi == 0)
            {
                dvdlambda = 0;
                Vcorr = ewald_LRcorrection(fplog, md->start, md->start+md->homenr,
                                           cr, fr,
                                           md->chargeA,
                                           md->nChargePerturbed ? md->chargeB : NULL,
                                           excl, x, bSB ? boxs : box, mu_tot,
                                           ir->ewald_geometry,
                                           ir->epsilon_surface,
                                           lambda, &dvdlambda, &vdip, &vcharge);
                PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdlambda);
                enerd->dvdl_lin += dvdlambda;
            }
            else
            {
                if (ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }
                /* The TPI molecule does not have exclusions with the rest
                 * of the system and no intra-molecular PME grid contributions
                 * will be calculated in gmx_pme_calc_energy.
                 */
                Vcorr = 0;
            }
        }
        else
        {
            Vcorr = shift_LRcorrection(fplog, md->start, md->homenr, cr, fr,
                                       md->chargeA, excl, x, TRUE, box,
                                       fr->vir_el_recip);
        }

        dvdlambda = 0;
        status    = 0;
        switch (fr->eeltype)
        {
            case eelPPPM:
                status = gmx_pppm_do(fplog, fr->pmedata, FALSE, x, fr->f_novirsum,
                                     md->chargeA,
                                     box_size, fr->phi, cr, md->start, md->homenr,
                                     nrnb, ir->pme_order, &Vlr);
                break;
            case eelPME:
            case eelPMESWITCH:
            case eelPMEUSER:
            case eelPMEUSERSWITCH:
                if (cr->duty & DUTY_PME)
                {
                    if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
                    {
                        pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
                        if (flags & GMX_FORCE_FORCES)
                        {
                            pme_flags |= GMX_PME_CALC_F;
                        }
                        if (flags & GMX_FORCE_VIRIAL)
                        {
                            pme_flags |= GMX_PME_CALC_ENER_VIR;
                        }
                        if (fr->n_tpi > 0)
                        {
                            /* We don't calculate f, but we do want the potential */
                            pme_flags |= GMX_PME_CALC_POT;
                        }
                        wallcycle_start(wcycle, ewcPMEMESH);
                        status = gmx_pme_do(fr->pmedata,
                                            md->start, md->homenr - fr->n_tpi,
                                            x, fr->f_novirsum,
                                            md->chargeA, md->chargeB,
                                            bSB ? boxs : box, cr,
                                            DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                            DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                            nrnb, wcycle,
                                            fr->vir_el_recip, fr->ewaldcoeff,
                                            &Vlr, lambda, &dvdlambda,
                                            pme_flags);
                        *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);

                        /* We should try to do as little computation after
                         * this as possible, because parallel PME synchronizes
                         * the nodes, so we want all load imbalance of the rest
                         * of the force calculation to be before the PME call.
                         * DD load balancing is done on the whole time of
                         * the force call (without PME).
                         */
                    }
                    if (fr->n_tpi > 0)
                    {
                        /* Determine the PME grid energy of the test molecule
                         * with the PME grid potential of the other charges.
*/ gmx_pme_calc_energy(fr->pmedata,fr->n_tpi, x + md->homenr - fr->n_tpi, md->chargeA + md->homenr - fr->n_tpi, &Vlr); } PRINT_SEPDVDL("PME mesh",Vlr,dvdlambda); } else { /* Energies and virial are obtained later from the PME nodes */ /* but values have to be zeroed out here */ Vlr=0.0; } break; case eelEWALD: Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum, md->chargeA,md->chargeB, box_size,cr,md->homenr, fr->vir_el_recip,fr->ewaldcoeff, lambda,&dvdlambda,fr->ewald_table); PRINT_SEPDVDL("Ewald long-range",Vlr,dvdlambda); break; default: Vlr = 0; gmx_fatal(FARGS,"No such electrostatics method implemented %s", eel_names[fr->eeltype]); } if (status != 0) { gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s", status,EELTYPE(fr->eeltype)); } enerd->dvdl_lin += dvdlambda; enerd->term[F_COUL_RECIP] = Vlr + Vcorr; if (debug) { fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n", Vlr,Vcorr,enerd->term[F_COUL_RECIP]); pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM); pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS); } } else { if (EEL_RF(fr->eeltype)) { dvdlambda = 0; if (fr->eeltype != eelRF_NEC) { enerd->term[F_RF_EXCL] = RF_excl_correction(fplog,fr,graph,md,excl,x,f, fr->fshift,&pbc,lambda,&dvdlambda); } enerd->dvdl_lin += dvdlambda; PRINT_SEPDVDL("RF exclusion correction", enerd->term[F_RF_EXCL],dvdlambda); } } where(); debug_gmx(); if (debug) { print_nrnb(debug,nrnb); } debug_gmx(); #ifdef GMX_MPI if (TAKETIME) { t2=MPI_Wtime(); MPI_Barrier(cr->mpi_comm_mygroup); t3=MPI_Wtime(); fr->t_wait += t3-t2; if (fr->timesteps == 11) { fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", cr->nodeid, gmx_step_str(fr->timesteps,buf), 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); } fr->timesteps++; } #endif if (debug) { pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS); } GMX_MPE_LOG(ev_force_finish); }
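/* The foreign-lambda loops above all follow one indexing pattern: slot 0
 * of the accumulation array is the current lambda and slots 1..n map to
 * ir->flambda[0..n-1], with the energy fully recomputed per lambda
 * because soft-core terms are non-linear in lambda. A hedged sketch of
 * just that indexing; epot_at and ctx are hypothetical stand-ins for
 * the reset_enerdata/do_nonbonded/sum_epot sequence.
 */
static void scan_foreign_lambdas(int n_lambda, double lambda,
                                 const double *flambda,
                                 double (*epot_at)(double lam, void *ctx),
                                 void *ctx, double *enerpart_lambda)
{
    int i;

    for (i = 0; i < n_lambda; i++)
    {
        double lam_i = (i == 0 ? lambda : flambda[i-1]);

        enerpart_lambda[i] += epot_at(lam_i, ctx);
    }
}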
void global_stat(FILE *fplog, gmx_global_stat_t gs, t_commrec *cr, gmx_enerdata_t *enerd, tensor fvir, tensor svir, rvec mu_tot, t_inputrec *inputrec, gmx_ekindata_t *ekind, gmx_constr_t constr, t_vcm *vcm, int nsig, real *sig, gmx_mtop_t *top_global, t_state *state_local, gmx_bool bSumEkinhOld, int flags) /* instead of current system, gmx_booleans for summing virial, kinetic energy, and other terms */ { t_bin *rb; int *itc0, *itc1; int ie = 0, ifv = 0, isv = 0, irmsd = 0, imu = 0; int idedl = 0, idvdll = 0, idvdlnl = 0, iepl = 0, icm = 0, imass = 0, ica = 0, inb = 0; int isig = -1; int icj = -1, ici = -1, icx = -1; int inn[egNR]; real copyenerd[F_NRE]; int nener, j; real *rmsd_data = NULL; double nb; gmx_bool bVV, bTemp, bEner, bPres, bConstrVir, bEkinAveVel, bReadEkin; bVV = EI_VV(inputrec->eI); bTemp = flags & CGLO_TEMPERATURE; bEner = flags & CGLO_ENERGY; bPres = (flags & CGLO_PRESSURE); bConstrVir = (flags & CGLO_CONSTRAINT); bEkinAveVel = (inputrec->eI == eiVV || (inputrec->eI == eiVVAK && bPres)); bReadEkin = (flags & CGLO_READEKIN); rb = gs->rb; itc0 = gs->itc0; itc1 = gs->itc1; reset_bin(rb); /* This routine copies all the data to be summed to one big buffer * using the t_bin struct. */ /* First, we need to identify which enerd->term should be communicated. Temperature and pressure terms should only be communicated and summed when they need to be, to avoid repeating the sums and overcounting. */ nener = filter_enerdterm(enerd->term, TRUE, copyenerd, bTemp, bPres, bEner); /* Next, the data that needs to be communicated with velocity verlet every time: this is just the constraint virial. */ if (bConstrVir) { isv = add_binr(rb, DIM*DIM, svir[0]); where(); } /* We need the force virial and the kinetic energy for the first time through with velocity verlet */ if (bTemp || !bVV) { if (ekind) { for (j = 0; (j < inputrec->opts.ngtc); j++) { if (bSumEkinhOld) { itc0[j] = add_binr(rb, DIM*DIM, ekind->tcstat[j].ekinh_old[0]); } if (bEkinAveVel && !bReadEkin) { itc1[j] = add_binr(rb, DIM*DIM, ekind->tcstat[j].ekinf[0]); } else if (!bReadEkin) { itc1[j] = add_binr(rb, DIM*DIM, ekind->tcstat[j].ekinh[0]); } } /* these probably need to be put into one of these categories */ where(); idedl = add_binr(rb, 1, &(ekind->dekindl)); where(); ica = add_binr(rb, 1, &(ekind->cosacc.mvcos)); where(); } } where(); if (bPres || !bVV) { ifv = add_binr(rb, DIM*DIM, fvir[0]); } if (bEner) { where(); ie = add_binr(rb, nener, copyenerd); where(); if (constr) { rmsd_data = constr_rmsd_data(constr); if (rmsd_data) { irmsd = add_binr(rb, inputrec->eI == eiSD2 ?
3 : 2, rmsd_data); } } if (!NEED_MUTOT(*inputrec)) { imu = add_binr(rb, DIM, mu_tot); where(); } for (j = 0; (j < egNR); j++) { inn[j] = add_binr(rb, enerd->grpp.nener, enerd->grpp.ener[j]); } where(); if (inputrec->efep != efepNO) { idvdll = add_bind(rb, efptNR, enerd->dvdl_lin); idvdlnl = add_bind(rb, efptNR, enerd->dvdl_nonlin); if (enerd->n_lambda > 0) { iepl = add_bind(rb, enerd->n_lambda, enerd->enerpart_lambda); } } } if (vcm) { icm = add_binr(rb, DIM*vcm->nr, vcm->group_p[0]); where(); imass = add_binr(rb, vcm->nr, vcm->group_mass); where(); if (vcm->mode == ecmANGULAR) { icj = add_binr(rb, DIM*vcm->nr, vcm->group_j[0]); where(); icx = add_binr(rb, DIM*vcm->nr, vcm->group_x[0]); where(); ici = add_binr(rb, DIM*DIM*vcm->nr, vcm->group_i[0][0]); where(); } } if (DOMAINDECOMP(cr)) { nb = cr->dd->nbonded_local; inb = add_bind(rb, 1, &nb); } where(); if (nsig > 0) { isig = add_binr(rb, nsig, sig); } /* Global sum it all */ if (debug) { fprintf(debug, "Summing %d energies\n", rb->maxreal); } sum_bin(rb, cr); where(); /* Extract all the data locally */ if (bConstrVir) { extract_binr(rb, isv, DIM*DIM, svir[0]); } /* We need the force virial and the kinetic energy for the first time through with velocity verlet */ if (bTemp || !bVV) { if (ekind) { for (j = 0; (j < inputrec->opts.ngtc); j++) { if (bSumEkinhOld) { extract_binr(rb, itc0[j], DIM*DIM, ekind->tcstat[j].ekinh_old[0]); } if (bEkinAveVel && !bReadEkin) { extract_binr(rb, itc1[j], DIM*DIM, ekind->tcstat[j].ekinf[0]); } else if (!bReadEkin) { extract_binr(rb, itc1[j], DIM*DIM, ekind->tcstat[j].ekinh[0]); } } extract_binr(rb, idedl, 1, &(ekind->dekindl)); extract_binr(rb, ica, 1, &(ekind->cosacc.mvcos)); where(); } } if (bPres || !bVV) { extract_binr(rb, ifv, DIM*DIM, fvir[0]); } if (bEner) { extract_binr(rb, ie, nener, copyenerd); if (rmsd_data) { extract_binr(rb, irmsd, inputrec->eI == eiSD2 ? 3 : 2, rmsd_data); } if (!NEED_MUTOT(*inputrec)) { extract_binr(rb, imu, DIM, mu_tot); } for (j = 0; (j < egNR); j++) { extract_binr(rb, inn[j], enerd->grpp.nener, enerd->grpp.ener[j]); } if (inputrec->efep != efepNO) { extract_bind(rb, idvdll, efptNR, enerd->dvdl_lin); extract_bind(rb, idvdlnl, efptNR, enerd->dvdl_nonlin); if (enerd->n_lambda > 0) { extract_bind(rb, iepl, enerd->n_lambda, enerd->enerpart_lambda); } } if (DOMAINDECOMP(cr)) { extract_bind(rb, inb, 1, &nb); if ((int)(nb + 0.5) != cr->dd->nbonded_global) { dd_print_missing_interactions(fplog, cr, (int)(nb + 0.5), top_global, state_local); } } where(); filter_enerdterm(copyenerd, FALSE, enerd->term, bTemp, bPres, bEner); } if (vcm) { extract_binr(rb, icm, DIM*vcm->nr, vcm->group_p[0]); where(); extract_binr(rb, imass, vcm->nr, vcm->group_mass); where(); if (vcm->mode == ecmANGULAR) { extract_binr(rb, icj, DIM*vcm->nr, vcm->group_j[0]); where(); extract_binr(rb, icx, DIM*vcm->nr, vcm->group_x[0]); where(); extract_binr(rb, ici, DIM*DIM*vcm->nr, vcm->group_i[0][0]); where(); } } if (nsig > 0) { extract_binr(rb, isig, nsig, sig); } where(); }
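/* global_stat() relies on the t_bin idiom: pack every quantity into one
 * flat buffer, remember the index add_binr() returned, do a single
 * global reduction, then extract by index. A self-contained sketch of
 * that idiom with a hypothetical bin_t, assuming <stdlib.h> and
 * <string.h>; in the real code the reduction is gmx_sumd() over MPI.
 */
typedef struct {
    double *r;
    int     n, nalloc;
} bin_t;

static int bin_add(bin_t *b, const double *v, int nv)
{
    int index = b->n, i;

    if (b->n + nv > b->nalloc)
    {
        b->nalloc = 2*(b->n + nv);
        b->r      = (double *) realloc(b->r, b->nalloc*sizeof(double));
    }
    for (i = 0; i < nv; i++)
    {
        b->r[b->n++] = v[i];
    }
    /* The caller keeps this index for the matching extract after the sum */
    return index;
}

static void bin_extract(const bin_t *b, int index, double *v, int nv)
{
    memcpy(v, b->r + index, nv*sizeof(double));
}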
gmx_bool pme_load_balance(pme_load_balancing_t pme_lb, t_commrec *cr, FILE *fp_err, FILE *fp_log, t_inputrec *ir, t_state *state, double cycles, interaction_const_t *ic, struct nonbonded_verlet_t *nbv, struct gmx_pme_t ** pmedata, gmx_int64_t step) { gmx_bool OK; pme_setup_t *set; double cycles_fast; char buf[STRLEN], sbuf[22]; real rtab; gmx_bool bUsesSimpleTables = TRUE; if (pme_lb->stage == pme_lb->nstage) { return FALSE; } if (PAR(cr)) { gmx_sumd(1, &cycles, cr); cycles /= cr->nnodes; } set = &pme_lb->setup[pme_lb->cur]; set->count++; rtab = ir->rlistlong + ir->tabext; if (set->count % 2 == 1) { /* Skip the first cycle, because the first step after a switch * is much slower due to allocation and/or caching effects. */ return TRUE; } sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf)); print_grid(fp_err, fp_log, buf, "timed with", set, cycles); if (set->count <= 2) { set->cycles = cycles; } else { if (cycles*PME_LB_ACCEL_TOL < set->cycles && pme_lb->stage == pme_lb->nstage - 1) { /* The performance went up a lot (due to e.g. DD load balancing). * Add a stage, keep the minima, but rescan all setups. */ pme_lb->nstage++; if (debug) { fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n" "Increased the number of stages to %d" " and ignoring the previous performance\n", set->grid[XX], set->grid[YY], set->grid[ZZ], cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL, pme_lb->nstage); } } set->cycles = min(set->cycles, cycles); } if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles) { pme_lb->fastest = pme_lb->cur; if (DOMAINDECOMP(cr)) { /* We found a new fastest setting, ensure that with subsequent * shorter cut-offs the dynamic load balancing does not make * the use of the current cut-off impossible. This solution is * a trade-off, as the PME load balancing and DD domain size * load balancing can interact in complex ways. * With the Verlet kernels, DD load imbalance will usually be * mainly due to bonded interaction imbalance, which will often * quickly push the domain boundaries beyond the limit for the * optimal, PME load balanced, cut-off. But it could be that * better overall performance can be obtained with a slightly * shorter cut-off and better DD load balancing. */ change_dd_dlb_cutoff_limit(cr); } } cycles_fast = pme_lb->setup[pme_lb->fastest].cycles; /* Check in stage 0 if we should stop scanning grids. * Stop when the time is more than SLOW_FAC longer than the fastest. */ if (pme_lb->stage == 0 && pme_lb->cur > 0 && cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC) { pme_lb->n = pme_lb->cur + 1; /* Done with scanning, go to stage 1 */ switch_to_stage1(pme_lb); } if (pme_lb->stage == 0) { int gridsize_start; gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ]; do { if (pme_lb->cur+1 < pme_lb->n) { /* We had already generated the next setup */ OK = TRUE; } else { /* Find the next setup */ OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order, cr->dd); if (!OK) { pme_lb->elimited = epmelblimPMEGRID; } } if (OK && ir->ePBC != epbcNONE) { OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong) <= max_cutoff2(ir->ePBC, state->box)); if (!OK) { pme_lb->elimited = epmelblimBOX; } } if (OK) { pme_lb->cur++; if (DOMAINDECOMP(cr)) { OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong); if (!OK) { /* Failed: do not use this setup */ pme_lb->cur--; pme_lb->elimited = epmelblimDD; } } } if (!OK) { /* We hit the upper limit for the cut-off, * the setup should not go further than cur.
*/ pme_lb->n = pme_lb->cur + 1; print_loadbal_limited(fp_err, fp_log, step, pme_lb); /* Switch to the next stage */ switch_to_stage1(pme_lb); } } while (OK && !(pme_lb->setup[pme_lb->cur].grid[XX]* pme_lb->setup[pme_lb->cur].grid[YY]* pme_lb->setup[pme_lb->cur].grid[ZZ] < gridsize_start*PME_LB_GRID_SCALE_FAC && pme_lb->setup[pme_lb->cur].grid_efficiency < pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC)); } if (pme_lb->stage > 0 && pme_lb->end == 1) { pme_lb->cur = 0; pme_lb->stage = pme_lb->nstage; } else if (pme_lb->stage > 0 && pme_lb->end > 1) { /* If stage = nstage-1: * scan over all setups, rerunning only those setups * which are not much slower than the fastest * else: * use the next setup */ do { pme_lb->cur++; if (pme_lb->cur == pme_lb->end) { pme_lb->stage++; pme_lb->cur = pme_lb->start; } } while (pme_lb->stage == pme_lb->nstage - 1 && pme_lb->setup[pme_lb->cur].count > 0 && pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC); if (pme_lb->stage == pme_lb->nstage) { /* We are done optimizing, use the fastest setup we found */ pme_lb->cur = pme_lb->fastest; } } if (DOMAINDECOMP(cr) && pme_lb->stage > 0) { OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong); if (!OK) { /* Failsafe solution */ if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage) { pme_lb->stage--; } pme_lb->fastest = 0; pme_lb->start = 0; pme_lb->end = pme_lb->cur; pme_lb->cur = pme_lb->start; pme_lb->elimited = epmelblimDD; print_loadbal_limited(fp_err, fp_log, step, pme_lb); } } /* Change the Coulomb cut-off and the PME grid */ set = &pme_lb->setup[pme_lb->cur]; ic->rcoulomb = set->rcut_coulomb; ic->rlist = set->rlist; ic->rlistlong = set->rlistlong; ir->nstcalclr = set->nstcalclr; ic->ewaldcoeff_q = set->ewaldcoeff_q; /* TODO: centralize the code that sets the potential shifts */ if (ic->coulomb_modifier == eintmodPOTSHIFT) { ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb); } if (EVDW_PME(ic->vdwtype)) { /* We have PME for both Coulomb and VdW, set rvdw equal to rcoulomb */ ic->rvdw = set->rcut_coulomb; ic->ewaldcoeff_lj = set->ewaldcoeff_lj; if (ic->vdw_modifier == eintmodPOTSHIFT) { real crc2; ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0); ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0); ic->sh_invrc6 = -ic->dispersion_shift.cpot; crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw); ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0); } } bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0); nbnxn_gpu_pme_loadbal_update_param(nbv, ic); /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore * also sharing texture references. To keep the code simple, we don't * treat texture references as shared resources, but this means that * the coulomb_tab texture ref will get updated by multiple threads. * Hence, to ensure that the non-bonded kernels don't start before all * texture binding operations are finished, we need to wait for all ranks * to arrive here before continuing. * * Note that we could omit this barrier if GPUs are not shared (or * texture objects are used), but as this is initialization code, there * is no point in complicating things. */ #ifdef GMX_THREAD_MPI if (PAR(cr) && use_GPU(nbv)) { gmx_barrier(cr); } #endif /* GMX_THREAD_MPI */ /* Usually we won't need the simple tables with GPUs. * But we do with hybrid acceleration and with free energy. * To avoid bugs, we always re-initialize the simple tables here.
*/ init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab); if (cr->duty & DUTY_PME) { if (pme_lb->setup[pme_lb->cur].pmedata == NULL) { /* Generate a new PME data structure, * copying part of the old pointers. */ gmx_pme_reinit(&set->pmedata, cr, pme_lb->setup[0].pmedata, ir, set->grid); } *pmedata = set->pmedata; } else { /* Tell our PME-only node to switch grid */ gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff_q, set->ewaldcoeff_lj); } if (debug) { print_grid(NULL, debug, "", "switched to", set, -1); } if (pme_lb->stage == pme_lb->nstage) { print_grid(fp_err, fp_log, "", "optimal", set, -1); } return TRUE; }
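/* The potential-shift constants set above follow directly from the
 * shifted interaction forms: the Coulomb shift is erfc(beta_q*rc) and
 * the LJ-PME dispersion shift is (exp(-c)*(1 + c + c^2/2) - 1)*rc^-6
 * with c = (beta_lj*rc)^2. A standalone recomputation of those two
 * expressions, assuming only C99 <math.h> (erfc, exp, pow); names are
 * illustrative, not GROMACS API.
 */
static void calc_potential_shifts(double ewaldcoeff_q, double ewaldcoeff_lj,
                                  double rc, double *sh_ewald,
                                  double *sh_lj_ewald)
{
    double crc2 = (ewaldcoeff_lj*rc)*(ewaldcoeff_lj*rc);

    *sh_ewald    = erfc(ewaldcoeff_q*rc);
    *sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(rc, -6.0);
}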
void update_QMMMrec(t_commrec *cr, t_forcerec *fr, rvec x[], t_mdatoms *md, matrix box, gmx_localtop_t *top) { /* updates the coordinates of both QM atoms and MM atoms and stores * them in the QMMMrec. * * NOTE: is NOT yet working if there are no PBC. Also in ns.c, simple * ns needs to be fixed! */ int mm_max=0,mm_nr=0,mm_nr_new,i,j,is,k,shift; t_j_particle *mm_j_particles=NULL,*qm_i_particles=NULL; t_QMMMrec *qr; t_nblist QMMMlist; rvec dx,crd; int *MMatoms; t_QMrec *qm; t_MMrec *mm; t_pbc pbc; int *parallelMMarray=NULL; real c12au,c6au; c6au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,6)); c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,12)); /* every cpu has this array. On every processor we fill this array * with 1's and 0's. A 1 indicates that the atom is a QM atom on the * current cpu; in a later stage these arrays are all summed. indexes * > 0 indicate the atom is a QM atom. Every node therefore knows * which atoms are part of the QM subsystem. */ /* copy some pointers */ qr = fr->qr; mm = qr->mm; QMMMlist = fr->QMMMlist; /* init_pbc(box); needs to be called first, see pbc.h */ set_pbc_dd(&pbc,fr->ePBC,DOMAINDECOMP(cr) ? cr->dd : NULL,FALSE,box); /* only in standard (normal) QMMM do we need the neighbouring MM * particles to provide an electric field of point charges for the QM * atoms. */ if(qr->QMMMscheme==eQMMMschemenormal){ /* also implies 1 QM-layer */ /* we NOW create/update a number of QMMMrec entries: * * 1) the shiftQM, containing the shifts of the QM atoms * * 2) the indexMM array, containing the index of the MM atoms * * 3) the shiftMM, containing the shifts of the MM atoms * * 4) the shifted coordinates of the MM atoms * * the shifts are used for computing the virial of the QM/MM particles. */ qm = qr->qm[0]; /* in case of normal QMMM, there is only one group */ snew(qm_i_particles,QMMMlist.nri); if(QMMMlist.nri){ qm_i_particles[0].shift = XYZ2IS(0,0,0); for(i=0;i<QMMMlist.nri;i++){ qm_i_particles[i].j = QMMMlist.iinr[i]; if(i){ qm_i_particles[i].shift = pbc_dx_aiuc(&pbc,x[QMMMlist.iinr[0]], x[QMMMlist.iinr[i]],dx); } /* However, since nri >= nrQMatoms, we do a quicksort, and throw * out double, triple, etc. entries later, as we do for the MM * list too. */ /* compute the shift for the MM j-particles with respect to * the QM i-particle and store them. */ crd[0] = IS2X(QMMMlist.shift[i]) + IS2X(qm_i_particles[i].shift); crd[1] = IS2Y(QMMMlist.shift[i]) + IS2Y(qm_i_particles[i].shift); crd[2] = IS2Z(QMMMlist.shift[i]) + IS2Z(qm_i_particles[i].shift); is = XYZ2IS(crd[0],crd[1],crd[2]); for(j=QMMMlist.jindex[i]; j<QMMMlist.jindex[i+1]; j++){ if(mm_nr >= mm_max){ mm_max += 1000; srenew(mm_j_particles,mm_max); } mm_j_particles[mm_nr].j = QMMMlist.jjnr[j]; mm_j_particles[mm_nr].shift = is; mm_nr++; } } /* quicksort QM and MM shift arrays and throw away multiple entries */ qsort(qm_i_particles,QMMMlist.nri, (size_t)sizeof(qm_i_particles[0]), struct_comp); qsort(mm_j_particles,mm_nr, (size_t)sizeof(mm_j_particles[0]), struct_comp); /* remove multiples in the QM shift array, since in init_QMMM() we * went through the atom numbers from 0 to md.nr, the order sorted * here matches the one of QMindex already.
*/ j=0; for(i=0;i<QMMMlist.nri;i++){ if (i==0 || qm_i_particles[i].j!=qm_i_particles[i-1].j){ qm_i_particles[j++] = qm_i_particles[i]; } } mm_nr_new = 0; if(qm->bTS||qm->bOPT){ /* only remove double entries for the MM array */ for(i=0;i<mm_nr;i++){ if((i==0 || mm_j_particles[i].j!=mm_j_particles[i-1].j) && !md->bQM[mm_j_particles[i].j]){ mm_j_particles[mm_nr_new++] = mm_j_particles[i]; } } } /* we also remove mm atoms that have no charges! * actually this is already done in ns.c */ else{ for(i=0;i<mm_nr;i++){ if((i==0 || mm_j_particles[i].j!=mm_j_particles[i-1].j) && !md->bQM[mm_j_particles[i].j] && (md->chargeA[mm_j_particles[i].j] || (md->chargeB && md->chargeB[mm_j_particles[i].j]))) { mm_j_particles[mm_nr_new++] = mm_j_particles[i]; } } } mm_nr = mm_nr_new; /* store the data retrieved above into the QMMMrec */ k=0; /* Keep the compiler happy, * shift will always be set in the loop for i=0 */ shift = 0; for(i=0;i<qm->nrQMatoms;i++){ /* not all qm particles might have appeared as i * particles. They might have been part of the same charge * group for instance. */ if (qm->indexQM[i] == qm_i_particles[k].j) { shift = qm_i_particles[k++].shift; } /* use previous shift, assuming they belong to the same charge * group anyway, */ qm->shiftQM[i] = shift; } } /* parallel execution */ if(PAR(cr)){ snew(parallelMMarray,2*(md->nr)); /* only MM particles have a 1 at their atomnumber. The second part * of the array contains the shifts. Thus: * p[i]=1/0 depending on whether atomnumber i is a MM particle in the QM * step or not. p[i+md->nr] is the shift of atomnumber i. */ for(i=0;i<2*(md->nr);i++){ parallelMMarray[i]=0; } for(i=0;i<mm_nr;i++){ parallelMMarray[mm_j_particles[i].j]=1; parallelMMarray[mm_j_particles[i].j+(md->nr)]=mm_j_particles[i].shift; } gmx_sumi(md->nr,parallelMMarray,cr); mm_nr=0; mm_max = 0; for(i=0;i<md->nr;i++){ if(parallelMMarray[i]){ if(mm_nr >= mm_max){ mm_max += 1000; srenew(mm->indexMM,mm_max); srenew(mm->shiftMM,mm_max); } mm->indexMM[mm_nr] = i; mm->shiftMM[mm_nr++]= parallelMMarray[i+md->nr]/parallelMMarray[i]; } } mm->nrMMatoms=mm_nr; free(parallelMMarray); } /* serial execution */ else{ mm->nrMMatoms = mm_nr; srenew(mm->shiftMM,mm_nr); srenew(mm->indexMM,mm_nr); for(i=0;i<mm_nr;i++){ mm->indexMM[i]=mm_j_particles[i].j; mm->shiftMM[i]=mm_j_particles[i].shift; } } /* (re) allocate memory for the MM coordinate array. The QM * coordinate array was already allocated in init_QMMM, and is * only (re)filled in the update_QMMM_coordinates routine */ srenew(mm->xMM,mm->nrMMatoms); /* now we (re) fill the array that contains the MM charges with * the forcefield charges. If requested, these charges will be * scaled by a factor */ srenew(mm->MMcharges,mm->nrMMatoms); for(i=0;i<mm->nrMMatoms;i++){/* no free energy yet */ mm->MMcharges[i]=md->chargeA[mm->indexMM[i]]*mm->scalefactor; } if(qm->bTS||qm->bOPT){ /* store (copy) the c6 and c12 parameters into the MMrec struct */ srenew(mm->c6,mm->nrMMatoms); srenew(mm->c12,mm->nrMMatoms); for (i=0;i<mm->nrMMatoms;i++){ mm->c6[i] = C6(fr->nbfp,top->idef.atnr, md->typeA[mm->indexMM[i]], md->typeA[mm->indexMM[i]])/c6au; mm->c12[i] =C12(fr->nbfp,top->idef.atnr, md->typeA[mm->indexMM[i]], md->typeA[mm->indexMM[i]])/c12au; } punch_QMMM_excl(qr->qm[0],mm,&(top->excls)); } /* the next routine fills the coordinate fields in the QMMM rec of * both the quantum atoms and the MM atoms, using the shifts * calculated above. */ update_QMMM_coord(x,fr,qr->qm[0],qr->mm); free(qm_i_particles); free(mm_j_particles); } else { /* ONIOM */ /* ?????
*/ mm->nrMMatoms=0; /* do for each layer */ for (j=0;j<qr->nrQMlayers;j++){ qm = qr->qm[j]; qm->shiftQM[0]=XYZ2IS(0,0,0); for(i=1;i<qm->nrQMatoms;i++){ qm->shiftQM[i] = pbc_dx_aiuc(&pbc,x[qm->indexQM[0]],x[qm->indexQM[i]], dx); } update_QMMM_coord(x,fr,qm,mm); } } } /* update_QMMM_rec */
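/* The parallel branch above gathers the distributed MM list with a
 * flag-plus-payload array: each rank writes a 1 at the atom's index and
 * the shift in the second half, one integer sum combines all ranks, and
 * dividing the payload by the count recovers the shift (ranks seeing
 * the same atom store identical shifts). A hedged standalone sketch;
 * sum_ranks is a hypothetical stand-in for gmx_sumi(), and the
 * reduction here covers both halves of the scratch array.
 */
static int gather_flagged_atoms(int natoms, int nlocal,
                                const int *local_idx, const int *local_shift,
                                int *scratch /* 2*natoms, zeroed */,
                                int *idx_out, int *shift_out,
                                void (*sum_ranks)(int n, int *buf))
{
    int i, nout = 0;

    for (i = 0; i < nlocal; i++)
    {
        scratch[local_idx[i]]          = 1;
        scratch[local_idx[i] + natoms] = local_shift[i];
    }
    sum_ranks(2*natoms, scratch);
    for (i = 0; i < natoms; i++)
    {
        if (scratch[i])
        {
            idx_out[nout]     = i;
            shift_out[nout++] = scratch[i + natoms]/scratch[i];
        }
    }
    return nout;
}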
void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo, const gmx_hw_opt_t *hw_opt, gmx_bool bNtOmpOptionSet, t_commrec *cr, FILE *fplog) { #if defined GMX_OPENMP && defined GMX_MPI int nth_omp_min, nth_omp_max, ngpu; char buf[1000]; #ifdef GMX_THREAD_MPI const char *mpi_option = " (option -ntmpi)"; #else const char *mpi_option = ""; #endif /* This function should be called after thread-MPI (when configured) and * OpenMP have been initialized. Check that here. */ #ifdef GMX_THREAD_MPI GMX_RELEASE_ASSERT(nthreads_omp_faster_default >= nthreads_omp_mpi_ok_max, "Inconsistent OpenMP thread count default values"); GMX_RELEASE_ASSERT(hw_opt->nthreads_tmpi >= 1, "Must have at least one thread-MPI rank"); #endif GMX_RELEASE_ASSERT(gmx_omp_nthreads_get(emntDefault) >= 1, "Must have at least one OpenMP thread"); nth_omp_min = gmx_omp_nthreads_get(emntDefault); nth_omp_max = gmx_omp_nthreads_get(emntDefault); ngpu = hw_opt->gpu_opt.n_dev_use; /* Thread-MPI seems to have a bug with reduce on 1 node, so use a cond. */ if (cr->nnodes + cr->npmenodes > 1) { int count[3], count_max[3]; count[0] = -nth_omp_min; count[1] = nth_omp_max; count[2] = ngpu; MPI_Allreduce(count, count_max, 3, MPI_INT, MPI_MAX, cr->mpi_comm_mysim); /* In case of an inhomogeneous run setup we use the maximum counts */ nth_omp_min = -count_max[0]; nth_omp_max = count_max[1]; ngpu = count_max[2]; } int nthreads_omp_mpi_ok_min; if (ngpu == 0) { nthreads_omp_mpi_ok_min = nthreads_omp_mpi_ok_min_cpu; } else { /* With GPUs we set the minimum number of OpenMP threads to 2 to catch * cases where the user specifies #ranks == #cores. */ nthreads_omp_mpi_ok_min = nthreads_omp_mpi_ok_min_gpu; } if (DOMAINDECOMP(cr) && cr->nnodes > 1) { if (nth_omp_max < nthreads_omp_mpi_ok_min || (!(ngpu > 0 && !gmx_gpu_sharing_supported()) && nth_omp_max > nthreads_omp_mpi_ok_max)) { /* Note that we print target_max here, not ok_max */ sprintf(buf, "Your choice of number of MPI ranks and amount of resources results in using %d OpenMP threads per rank, which is most likely inefficient. The optimum is usually between %d and %d threads per rank.", nth_omp_max, nthreads_omp_mpi_ok_min, nthreads_omp_mpi_target_max); if (bNtOmpOptionSet) { md_print_warn(cr, fplog, "NOTE: %s\n", buf); } else { /* This fatal error, and the one below, is nasty, but it's * probably the only way to ensure that all users don't waste * a lot of resources, since many users don't read logs/stderr. */ gmx_fatal(FARGS, "%s If you want to run with this setup, specify the -ntomp option. But we suggest to change the number of MPI ranks%s.", buf, mpi_option); } } } else { /* No domain decomposition (or only one domain) */ if (!(ngpu > 0 && !gmx_gpu_sharing_supported()) && nth_omp_max > nthreads_omp_faster(hwinfo->cpuid_info, ngpu > 0)) { /* To arrive here, the user/system set #ranks and/or #OMPthreads */ gmx_bool bEnvSet; char buf2[256]; bEnvSet = (getenv("OMP_NUM_THREADS") != NULL); if (bNtOmpOptionSet || bEnvSet) { sprintf(buf2, "You requested %d OpenMP threads", nth_omp_max); } else { sprintf(buf2, "Your choice of %d MPI rank%s and the use of %d total threads %sleads to the use of %d OpenMP threads", cr->nnodes + cr->npmenodes, cr->nnodes + cr->npmenodes == 1 ? "" : "s", hw_opt->nthreads_tot > 0 ? hw_opt->nthreads_tot : hwinfo->nthreads_hw_avail, hwinfo->nphysicalnode > 1 ? 
"on a node " : "", nth_omp_max); } sprintf(buf, "%s, whereas we expect the optimum to be with more MPI ranks with %d to %d OpenMP threads.", buf2, nthreads_omp_mpi_ok_min, nthreads_omp_mpi_target_max); /* We can not quit with a fatal error when OMP_NUM_THREADS is set * with different values per rank or node, since in that case * the user can not set -ntomp to override the error. */ if (bNtOmpOptionSet || (bEnvSet && nth_omp_min != nth_omp_max)) { md_print_warn(cr, fplog, "NOTE: %s\n", buf); } else { gmx_fatal(FARGS, "%s If you want to run with this many OpenMP threads, specify the -ntomp option. But we suggest to increase the number of MPI ranks%s.", buf, mpi_option); } } } #else /* GMX_OPENMP && GMX_MPI */ /* No OpenMP and/or MPI: it doesn't make much sense to check */ GMX_UNUSED_VALUE(hw_opt); GMX_UNUSED_VALUE(bNtOmpOptionSet); /* Check if we have more than 1 physical core, if detected, * or more than 1 hardware thread if physical cores were not detected. */ #if !(defined GMX_OPENMP) && !(defined GMX_MPI) if ((hwinfo->ncore > 1) || (hwinfo->ncore == 0 && hwinfo->nthreads_hw_avail > 1)) { md_print_warn(cr, fplog, "NOTE: GROMACS was compiled without OpenMP and (thread-)MPI support, can only use a single CPU core\n"); } #else GMX_UNUSED_VALUE(hwinfo); GMX_UNUSED_VALUE(cr); GMX_UNUSED_VALUE(fplog); #endif #endif /* GMX_OPENMP && GMX_MPI */ }
static void init_adir(FILE *log, gmx_shellfc_t shfc, gmx_constr_t constr, t_idef *idef, t_inputrec *ir, t_commrec *cr, int dd_ac1, gmx_int64_t step, t_mdatoms *md, int start, int end, rvec *x_old, rvec *x_init, rvec *x, rvec *f, rvec *acc_dir, gmx_bool bMolPBC, matrix box, real *lambda, real *dvdlambda, t_nrnb *nrnb) { rvec *xnold, *xnew; double w_dt; int gf, ga, gt; real dt, scale; int n, d; unsigned short *ptype; rvec p, dx; if (DOMAINDECOMP(cr)) { n = dd_ac1; } else { n = end - start; } if (n > shfc->adir_nalloc) { shfc->adir_nalloc = over_alloc_dd(n); srenew(shfc->adir_xnold, shfc->adir_nalloc); srenew(shfc->adir_xnew, shfc->adir_nalloc); } xnold = shfc->adir_xnold; xnew = shfc->adir_xnew; ptype = md->ptype; dt = ir->delta_t; /* Does NOT work with freeze or acceleration groups (yet) */ for (n = start; n < end; n++) { w_dt = md->invmass[n]*dt; for (d = 0; d < DIM; d++) { if ((ptype[n] != eptVSite) && (ptype[n] != eptShell)) { xnold[n-start][d] = x[n][d] - (x_init[n][d] - x_old[n][d]); xnew[n-start][d] = 2*x[n][d] - x_old[n][d] + f[n][d]*w_dt*dt; } else { xnold[n-start][d] = x[n][d]; xnew[n-start][d] = x[n][d]; } } } constrain(log, FALSE, FALSE, constr, idef, ir, NULL, cr, step, 0, md, x, xnold-start, NULL, bMolPBC, box, lambda[efptBONDED], &(dvdlambda[efptBONDED]), NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); constrain(log, FALSE, FALSE, constr, idef, ir, NULL, cr, step, 0, md, x, xnew-start, NULL, bMolPBC, box, lambda[efptBONDED], &(dvdlambda[efptBONDED]), NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); for (n = start; n < end; n++) { for (d = 0; d < DIM; d++) { xnew[n-start][d] = -(2*x[n][d]-xnold[n-start][d]-xnew[n-start][d])/sqr(dt) - f[n][d]*md->invmass[n]; } clear_rvec(acc_dir[n]); } /* Project the acceleration on the old bond directions */ constrain(log, FALSE, FALSE, constr, idef, ir, NULL, cr, step, 0, md, x_old, xnew-start, acc_dir, bMolPBC, box, lambda[efptBONDED], &(dvdlambda[efptBONDED]), NULL, NULL, nrnb, econqDeriv_FlexCon, FALSE, 0, 0); }
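/* init_adir() builds two Verlet-style predictions per atom: one
 * unconstrained (xnew, including the force term) and one that undoes
 * the last constraint correction (xnold); constraining both and taking
 * the finite difference isolates the acceleration along the constraint
 * directions. A one-dimensional sketch of the two formulas above, with
 * hypothetical names:
 */
static double verlet_predict_1d(double x, double x_old, double f,
                                double invmass, double dt)
{
    /* xnew = 2x - x_old + (f/m)*dt^2 */
    return 2.0*x - x_old + f*invmass*dt*dt;
}

static double constraint_accel_1d(double x, double xnold_constr,
                                  double xnew_constr, double f,
                                  double invmass, double dt)
{
    /* mirrors: xnew = -(2*x - xnold - xnew)/dt^2 - f*invmass */
    return -(2.0*x - xnold_constr - xnew_constr)/(dt*dt) - f*invmass;
}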
void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, rvec x[], history_t *hist, rvec f[], rvec f_longrange[], gmx_enerdata_t *enerd, t_fcdata *fcd, gmx_localtop_t *top, gmx_genborn_t *born, gmx_bool bBornRadii, matrix box, t_lambda *fepvals, real *lambda, t_graph *graph, t_blocka *excl, rvec mu_tot[], int flags, float *cycles_pme) { int i, j; int donb_flags; gmx_bool bSB; int pme_flags; matrix boxs; rvec box_size; t_pbc pbc; real dvdl_dum[efptNR], dvdl_nb[efptNR]; #ifdef GMX_MPI double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ #endif set_pbc(&pbc, fr->ePBC, box); /* reset free energy components */ for (i = 0; i < efptNR; i++) { dvdl_nb[i] = 0; dvdl_dum[i] = 0; } /* Reset box */ for (i = 0; (i < DIM); i++) { box_size[i] = box[i][i]; } debug_gmx(); /* do QMMM first if requested */ if (fr->bQMMM) { enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); } /* Call the short range functions all in one go. */ #ifdef GMX_MPI /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ #define TAKETIME FALSE if (TAKETIME) { MPI_Barrier(cr->mpi_comm_mygroup); t0 = MPI_Wtime(); } #endif if (ir->nwall) { /* foreign lambda component for walls */ real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], enerd->grpp.ener[egLJSR], nrnb); enerd->dvdl_lin[efptVDW] += dvdl_walls; } /* If doing GB, reset dvda and calculate the Born radii */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsNONBONDED); for (i = 0; i < born->nr; i++) { fr->dvda[i] = 0; } if (bBornRadii) { calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb); } wallcycle_sub_stop(wcycle, ewcsNONBONDED); } where(); /* We only do non-bonded calculation with group scheme here, the verlet * calls are done from do_force_cutsVERLET(). */ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) { donb_flags = 0; /* Add short-range interactions */ donb_flags |= GMX_NONBONDED_DO_SR; /* Currently all group scheme kernels always calculate (shift-)forces */ if (flags & GMX_FORCE_FORCES) { donb_flags |= GMX_NONBONDED_DO_FORCE; } if (flags & GMX_FORCE_VIRIAL) { donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; } if (flags & GMX_FORCE_ENERGY) { donb_flags |= GMX_NONBONDED_DO_POTENTIAL; } if (flags & GMX_FORCE_DO_LR) { donb_flags |= GMX_NONBONDED_DO_LR; } wallcycle_sub_start(wcycle, ewcsNONBONDED); do_nonbonded(fr, x, f, f_longrange, md, excl, &enerd->grpp, nrnb, lambda, dvdl_nb, -1, -1, donb_flags); /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) { for (i = 0; i < enerd->n_lambda; i++) { real lam_i[efptNR]; for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } reset_foreign_enerdata(enerd); do_nonbonded(fr, x, f, f_longrange, md, excl, &(enerd->foreign_grpp), nrnb, lam_i, dvdl_dum, -1, -1, (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } wallcycle_sub_stop(wcycle, ewcsNONBONDED); where(); } /* If we are doing GB, calculate bonded forces and apply corrections * to the solvation forces */ /* MRS: Eventually, many need to include free energy contribution here! 
*/ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsLISTED); calc_gb_forces(cr, md, born, top, x, f, fr, idef, ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); wallcycle_sub_stop(wcycle, ewcsLISTED); } #ifdef GMX_MPI if (TAKETIME) { t1 = MPI_Wtime(); fr->t_fnbf += t1-t0; } #endif if (fepvals->sc_alpha != 0) { enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; } else { enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; } if (fepvals->sc_alpha != 0) /* even though the coulomb part is linear, we already added it, because we need to go through the vdw calculation anyway */ { enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; } else { enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; } debug_gmx(); if (debug) { pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); } /* Shift the coordinates. Must be done before listed forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT * go when no listed forces have to be evaluated. * * The shifting and PBC code is deliberately not timed, since with * the Verlet scheme it only takes non-zero time with triclinic * boxes, and even then the time is around a factor of 100 less * than the next smallest counter. */ /* Here sometimes we would not need to shift with NBFonly, * but we do so anyhow for consistency of the returned coordinates. */ if (graph) { shift_self(graph, box, x); if (TRICLINIC(box)) { inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); } else { inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); } } /* Check whether we need to do listed interactions or correct for exclusions */ if (fr->bMolPBC && ((flags & GMX_FORCE_LISTED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) { /* TODO There are no electrostatics methods that require this transformation, when using the Verlet scheme, so update the above conditional. */ /* Since all atoms are in the rectangular or triclinic unit-cell, * only single box vector shifts (2 in x) are required. */ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); } debug_gmx(); do_force_listed(wcycle, box, ir->fepvals, cr->ms, idef, (const rvec *) x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, flags); where(); *cycles_pme = 0; clear_mat(fr->vir_el_recip); clear_mat(fr->vir_lj_recip); /* Do long-range electrostatics and/or LJ-PME, including related short-range * corrections. */ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) { int status = 0; real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; bSB = (ir->nwall == 2); if (bSB) { copy_mat(box, boxs); svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) { real dvdl_long_range_correction_q = 0; real dvdl_long_range_correction_lj = 0; /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ /* The TPI molecule does not have exclusions with the rest * of the system and no intra-molecular PME grid * contributions will be calculated in * gmx_pme_calc_energy.
*/ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0) { int nthreads, t; wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); if (fr->n_tpi > 0) { gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); } nthreads = gmx_omp_nthreads_get(emntBonded); #pragma omp parallel for num_threads(nthreads) schedule(static) for (t = 0; t < nthreads; t++) { int i; rvec *fnv; tensor *vir_q, *vir_lj; real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; if (t == 0) { fnv = fr->f_novirsum; vir_q = &fr->vir_el_recip; vir_lj = &fr->vir_lj_recip; Vcorrt_q = &Vcorr_q; Vcorrt_lj = &Vcorr_lj; dvdlt_q = &dvdl_long_range_correction_q; dvdlt_lj = &dvdl_long_range_correction_lj; } else { fnv = fr->f_t[t].f; vir_q = &fr->f_t[t].vir_q; vir_lj = &fr->f_t[t].vir_lj; Vcorrt_q = &fr->f_t[t].Vcorr_q; Vcorrt_lj = &fr->f_t[t].Vcorr_lj; dvdlt_q = &fr->f_t[t].dvdl[efptCOUL]; dvdlt_lj = &fr->f_t[t].dvdl[efptVDW]; for (i = 0; i < fr->natoms_force; i++) { clear_rvec(fnv[i]); } clear_mat(*vir_q); clear_mat(*vir_lj); } *dvdlt_q = 0; *dvdlt_lj = 0; ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], cr, t, fr, md->chargeA, md->chargeB, md->sqrt_c6A, md->sqrt_c6B, md->sigmaA, md->sigmaB, md->sigma3A, md->sigma3B, md->nChargePerturbed || md->nTypePerturbed, ir->cutoff_scheme != ecutsVERLET, excl, x, bSB ? boxs : box, mu_tot, ir->ewald_geometry, ir->epsilon_surface, fnv, *vir_q, *vir_lj, Vcorrt_q, Vcorrt_lj, lambda[efptCOUL], lambda[efptVDW], dvdlt_q, dvdlt_lj); } if (nthreads > 1) { reduce_thread_forces(fr->natoms_force, fr->f_novirsum, fr->vir_el_recip, fr->vir_lj_recip, &Vcorr_q, &Vcorr_lj, &dvdl_long_range_correction_q, &dvdl_long_range_correction_lj, nthreads, fr->f_t); } wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); } if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0) { /* This is not in a subcounter because it takes a negligible and constant-sized amount of time */ Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, &dvdl_long_range_correction_q, fr->vir_el_recip); } enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q; enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj; if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME)) { /* Do reciprocal PME for Coulomb and/or LJ. */ assert(fr->n_tpi >= 0); if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) { pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; if (EEL_PME(fr->eeltype)) { pme_flags |= GMX_PME_DO_COULOMB; } if (EVDW_PME(fr->vdwtype)) { pme_flags |= GMX_PME_DO_LJ; } if (flags & GMX_FORCE_FORCES) { pme_flags |= GMX_PME_CALC_F; } if (flags & GMX_FORCE_VIRIAL) { pme_flags |= GMX_PME_CALC_ENER_VIR; } if (fr->n_tpi > 0) { /* We don't calculate f, but we do want the potential */ pme_flags |= GMX_PME_CALC_POT; } wallcycle_start(wcycle, ewcPMEMESH); status = gmx_pme_do(fr->pmedata, 0, md->homenr - fr->n_tpi, x, fr->f_novirsum, md->chargeA, md->chargeB, md->sqrt_c6A, md->sqrt_c6B, md->sigmaA, md->sigmaB, bSB ? boxs : box, cr, DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, DOMAINDECOMP(cr) ? 
dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle, fr->vir_el_recip, fr->ewaldcoeff_q, fr->vir_lj_recip, fr->ewaldcoeff_lj, &Vlr_q, &Vlr_lj, lambda[efptCOUL], lambda[efptVDW], &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags); *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); if (status != 0) { gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); } /* We should try to do as little computation after * this as possible, because parallel PME synchronizes * the nodes, so we want all load imbalance of the * rest of the force calculation to be before the PME * call. DD load balancing is done on the whole time * of the force call (without PME). */ } if (fr->n_tpi > 0) { if (EVDW_PME(ir->vdwtype)) { gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); } /* Determine the PME grid energy of the test molecule * with the PME grid potential of the other charges. */ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, x + md->homenr - fr->n_tpi, md->chargeA + md->homenr - fr->n_tpi, &Vlr_q); } } } if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype)) { Vlr_q = do_ewald(ir, x, fr->f_novirsum, md->chargeA, md->chargeB, box_size, cr, md->homenr, fr->vir_el_recip, fr->ewaldcoeff_q, lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table); } /* Note that with separate PME nodes we get the real energies later */ enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q; enerd->dvdl_lin[efptVDW] += dvdl_long_range_lj; enerd->term[F_COUL_RECIP] = Vlr_q + Vcorr_q; enerd->term[F_LJ_RECIP] = Vlr_lj + Vcorr_lj; if (debug) { fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]); pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]); pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM); } } else { /* Is there a reaction-field exclusion correction needed? */ if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype) { /* With the Verlet scheme, exclusion forces are calculated * in the non-bonded kernel. */ if (ir->cutoff_scheme != ecutsVERLET) { real dvdl_rf_excl = 0; enerd->term[F_RF_EXCL] = RF_excl_correction(fr, graph, md, excl, x, f, fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; } } } where(); debug_gmx(); if (debug) { print_nrnb(debug, nrnb); } debug_gmx(); #ifdef GMX_MPI if (TAKETIME) { t2 = MPI_Wtime(); MPI_Barrier(cr->mpi_comm_mygroup); t3 = MPI_Wtime(); fr->t_wait += t3-t2; if (fr->timesteps == 11) { char buf[22]; fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", cr->nodeid, gmx_step_str(fr->timesteps, buf), 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); } fr->timesteps++; } #endif if (debug) { pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); } }
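/* The threaded Ewald correction above uses the common convention that
 * thread 0 writes directly into the output arrays while every other
 * thread fills a private buffer that is reduced afterwards (see the
 * reduce_thread_forces() call). A minimal sketch of that reduction
 * step for one scalar force component per atom, with a hypothetical
 * buffer layout:
 */
static void reduce_thread_force_buffers(int natoms, double *f,
                                        double **f_thread, int nthreads)
{
    int t, i;

    /* Thread 0 accumulated into f directly, so start at t = 1 */
    for (t = 1; t < nthreads; t++)
    {
        for (i = 0; i < natoms; i++)
        {
            f[i] += f_thread[t][i];
        }
    }
}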
gmx_bool pme_load_balance(pme_load_balancing_t pme_lb, t_commrec *cr, FILE *fp_err, FILE *fp_log, t_inputrec *ir, t_state *state, double cycles, interaction_const_t *ic, nonbonded_verlet_t *nbv, gmx_pme_t *pmedata, gmx_large_int_t step) { gmx_bool OK; pme_setup_t *set; double cycles_fast; char buf[STRLEN], sbuf[22]; real rtab; gmx_bool bUsesSimpleTables = TRUE; if (pme_lb->stage == pme_lb->nstage) { return FALSE; } if (PAR(cr)) { gmx_sumd(1, &cycles, cr); cycles /= cr->nnodes; } set = &pme_lb->setup[pme_lb->cur]; set->count++; rtab = ir->rlistlong + ir->tabext; if (set->count % 2 == 1) { /* Skip the first cycle, because the first step after a switch * is much slower due to allocation and/or caching effects. */ return TRUE; } sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf)); print_grid(fp_err, fp_log, buf, "timed with", set, cycles); if (set->count <= 2) { set->cycles = cycles; } else { if (cycles*PME_LB_ACCEL_TOL < set->cycles && pme_lb->stage == pme_lb->nstage - 1) { /* The performance went up a lot (due to e.g. DD load balancing). * Add a stage, keep the minima, but rescan all setups. */ pme_lb->nstage++; if (debug) { fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n" "Increased the number of stages to %d" " and ignoring the previous performance\n", set->grid[XX], set->grid[YY], set->grid[ZZ], cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL, pme_lb->nstage); } } set->cycles = min(set->cycles, cycles); } if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles) { pme_lb->fastest = pme_lb->cur; if (DOMAINDECOMP(cr)) { /* We found a new fastest setting, ensure that with subsequent * shorter cut-offs the dynamic load balancing does not make * the use of the current cut-off impossible. This solution is * a trade-off, as the PME load balancing and DD domain size * load balancing can interact in complex ways. * With the Verlet kernels, DD load imbalance will usually be * mainly due to bonded interaction imbalance, which will often * quickly push the domain boundaries beyond the limit for the * optimal, PME load balanced, cut-off. But it could be that * better overall performance can be obtained with a slightly * shorter cut-off and better DD load balancing. */ change_dd_dlb_cutoff_limit(cr); } } cycles_fast = pme_lb->setup[pme_lb->fastest].cycles; /* Check in stage 0 if we should stop scanning grids. * Stop when the time is more than SLOW_FAC longer than the fastest. */ if (pme_lb->stage == 0 && pme_lb->cur > 0 && cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC) { pme_lb->n = pme_lb->cur + 1; /* Done with scanning, go to stage 1 */ switch_to_stage1(pme_lb); } if (pme_lb->stage == 0) { int gridsize_start; gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ]; do { if (pme_lb->cur+1 < pme_lb->n) { /* We had already generated the next setup */ OK = TRUE; } else { /* Find the next setup */ OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order); } if (OK && ir->ePBC != epbcNONE) { OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong) <= max_cutoff2(ir->ePBC, state->box)); if (!OK) { pme_lb->elimited = epmelblimBOX; } } if (OK) { pme_lb->cur++; if (DOMAINDECOMP(cr)) { OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong); if (!OK) { /* Failed: do not use this setup */ pme_lb->cur--; pme_lb->elimited = epmelblimDD; } } } if (!OK) { /* We hit the upper limit for the cut-off, * the setup should not go further than cur.
*/ pme_lb->n = pme_lb->cur + 1; print_loadbal_limited(fp_err, fp_log, step, pme_lb); /* Switch to the next stage */ switch_to_stage1(pme_lb); } } while (OK && !(pme_lb->setup[pme_lb->cur].grid[XX]* pme_lb->setup[pme_lb->cur].grid[YY]* pme_lb->setup[pme_lb->cur].grid[ZZ] < gridsize_start*PME_LB_GRID_SCALE_FAC && pme_lb->setup[pme_lb->cur].grid_efficiency < pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC)); } if (pme_lb->stage > 0 && pme_lb->end == 1) { pme_lb->cur = 0; pme_lb->stage = pme_lb->nstage; } else if (pme_lb->stage > 0 && pme_lb->end > 1) { /* If stage = nstage-1: * scan over all setups, rerunning only those setups * which are not much slower than the fastest * else: * use the next setup */ do { pme_lb->cur++; if (pme_lb->cur == pme_lb->end) { pme_lb->stage++; pme_lb->cur = pme_lb->start; } } while (pme_lb->stage == pme_lb->nstage - 1 && pme_lb->setup[pme_lb->cur].count > 0 && pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC); if (pme_lb->stage == pme_lb->nstage) { /* We are done optimizing, use the fastest setup we found */ pme_lb->cur = pme_lb->fastest; } } if (DOMAINDECOMP(cr) && pme_lb->stage > 0) { OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong); if (!OK) { /* Failsafe solution */ if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage) { pme_lb->stage--; } pme_lb->fastest = 0; pme_lb->start = 0; pme_lb->end = pme_lb->cur; pme_lb->cur = pme_lb->start; pme_lb->elimited = epmelblimDD; print_loadbal_limited(fp_err, fp_log, step, pme_lb); } } /* Change the Coulomb cut-off and the PME grid */ set = &pme_lb->setup[pme_lb->cur]; ic->rcoulomb = set->rcut_coulomb; ic->rlist = set->rlist; ic->rlistlong = set->rlistlong; ir->nstcalclr = set->nstcalclr; ic->ewaldcoeff = set->ewaldcoeff; bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0); if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA) { nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv, ic); } else { init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab); } if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->ngrp > 1) { init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab); } if (cr->duty & DUTY_PME) { if (pme_lb->setup[pme_lb->cur].pmedata == NULL) { /* Generate a new PME data structure, * copying part of the old pointers. */ gmx_pme_reinit(&set->pmedata, cr, pme_lb->setup[0].pmedata, ir, set->grid); } *pmedata = set->pmedata; } else { /* Tell our PME-only node to switch grid */ gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff); } if (debug) { print_grid(NULL, debug, "", "switched to", set, -1); } if (pme_lb->stage == pme_lb->nstage) { print_grid(fp_err, fp_log, "", "optimal", set, -1); } return TRUE; }
void set_lincs(t_idef *idef,t_mdatoms *md, gmx_bool bDynamics,t_commrec *cr, struct gmx_lincsdata *li) { int start,natoms,nflexcon; t_blocka at2con; t_iatom *iatom; int i,k,ncc_alloc,ni,con,nconnect,concon; int type,a1,a2; real lenA=0,lenB; gmx_bool bLocal; li->nc = 0; li->ncc = 0; /* This is the local topology, so there are only F_CONSTR constraints */ if (idef->il[F_CONSTR].nr == 0) { /* There are no constraints, * we do not need to fill any data structures. */ return; } if (debug) { fprintf(debug,"Building the LINCS connectivity\n"); } if (DOMAINDECOMP(cr)) { if (cr->dd->constraints) { dd_get_constraint_range(cr->dd,&start,&natoms); } else { natoms = cr->dd->nat_home; } start = 0; } else if(PARTDECOMP(cr)) { pd_get_constraint_range(cr->pd,&start,&natoms); } else { start = md->start; natoms = md->homenr; } at2con = make_at2con(start,natoms,idef->il,idef->iparams,bDynamics, &nflexcon); if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0) { li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3); srenew(li->bllen0,li->nc_alloc); srenew(li->ddist,li->nc_alloc); srenew(li->bla,2*li->nc_alloc); srenew(li->blc,li->nc_alloc); srenew(li->blc1,li->nc_alloc); srenew(li->blnr,li->nc_alloc+1); srenew(li->bllen,li->nc_alloc); srenew(li->tmpv,li->nc_alloc); srenew(li->tmp1,li->nc_alloc); srenew(li->tmp2,li->nc_alloc); srenew(li->tmp3,li->nc_alloc); srenew(li->lambda,li->nc_alloc); if (li->ncg_triangle > 0) { /* This is allocating too much, but it is difficult to improve */ srenew(li->triangle,li->nc_alloc); srenew(li->tri_bits,li->nc_alloc); } } iatom = idef->il[F_CONSTR].iatoms; ncc_alloc = li->ncc_alloc; li->blnr[0] = 0; ni = idef->il[F_CONSTR].nr/3; con = 0; nconnect = 0; li->blnr[con] = nconnect; for(i=0; i<ni; i++) { bLocal = TRUE; type = iatom[3*i]; a1 = iatom[3*i+1]; a2 = iatom[3*i+2]; lenA = idef->iparams[type].constr.dA; lenB = idef->iparams[type].constr.dB; /* Skip the flexible constraints when not doing dynamics */ if (bDynamics || lenA!=0 || lenB!=0) { li->bllen0[con] = lenA; li->ddist[con] = lenB - lenA; /* Set the length to the topology A length */ li->bllen[con] = li->bllen0[con]; li->bla[2*con] = a1; li->bla[2*con+1] = a2; /* Construct the constraint connection matrix blbnb */ for(k=at2con.index[a1-start]; k<at2con.index[a1-start+1]; k++) { concon = at2con.a[k]; if (concon != i) { if (nconnect >= ncc_alloc) { ncc_alloc = over_alloc_small(nconnect+1); srenew(li->blbnb,ncc_alloc); } li->blbnb[nconnect++] = concon; } } for(k=at2con.index[a2-start]; k<at2con.index[a2-start+1]; k++) { concon = at2con.a[k]; if (concon != i) { if (nconnect+1 > ncc_alloc) { ncc_alloc = over_alloc_small(nconnect+1); srenew(li->blbnb,ncc_alloc); } li->blbnb[nconnect++] = concon; } } li->blnr[con+1] = nconnect; if (cr->dd == NULL) { /* Order the blbnb matrix to optimize memory access */ qsort(&(li->blbnb[li->blnr[con]]),li->blnr[con+1]-li->blnr[con], sizeof(li->blbnb[0]),int_comp); } /* Increase the constraint count */ con++; } } done_blocka(&at2con); /* This is the real number of constraints, * without dynamics the flexible constraints are not present. 
*/ li->nc = con; li->ncc = li->blnr[con]; if (cr->dd == NULL) { /* Since the matrix is static, we can free some memory */ ncc_alloc = li->ncc; srenew(li->blbnb,ncc_alloc); } if (ncc_alloc > li->ncc_alloc) { li->ncc_alloc = ncc_alloc; srenew(li->blmf,li->ncc_alloc); srenew(li->blmf1,li->ncc_alloc); srenew(li->tmpncc,li->ncc_alloc); } if (debug) { fprintf(debug,"Number of constraints is %d, couplings %d\n", li->nc,li->ncc); } set_lincs_matrix(li,md->invmass,md->lambda); }
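/* The blnr/blbnb arrays filled above are a CSR-style adjacency list of
 * constraints coupled through a shared atom, derived from the inverse
 * (atom -> constraints) mapping that make_at2con() provides. A hedged
 * sketch of the counting step for one constraint, assuming a flat
 * at2con index over local atoms (names illustrative, not GROMACS API):
 */
static int count_coupled_constraints(int con, const int *bla /* 2 per con */,
                                     const int *at2con_index,
                                     const int *at2con_a)
{
    int n = 0, side, k;

    for (side = 0; side < 2; side++)
    {
        int a = bla[2*con + side];

        for (k = at2con_index[a]; k < at2con_index[a+1]; k++)
        {
            if (at2con_a[k] != con) /* skip the constraint itself */
            {
                n++;
            }
        }
    }
    return n;
}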
static void make_cyl_refgrps(t_commrec *cr, struct pull_t *pull, t_mdatoms *md, t_pbc *pbc, double t, rvec *x) { /* The size and stride per coord for the reduction buffer */ const int stride = 9; int c, i, ii, m, start, end; rvec g_x, dx, dir; double inv_cyl_r2; pull_comm_t *comm; gmx_ga2la_t *ga2la = NULL; comm = &pull->comm; if (comm->dbuf_cyl == NULL) { snew(comm->dbuf_cyl, pull->ncoord*stride); } if (cr && DOMAINDECOMP(cr)) { ga2la = cr->dd->ga2la; } start = 0; end = md->homenr; inv_cyl_r2 = 1.0/gmx::square(pull->params.cylinder_r); /* loop over all groups to make a reference group for each */ for (c = 0; c < pull->ncoord; c++) { pull_coord_work_t *pcrd; double sum_a, wmass, wwmass; dvec radf_fac0, radf_fac1; pcrd = &pull->coord[c]; sum_a = 0; wmass = 0; wwmass = 0; clear_dvec(radf_fac0); clear_dvec(radf_fac1); if (pcrd->params.eGeom == epullgCYL) { pull_group_work_t *pref, *pgrp, *pdyna; /* pref will be the same group for all pull coordinates */ pref = &pull->group[pcrd->params.group[0]]; pgrp = &pull->group[pcrd->params.group[1]]; pdyna = &pull->dyna[c]; copy_rvec(pcrd->vec, dir); pdyna->nat_loc = 0; /* We calculate distances with respect to the reference location * of this cylinder group (g_x), which we already have now since * we reduced the other group COM over the ranks. This resolves * any PBC issues and we don't need to use a PBC-atom here. */ if (pcrd->params.rate != 0) { /* With rate=0, value_ref is set initially */ pcrd->value_ref = pcrd->params.init + pcrd->params.rate*t; } for (m = 0; m < DIM; m++) { g_x[m] = pgrp->x[m] - pcrd->vec[m]*pcrd->value_ref; } /* loop over all atoms in the main ref group */ for (i = 0; i < pref->params.nat; i++) { ii = pref->params.ind[i]; if (ga2la) { if (!ga2la_get_home(ga2la, pref->params.ind[i], &ii)) { ii = -1; } } if (ii >= start && ii < end) { double dr2, dr2_rel, inp; dvec dr; pbc_dx_aiuc(pbc, x[ii], g_x, dx); inp = iprod(dir, dx); dr2 = 0; for (m = 0; m < DIM; m++) { /* Determine the radial components */ dr[m] = dx[m] - inp*dir[m]; dr2 += dr[m]*dr[m]; } dr2_rel = dr2*inv_cyl_r2; if (dr2_rel < 1) { double mass, weight, dweight_r; dvec mdw; /* add to index, to sum of COM, to weight array */ if (pdyna->nat_loc >= pdyna->nalloc_loc) { pdyna->nalloc_loc = over_alloc_large(pdyna->nat_loc+1); srenew(pdyna->ind_loc, pdyna->nalloc_loc); srenew(pdyna->weight_loc, pdyna->nalloc_loc); srenew(pdyna->mdw, pdyna->nalloc_loc); srenew(pdyna->dv, pdyna->nalloc_loc); } pdyna->ind_loc[pdyna->nat_loc] = ii; mass = md->massT[ii]; /* The radial weight function is 1-2x^2+x^4, * where x=r/cylinder_r. Since this function depends * on the radial component, we also get radial forces * on both groups. */ weight = 1 + (-2 + dr2_rel)*dr2_rel; dweight_r = (-4 + 4*dr2_rel)*inv_cyl_r2; pdyna->weight_loc[pdyna->nat_loc] = weight; sum_a += mass*weight*inp; wmass += mass*weight; wwmass += mass*weight*weight; dsvmul(mass*dweight_r, dr, mdw); copy_dvec(mdw, pdyna->mdw[pdyna->nat_loc]); /* Currently we only have the axial component of the * distance (inp) up to an unknown offset. We add this * offset after the reduction needed to determine the * COM of the cylinder group.
*/ pdyna->dv[pdyna->nat_loc] = inp; for (m = 0; m < DIM; m++) { radf_fac0[m] += mdw[m]; radf_fac1[m] += mdw[m]*inp; } pdyna->nat_loc++; } } } } comm->dbuf_cyl[c*stride+0] = wmass; comm->dbuf_cyl[c*stride+1] = wwmass; comm->dbuf_cyl[c*stride+2] = sum_a; comm->dbuf_cyl[c*stride+3] = radf_fac0[XX]; comm->dbuf_cyl[c*stride+4] = radf_fac0[YY]; comm->dbuf_cyl[c*stride+5] = radf_fac0[ZZ]; comm->dbuf_cyl[c*stride+6] = radf_fac1[XX]; comm->dbuf_cyl[c*stride+7] = radf_fac1[YY]; comm->dbuf_cyl[c*stride+8] = radf_fac1[ZZ]; } if (cr != NULL && PAR(cr)) { /* Sum the contributions over the ranks */ pull_reduce_double(cr, comm, pull->ncoord*stride, comm->dbuf_cyl); } for (c = 0; c < pull->ncoord; c++) { pull_coord_work_t *pcrd; pcrd = &pull->coord[c]; if (pcrd->params.eGeom == epullgCYL) { pull_group_work_t *pdyna, *pgrp; double wmass, wwmass, dist; pdyna = &pull->dyna[c]; pgrp = &pull->group[pcrd->params.group[1]]; wmass = comm->dbuf_cyl[c*stride+0]; wwmass = comm->dbuf_cyl[c*stride+1]; pdyna->mwscale = 1.0/wmass; /* Cylinder pulling can't be used with constraints, but we set * wscale and invtm anyhow, in case someone would like to use them. */ pdyna->wscale = wmass/wwmass; pdyna->invtm = wwmass/(wmass*wmass); /* We store the deviation of the COM from the reference location * used above, since we need it when we apply the radial forces * to the atoms in the cylinder group. */ pcrd->cyl_dev = 0; for (m = 0; m < DIM; m++) { g_x[m] = pgrp->x[m] - pcrd->vec[m]*pcrd->value_ref; dist = -pcrd->vec[m]*comm->dbuf_cyl[c*stride+2]*pdyna->mwscale; pdyna->x[m] = g_x[m] - dist; pcrd->cyl_dev += dist; } /* Now we know the exact COM of the cylinder reference group, * we can determine the radial force factor (ffrad) that when * multiplied with the axial pull force will give the radial * force on the pulled (non-cylinder) group. */ for (m = 0; m < DIM; m++) { pcrd->ffrad[m] = (comm->dbuf_cyl[c*stride+6+m] + comm->dbuf_cyl[c*stride+3+m]*pcrd->cyl_dev)/wmass; } if (debug) { fprintf(debug, "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n", c, pdyna->x[0], pdyna->x[1], pdyna->x[2], 1.0/pdyna->invtm); fprintf(debug, "ffrad %8.3f %8.3f %8.3f\n", pcrd->ffrad[XX], pcrd->ffrad[YY], pcrd->ffrad[ZZ]); } } } }
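/* Worked example (illustrative code, not part of GROMACS): with
 * u = (r/cylinder_r)^2 = dr2_rel, the radial weight used above is
 *   w(u) = 1 - 2u + u^2 = (1 - u)^2,
 * which is 1 on the cylinder axis and falls smoothly to 0 at r = cylinder_r.
 * Its derivative with respect to a radial component dr[m] is
 *   dw/d(dr[m]) = (dw/du)*(du/d(dr[m])) = (2u - 2)*(2*dr[m]/R^2)
 *               = (4u - 4)*dr[m]/R^2,
 * which is exactly dweight_r*dr[m] as computed in make_cyl_refgrps.
 * A standalone consistency check:
 */
#include <assert.h>
#include <math.h>

static void check_cyl_weight(double dr2_rel, double inv_cyl_r2)
{
    double u         = dr2_rel;
    double weight    = 1 + (-2 + u)*u;        /* as computed in make_cyl_refgrps */
    double w_closed  = (1 - u)*(1 - u);       /* closed form (1 - u)^2 */
    double dweight_r = (-4 + 4*u)*inv_cyl_r2; /* radial derivative factor */

    assert(fabs(weight - w_closed) < 1e-12);
    (void)dweight_r;
}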
gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr,
                          struct gmx_repl_ex *re, t_state *state,
                          gmx_enerdata_t *enerd, t_state *state_local,
                          gmx_int64_t step, real time)
{
    int      j;
    int      replica_id = 0;
    int      exchange_partner;
    int      maxswap = 0;    /* Number of rounds of exchanges needed to deal
                              * with any multiple exchanges. */
    /* re->destinations holds where each replica ends up after the exchange
     * attempt(s); re->order holds the order in which multiple exchanges
     * will occur. */
    gmx_bool bThisReplicaExchanged = FALSE;

    if (MASTER(cr)) {
        replica_id = re->repl;
        test_for_replica_exchange(fplog, cr->ms, re, enerd,
                                  det(state_local->box), step, time);
        prepare_to_do_exchange(fplog, re->destinations, replica_id, re->nrepl,
                               &maxswap, re->order, re->cyclic, re->incycle,
                               &bThisReplicaExchanged);
    }
    /* Do an intra-simulation broadcast so all processors belonging to
     * each simulation know whether they need to participate in
     * collecting the state. Otherwise, they might as well get on with
     * the next thing to do.
     */
    if (DOMAINDECOMP(cr)) {
#ifdef GMX_MPI
        MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE,
                  MASTERRANK(cr), cr->mpi_comm_mygroup);
#endif
    }

    if (bThisReplicaExchanged) {
        /* Exchange the states: collect the global state on the master node */
        if (DOMAINDECOMP(cr)) {
            dd_collect_state(cr->dd, state_local, state);
        } else {
            copy_state_nonatomdata(state_local, state);
        }

        if (MASTER(cr)) {
            /* There will be only one swap cycle with standard replica
             * exchange, but there may be multiple swap cycles if we
             * allow multiple swaps.
             */
            for (j = 0; j < maxswap; j++) {
                exchange_partner = re->order[replica_id][j];

                if (exchange_partner != replica_id) {
                    /* Exchange the global states between the master nodes */
                    if (debug) {
                        fprintf(debug, "Exchanging %d with %d\n",
                                replica_id, exchange_partner);
                    }
                    exchange_state(cr->ms, exchange_partner, state);
                }
            }
            /* For temperature-type replica exchange, we need to scale
             * the velocities.
             */
            if (re->type == ereTEMP || re->type == ereTL) {
                scale_velocities(state,
                                 sqrt(re->q[ereTEMP][replica_id] /
                                      re->q[ereTEMP][re->destinations[replica_id]]));
            }
        }

        /* With domain decomposition the global state is distributed later */
        if (!DOMAINDECOMP(cr)) {
            /* Copy the global state to the local state data structure */
            copy_state_nonatomdata(state, state_local);
        }
    }

    return bThisReplicaExchanged;
}
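/* Illustration (not GROMACS code): why scale_velocities() uses a factor of
 * sqrt(T_i/T_j). Kinetic energy is quadratic in v and <m v^2> scales linearly
 * with T, so a configuration received from a replica at temperature T_j is
 * made canonical at this replica's temperature T_i by v' = v*sqrt(T_i/T_j).
 * A minimal sketch, assuming velocities passed as a flat array of components:
 */
#include <math.h>

static void scale_velocities_sketch(double *v, int n, double T_new, double T_old)
{
    double fac = sqrt(T_new/T_old);
    int    i;

    for (i = 0; i < n; i++) {
        v[i] *= fac;
    }
}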
void mdAlgorithmsSetupAtomData(t_commrec *cr, const t_inputrec *ir,
                               const gmx_mtop_t *top_global,
                               gmx_localtop_t *top, t_forcerec *fr,
                               t_graph **graph, t_mdatoms *mdatoms,
                               gmx_vsite_t *vsite, gmx_shellfc_t *shellfc)
{
    bool usingDomDec = DOMAINDECOMP(cr);

    int  numAtomIndex, numHomeAtoms;
    int *atomIndex;

    if (usingDomDec) {
        numAtomIndex = dd_natoms_mdatoms(cr->dd);
        atomIndex    = cr->dd->gatindex;
        numHomeAtoms = cr->dd->nat_home;
    } else {
        numAtomIndex = -1;
        atomIndex    = NULL;
        numHomeAtoms = top_global->natoms;
    }
    atoms2md(top_global, ir, numAtomIndex, atomIndex, numHomeAtoms, mdatoms);

    if (usingDomDec) {
        dd_sort_local_top(cr->dd, mdatoms, top);
    } else {
        /* Currently gmx_mtop_generate_local_top allocates and returns a
         * pointer. We should implement a more elegant solution.
         */
        gmx_localtop_t *tmpTop;

        tmpTop = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO);
        *top   = *tmpTop;
        sfree(tmpTop);
    }

    if (vsite) {
        if (usingDomDec) {
            /* The vsites were already assigned by the domdec topology code.
             * We only need to do the thread division here.
             */
            split_vsites_over_threads(top->idef.il, top->idef.iparams,
                                      mdatoms, FALSE, vsite);
        } else {
            set_vsite_top(vsite, top, mdatoms, cr);
        }
    }

    if (!usingDomDec && ir->ePBC != epbcNONE && !fr->bMolPBC) {
        GMX_ASSERT(graph != NULL,
                   "We use a graph with PBC (no periodic mols) and without DD");
        *graph = mk_graph(NULL, &(top->idef), 0, top_global->natoms, FALSE, FALSE);
    } else if (graph != NULL) {
        *graph = NULL;
    }

    /* Note that with DD only flexible constraints, not shells, are supported
     * and these don't require setup in make_local_shells().
     */
    if (!usingDomDec && shellfc) {
        make_local_shells(cr, mdatoms, shellfc);
    }

    setup_bonded_threading(fr, &top->idef);
}
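/* Illustration (not GROMACS code): atoms2md() is called above either with an
 * explicit local-to-global index array (domain decomposition) or with
 * atomIndex == NULL, which means the identity mapping (single domain).
 * A minimal sketch of that optional-index gather, using a hypothetical
 * per-atom mass array:
 */
static void gather_masses(const double *massGlobal, const int *index,
                          int n, double *massLocal)
{
    int i;

    for (i = 0; i < n; i++) {
        /* index == NULL: local numbering equals global numbering */
        massLocal[i] = massGlobal[index ? index[i] : i];
    }
}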
int relax_shell_flexcon(FILE *fplog, t_commrec *cr, gmx_bool bVerbose,
                        gmx_int64_t mdstep, t_inputrec *inputrec,
                        gmx_bool bDoNS, int force_flags,
                        gmx_localtop_t *top, gmx_constr_t constr,
                        gmx_enerdata_t *enerd, t_fcdata *fcd,
                        t_state *state, rvec f[],
                        tensor force_vir, t_mdatoms *md,
                        t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                        t_graph *graph, gmx_groups_t *groups,
                        struct gmx_shellfc *shfc, t_forcerec *fr,
                        gmx_bool bBornRadii, double t, rvec mu_tot,
                        gmx_bool *bConverged, gmx_vsite_t *vsite,
                        FILE *fp_field)
{
    int       nshell;
    t_shell  *shell;
    t_idef   *idef;
    rvec     *pos[2], *force[2], *acc_dir = NULL, *x_old = NULL;
    real      Epot[2], df[2];
    real      sf_dir, invdt;
    real      ftol, dum = 0;
    char      sbuf[22];
    gmx_bool  bCont, bInit;
    int       nat, dd_ac0, dd_ac1 = 0, i;
    int       start = 0, homenr = md->homenr, end = start+homenr, cg0, cg1;
    int       nflexcon, number_steps, d, Min = 0, count = 0;
#define Try (1-Min)    /* At start Try = 1 */

    bCont        = (mdstep == inputrec->init_step) && inputrec->bContinuation;
    bInit        = (mdstep == inputrec->init_step) || shfc->bRequireInit;
    ftol         = inputrec->em_tol;
    number_steps = inputrec->niter;
    nshell       = shfc->nshell;
    shell        = shfc->shell;
    nflexcon     = shfc->nflexcon;

    idef = &top->idef;

    if (DOMAINDECOMP(cr)) {
        nat = dd_natoms_vsite(cr->dd);
        if (nflexcon > 0) {
            dd_get_constraint_range(cr->dd, &dd_ac0, &dd_ac1);
            nat = max(nat, dd_ac1);
        }
    } else {
        nat = state->natoms;
    }

    if (nat > shfc->x_nalloc) {
        /* Allocate local arrays */
        shfc->x_nalloc = over_alloc_dd(nat);
        for (i = 0; (i < 2); i++) {
            srenew(shfc->x[i], shfc->x_nalloc);
            srenew(shfc->f[i], shfc->x_nalloc);
        }
    }
    for (i = 0; (i < 2); i++) {
        pos[i]   = shfc->x[i];
        force[i] = shfc->f[i];
    }

    /* When we had particle decomposition, this code only worked with
     * PD when all particles involved with each shell were in the same
     * charge group. Not sure if this is still relevant.
     */
    if (bDoNS && inputrec->ePBC != epbcNONE && !DOMAINDECOMP(cr)) {
        /* This is the only time where the coordinates are used
         * before do_force is called, which normally puts all
         * charge groups in the box.
         */
        cg0 = 0;
        cg1 = top->cgs.nr;
        put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, state->box,
                                 &(top->cgs), state->x, fr->cg_cm);

        if (graph) {
            mk_mshift(fplog, graph, fr->ePBC, state->box, state->x);
        }
    }

    /* After this all coordinate arrays will contain whole molecules */
    if (graph) {
        shift_self(graph, state->box, state->x);
    }

    if (nflexcon) {
        if (nat > shfc->flex_nalloc) {
            shfc->flex_nalloc = over_alloc_dd(nat);
            srenew(shfc->acc_dir, shfc->flex_nalloc);
            srenew(shfc->x_old,   shfc->flex_nalloc);
        }
        acc_dir = shfc->acc_dir;
        x_old   = shfc->x_old;
        for (i = 0; i < homenr; i++) {
            for (d = 0; d < DIM; d++) {
                shfc->x_old[i][d] =
                    state->x[start+i][d] - state->v[start+i][d]*inputrec->delta_t;
            }
        }
    }

    /* Do a prediction of the shell positions */
    if (shfc->bPredict && !bCont) {
        predict_shells(fplog, state->x, state->v, inputrec->delta_t,
                       nshell, shell, md->massT, NULL, bInit);
    }

    /* do_force expects the charge groups to be in the box */
    if (graph) {
        unshift_self(graph, state->box, state->x);
    }

    /* Calculate the forces first time around */
    if (gmx_debug_at) {
        pr_rvecs(debug, 0, "x b4 do_force", state->x + start, homenr);
    }
    do_force(fplog, cr, inputrec, mdstep, nrnb, wcycle, top, groups,
             state->box, state->x, &state->hist,
             force[Min], force_vir, md, enerd, fcd,
             state->lambda, graph,
             fr, vsite, mu_tot, t, fp_field, NULL, bBornRadii,
             (bDoNS ? GMX_FORCE_NS : 0) | force_flags);

    sf_dir = 0;
    if (nflexcon) {
        init_adir(fplog, shfc,
                  constr, idef, inputrec, cr, dd_ac1, mdstep, md, start, end,
                  shfc->x_old-start, state->x, state->x, force[Min],
                  shfc->acc_dir-start,
                  fr->bMolPBC, state->box, state->lambda, &dum, nrnb);

        for (i = start; i < end; i++) {
            sf_dir += md->massT[i]*norm2(shfc->acc_dir[i-start]);
        }
    }

    Epot[Min] = enerd->term[F_EPOT];

    df[Min] = rms_force(cr, shfc->f[Min], nshell, shell, nflexcon,
                        &sf_dir, &Epot[Min]);
    df[Try] = 0;
    if (debug) {
        fprintf(debug, "df = %g %g\n", df[Min], df[Try]);
    }

    if (gmx_debug_at) {
        pr_rvecs(debug, 0, "force0", force[Min], md->nr);
    }

    if (nshell+nflexcon > 0) {
        /* Copy x to pos[Min] & pos[Try]: during minimization only the
         * shell positions are updated, therefore the other particles must
         * be set here.
         */
        memcpy(pos[Min], state->x, nat*sizeof(state->x[0]));
        memcpy(pos[Try], state->x, nat*sizeof(state->x[0]));
    }

    if (bVerbose && MASTER(cr)) {
        print_epot(stdout, mdstep, 0, Epot[Min], df[Min], nflexcon, sf_dir);
    }

    if (debug) {
        fprintf(debug, "%17s: %14.10e\n",
                interaction_function[F_EKIN].longname, enerd->term[F_EKIN]);
        fprintf(debug, "%17s: %14.10e\n",
                interaction_function[F_EPOT].longname, enerd->term[F_EPOT]);
        fprintf(debug, "%17s: %14.10e\n",
                interaction_function[F_ETOT].longname, enerd->term[F_ETOT]);
        fprintf(debug, "SHELLSTEP %s\n", gmx_step_str(mdstep, sbuf));
    }

    /* First check whether we should do shells, or whether the force is
     * low enough even without minimization.
     */
    *bConverged = (df[Min] < ftol);

    for (count = 1; (!(*bConverged) && (count < number_steps)); count++) {
        if (vsite) {
            construct_vsites(vsite, pos[Min], inputrec->delta_t, state->v,
                             idef->iparams, idef->il,
                             fr->ePBC, fr->bMolPBC, cr, state->box);
        }

        if (nflexcon) {
            init_adir(fplog, shfc,
                      constr, idef, inputrec, cr, dd_ac1, mdstep, md, start, end,
                      x_old-start, state->x, pos[Min], force[Min], acc_dir-start,
                      fr->bMolPBC, state->box, state->lambda, &dum, nrnb);

            directional_sd(pos[Min], pos[Try], acc_dir-start, start, end,
                           fr->fc_stepsize);
        }

        /* New positions, steepest descent */
        shell_pos_sd(pos[Min], pos[Try], force[Min], nshell, shell, count);

        /* do_force expects the charge groups to be in the box */
        if (graph) {
            unshift_self(graph, state->box, pos[Try]);
        }

        if (gmx_debug_at) {
            pr_rvecs(debug, 0, "RELAX: pos[Min]  ", pos[Min] + start, homenr);
            pr_rvecs(debug, 0, "RELAX: pos[Try]  ", pos[Try] + start, homenr);
        }
        /* Try the new positions */
        do_force(fplog, cr, inputrec, 1, nrnb, wcycle,
                 top, groups, state->box, pos[Try], &state->hist,
                 force[Try], force_vir,
                 md, enerd, fcd, state->lambda, graph,
                 fr, vsite, mu_tot, t, fp_field, NULL, bBornRadii,
                 force_flags);

        if (gmx_debug_at) {
            pr_rvecs(debug, 0, "RELAX: force[Min]", force[Min] + start, homenr);
            pr_rvecs(debug, 0, "RELAX: force[Try]", force[Try] + start, homenr);
        }
        sf_dir = 0;
        if (nflexcon) {
            init_adir(fplog, shfc,
                      constr, idef, inputrec, cr, dd_ac1, mdstep, md, start, end,
                      x_old-start, state->x, pos[Try], force[Try], acc_dir-start,
                      fr->bMolPBC, state->box, state->lambda, &dum, nrnb);

            for (i = start; i < end; i++) {
                sf_dir += md->massT[i]*norm2(acc_dir[i-start]);
            }
        }

        Epot[Try] = enerd->term[F_EPOT];

        df[Try] = rms_force(cr, force[Try], nshell, shell, nflexcon,
                            &sf_dir, &Epot[Try]);

        if (debug) {
            fprintf(debug, "df = %g %g\n", df[Min], df[Try]);
        }

        if (gmx_debug_at) {
            pr_rvecs(debug, 0, "F after do_force", force[Try] + start, homenr);
            fprintf(debug, "SHELL ITER %d\n", count);
            dump_shells(debug, pos[Try], force[Try], ftol, nshell, shell);
        }

        if (bVerbose && MASTER(cr)) {
            print_epot(stdout, mdstep, count, Epot[Try], df[Try],
                       nflexcon, sf_dir);
        }

        *bConverged = (df[Try] < ftol);

        if (df[Try] < df[Min]) {
            if (debug) {
                fprintf(debug, "Swapping Min and Try\n");
            }
            if (nflexcon) {
                /* Correct the velocities for the flexible constraints */
                invdt = 1/inputrec->delta_t;
                for (i = start; i < end; i++) {
                    for (d = 0; d < DIM; d++) {
                        state->v[i][d] += (pos[Try][i][d] - pos[Min][i][d])*invdt;
                    }
                }
            }
            Min = Try;
        } else {
            decrease_step_size(nshell, shell);
        }
    }

    if (MASTER(cr) && !(*bConverged)) {
        /* Note that the energies and virial are incorrect when not converged */
        if (fplog) {
            fprintf(fplog,
                    "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
                    gmx_step_str(mdstep, sbuf), number_steps, df[Min]);
        }
        fprintf(stderr,
                "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
                gmx_step_str(mdstep, sbuf), number_steps, df[Min]);
    }

    /* Copy back the coordinates and the forces */
    memcpy(state->x, pos[Min], nat*sizeof(state->x[0]));
    memcpy(f, force[Min], nat*sizeof(f[0]));

    return count;
}
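/* Illustration (not GROMACS code): the Min/Try double-buffer used by
 * relax_shell_flexcon. Two position/force buffers are kept; Try is always
 * the other buffer (Try == 1-Min), a candidate step is evaluated into Try,
 * and on improvement the buffers are "swapped" by just flipping the index,
 * with no copying. On rejection the step size is decreased, mirroring
 * decrease_step_size() above. A minimal sketch with a 1-D toy objective:
 */
static double sketch_minimize(double x0, double step, int max_iter)
{
    double x[2], f[2];
    int    Min = 0, count;
#define TrySketch (1 - Min)

    x[Min] = x0;
    f[Min] = x[Min]*x[Min];                    /* toy objective f(x) = x^2 */

    for (count = 1; count < max_iter; count++) {
        x[TrySketch] = x[Min] - step*2*x[Min]; /* steepest-descent trial step */
        f[TrySketch] = x[TrySketch]*x[TrySketch];
        if (f[TrySketch] < f[Min]) {
            Min = TrySketch;                   /* accept: flip the index */
        } else {
            step *= 0.5;                       /* reject: decrease the step */
        }
    }
#undef TrySketch
    return x[Min];
}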
void pme_loadbal_init(pme_load_balancing_t **pme_lb_p,
                      t_commrec *cr, FILE *fp_log,
                      const t_inputrec *ir, matrix box,
                      const interaction_const_t *ic,
                      struct gmx_pme_t *pmedata,
                      gmx_bool bUseGPU, gmx_bool *bPrinting)
{
    pme_load_balancing_t *pme_lb;
    real                  spm, sp;
    int                   d;

    snew(pme_lb, 1);

    pme_lb->bSepPMERanks = !(cr->duty & DUTY_PME);

    /* Initially we turn on balancing directly, based on PP/PME imbalance */
    pme_lb->bTriggerOnDLB = FALSE;

    /* Any number of stages >= 2 is supported */
    pme_lb->nstage = 2;

    pme_lb->cutoff_scheme = ir->cutoff_scheme;

    if (pme_lb->cutoff_scheme == ecutsVERLET) {
        pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
        pme_lb->rbuf_vdw     = pme_lb->rbuf_coulomb;
    } else {
        if (ic->rcoulomb > ic->rlist) {
            pme_lb->rbuf_coulomb = ic->rlistlong - ic->rcoulomb;
        } else {
            pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
        }
        if (ic->rvdw > ic->rlist) {
            pme_lb->rbuf_vdw = ic->rlistlong - ic->rvdw;
        } else {
            pme_lb->rbuf_vdw = ic->rlist - ic->rvdw;
        }
    }

    copy_mat(box, pme_lb->box_start);
    if (ir->ePBC == epbcXY && ir->nwall == 2) {
        svmul(ir->wall_ewald_zfac, pme_lb->box_start[ZZ], pme_lb->box_start[ZZ]);
    }

    pme_lb->n = 1;
    snew(pme_lb->setup, pme_lb->n);

    pme_lb->rcut_vdw           = ic->rvdw;
    pme_lb->rcut_coulomb_start = ir->rcoulomb;
    pme_lb->nstcalclr_start    = ir->nstcalclr;

    pme_lb->cur                    = 0;
    pme_lb->setup[0].rcut_coulomb  = ic->rcoulomb;
    pme_lb->setup[0].rlist         = ic->rlist;
    pme_lb->setup[0].rlistlong     = ic->rlistlong;
    pme_lb->setup[0].nstcalclr     = ir->nstcalclr;
    pme_lb->setup[0].grid[XX]      = ir->nkx;
    pme_lb->setup[0].grid[YY]      = ir->nky;
    pme_lb->setup[0].grid[ZZ]      = ir->nkz;
    pme_lb->setup[0].ewaldcoeff_q  = ic->ewaldcoeff_q;
    pme_lb->setup[0].ewaldcoeff_lj = ic->ewaldcoeff_lj;

    pme_lb->setup[0].pmedata = pmedata;

    spm = 0;
    for (d = 0; d < DIM; d++) {
        sp = norm(pme_lb->box_start[d])/pme_lb->setup[0].grid[d];
        if (sp > spm) {
            spm = sp;
        }
    }
    pme_lb->setup[0].spacing = spm;

    if (ir->fourier_spacing > 0) {
        pme_lb->cut_spacing = ir->rcoulomb/ir->fourier_spacing;
    } else {
        pme_lb->cut_spacing = ir->rcoulomb/pme_lb->setup[0].spacing;
    }

    pme_lb->stage = 0;

    pme_lb->fastest     = 0;
    pme_lb->lower_limit = 0;
    pme_lb->start       = 0;
    pme_lb->end         = 0;
    pme_lb->elimited    = epmelblimNO;

    pme_lb->cycles_n = 0;
    pme_lb->cycles_c = 0;

    /* Tune with GPUs and/or separate PME ranks.
     * When running only on a CPU without PME ranks, PME tuning will only help
     * with small numbers of atoms in the cut-off sphere.
     */
    pme_lb->bActive = (wallcycle_have_counter() &&
                       (bUseGPU || pme_lb->bSepPMERanks));

    /* With GPUs and no separate PME ranks we can't measure the PP/PME
     * imbalance, so we start balancing right away.
     * Otherwise we only start balancing after we observe imbalance.
     */
    pme_lb->bBalance = (pme_lb->bActive && (bUseGPU && !pme_lb->bSepPMERanks));

    pme_lb->step_rel_stop = PMETunePeriod*ir->nstlist;

    /* Delay DD load balancing when GPUs are used */
    if (pme_lb->bActive && DOMAINDECOMP(cr) && cr->dd->nnodes > 1 && bUseGPU) {
        /* Lock DLB=auto to off (does nothing when DLB=yes/no).
         * With GPUs and separate PME ranks, we want to first
         * do PME tuning without DLB, since DLB might limit
         * the cut-off, which never improves performance.
         * We allow for DLB + PME tuning after a first round of tuning.
         */
        dd_dlb_lock(cr->dd);
        if (dd_dlb_is_locked(cr->dd)) {
            md_print_warn(cr, fp_log,
                          "NOTE: DLB will not turn on during the first phase of PME tuning\n");
        }
    }

    *pme_lb_p = pme_lb;

    *bPrinting = pme_lb->bBalance;
}
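/* Illustration (not GROMACS code): the initial PME grid spacing computed
 * above is the maximum over dimensions of box length divided by the number
 * of grid points, and cut_spacing records the ratio rcoulomb/spacing that
 * the tuning later uses to scale the cut-off together with the grid.
 * A minimal sketch, assuming an orthorhombic box given as three edge lengths:
 */
static double max_grid_spacing(const double boxLength[3], const int grid[3])
{
    double spm = 0;
    int    d;

    for (d = 0; d < 3; d++) {
        double sp = boxLength[d]/grid[d];
        if (sp > spm) {
            spm = sp;
        }
    }
    return spm;
}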