void pme_gpu_launch_spread(gmx_pme_t *pme, const rvec *x, gmx_wallcycle *wcycle) { GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled."); PmeGpu *pmeGpu = pme->gpu; // The only spot of PME GPU where LAUNCH_GPU counter increases call-count wallcycle_start(wcycle, ewcLAUNCH_GPU); // The only spot of PME GPU where ewcsLAUNCH_GPU_PME subcounter increases call-count wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_PME); pme_gpu_copy_input_coordinates(pmeGpu, x); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); const unsigned int gridIndex = 0; real *fftgrid = pme->fftgrid[gridIndex]; if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD) { /* Spread the coefficients on a grid */ const bool computeSplines = true; const bool spreadCharges = true; wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME); pme_gpu_spread(pmeGpu, gridIndex, fftgrid, computeSplines, spreadCharges); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); } }
void posres_wrapper_lambda(struct gmx_wallcycle *wcycle, const t_lambda *fepvals, const t_idef *idef, const struct t_pbc *pbc, const rvec x[], gmx_enerdata_t *enerd, real *lambda, t_forcerec *fr) { real v; int i; if (0 == idef->il[F_POSRES].nr) { return; } wallcycle_sub_start_nocount(wcycle, ewcsRESTRAINTS); for (i = 0; i < enerd->n_lambda; i++) { real dvdl_dum = 0, lambda_dum; lambda_dum = (i == 0 ? lambda[efptRESTRAINT] : fepvals->all_lambda[efptRESTRAINT][i-1]); v = posres(idef->il[F_POSRES].nr, idef->il[F_POSRES].iatoms, idef->iparams_posres, x, NULL, NULL, fr->ePBC == epbcNONE ? NULL : pbc, lambda_dum, &dvdl_dum, fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB); enerd->enerpart_lambda[i] += v; } wallcycle_sub_stop(wcycle, ewcsRESTRAINTS); }
/*! \brief * A convenience wrapper for launching either the GPU or CPU FFT. * * \param[in] pme The PME structure. * \param[in] gridIndex The grid index - should currently always be 0. * \param[in] dir The FFT direction enum. * \param[in] wcycle The wallclock counter. */ void inline parallel_3dfft_execute_gpu_wrapper(gmx_pme_t *pme, const int gridIndex, enum gmx_fft_direction dir, gmx_wallcycle_t wcycle) { GMX_ASSERT(gridIndex == 0, "Only single grid supported"); if (pme_gpu_performs_FFT(pme->gpu)) { wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME); pme_gpu_3dfft(pme->gpu, dir, gridIndex); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); } else { wallcycle_start(wcycle, ewcPME_FFT_MIXED_MODE); #pragma omp parallel for num_threads(pme->nthread) schedule(static) for (int thread = 0; thread < pme->nthread; thread++) { gmx_parallel_3dfft_execute(pme->pfft_setup[gridIndex], dir, thread, wcycle); } wallcycle_stop(wcycle, ewcPME_FFT_MIXED_MODE); } }
void pme_gpu_launch_complex_transforms(gmx_pme_t *pme, gmx_wallcycle *wcycle) { PmeGpu *pmeGpu = pme->gpu; const bool computeEnergyAndVirial = (pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0; const bool performBackFFT = (pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0; const unsigned int gridIndex = 0; t_complex *cfftgrid = pme->cfftgrid[gridIndex]; if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD) { if (!pme_gpu_performs_FFT(pmeGpu)) { wallcycle_start(wcycle, ewcWAIT_GPU_PME_SPREAD); pme_gpu_sync_spread_grid(pme->gpu); wallcycle_stop(wcycle, ewcWAIT_GPU_PME_SPREAD); } } try { if (pmeGpu->settings.currentFlags & GMX_PME_SOLVE) { /* do R2C 3D-FFT */ parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_REAL_TO_COMPLEX, wcycle); /* solve in k-space for our local cells */ if (pme_gpu_performs_solve(pmeGpu)) { const auto gridOrdering = pme_gpu_uses_dd(pmeGpu) ? GridOrdering::YZX : GridOrdering::XYZ; wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME); pme_gpu_solve(pmeGpu, cfftgrid, gridOrdering, computeEnergyAndVirial); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); } else { wallcycle_start(wcycle, ewcPME_SOLVE_MIXED_MODE); #pragma omp parallel for num_threads(pme->nthread) schedule(static) for (int thread = 0; thread < pme->nthread; thread++) { solve_pme_yzx(pme, cfftgrid, pme->boxVolume, computeEnergyAndVirial, pme->nthread, thread); } wallcycle_stop(wcycle, ewcPME_SOLVE_MIXED_MODE); } } if (performBackFFT) { parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_COMPLEX_TO_REAL, wcycle); } } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; }
void pme_gpu_reinit_computation(const gmx_pme_t *pme, gmx_wallcycle *wcycle) { GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled."); wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME); pme_gpu_clear_grids(pme->gpu); pme_gpu_clear_energy_virial(pme->gpu); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); }
void pme_gpu_launch_gather(const gmx_pme_t *pme, gmx_wallcycle gmx_unused *wcycle, PmeForceOutputHandling forceTreatment) { GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled."); if (!pme_gpu_performs_gather(pme->gpu)) { return; } wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME); const unsigned int gridIndex = 0; real *fftgrid = pme->fftgrid[gridIndex]; pme_gpu_gather(pme->gpu, forceTreatment, reinterpret_cast<float *>(fftgrid)); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); }
void pme_gpu_prepare_computation(gmx_pme_t *pme, bool needToUpdateBox, const matrix box, gmx_wallcycle *wcycle, int flags) { GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled."); GMX_ASSERT(pme->nnodes > 0, ""); GMX_ASSERT(pme->nnodes == 1 || pme->ndecompdim > 0, ""); PmeGpu *pmeGpu = pme->gpu; pmeGpu->settings.currentFlags = flags; // TODO these flags are only here to honor the CPU PME code, and probably should be removed bool shouldUpdateBox = false; for (int i = 0; i < DIM; ++i) { for (int j = 0; j <= i; ++j) { shouldUpdateBox |= (pmeGpu->common->previousBox[i][j] != box[i][j]); pmeGpu->common->previousBox[i][j] = box[i][j]; } } if (needToUpdateBox || shouldUpdateBox) // || is to make the first computation always update { wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME); pme_gpu_update_input_box(pmeGpu, box); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); if (!pme_gpu_performs_solve(pmeGpu)) { // TODO remove code duplication and add test coverage matrix scaledBox; pmeGpu->common->boxScaler->scaleBox(box, scaledBox); gmx::invertBoxMatrix(scaledBox, pme->recipbox); pme->boxVolume = scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ]; } } }
void do_force_lowlevel(FILE *fplog, gmx_large_int_t step, t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, t_grpopts *opts, rvec x[], history_t *hist, rvec f[], rvec f_longrange[], gmx_enerdata_t *enerd, t_fcdata *fcd, gmx_mtop_t *mtop, gmx_localtop_t *top, gmx_genborn_t *born, t_atomtypes *atype, gmx_bool bBornRadii, matrix box, t_lambda *fepvals, real *lambda, t_graph *graph, t_blocka *excl, rvec mu_tot[], int flags, float *cycles_pme) { int i, j, status; int donb_flags; gmx_bool bDoEpot, bSepDVDL, bSB; int pme_flags; matrix boxs; rvec box_size; real Vsr, Vlr, Vcorr = 0; t_pbc pbc; real dvdgb; char buf[22]; double clam_i, vlam_i; real dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR]; real dvdlsum; #ifdef GMX_MPI double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ #endif #define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) {fprintf(fplog, sepdvdlformat, s, v, dvdlambda); } GMX_MPE_LOG(ev_force_start); set_pbc(&pbc, fr->ePBC, box); /* reset free energy components */ for (i = 0; i < efptNR; i++) { dvdl_nb[i] = 0; dvdl_dum[i] = 0; } /* Reset box */ for (i = 0; (i < DIM); i++) { box_size[i] = box[i][i]; } bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); debug_gmx(); /* do QMMM first if requested */ if (fr->bQMMM) { enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md); } if (bSepDVDL) { fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n", gmx_step_str(step, buf), cr->nodeid); } /* Call the short range functions all in one go. */ GMX_MPE_LOG(ev_do_fnbf_start); #ifdef GMX_MPI /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ #define TAKETIME FALSE if (TAKETIME) { MPI_Barrier(cr->mpi_comm_mygroup); t0 = MPI_Wtime(); } #endif if (ir->nwall) { /* foreign lambda component for walls */ dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], enerd->grpp.ener[egLJSR], nrnb); PRINT_SEPDVDL("Walls", 0.0, dvdl); enerd->dvdl_lin[efptVDW] += dvdl; } /* If doing GB, reset dvda and calculate the Born radii */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsNONBONDED); for (i = 0; i < born->nr; i++) { fr->dvda[i] = 0; } if (bBornRadii) { calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb); } wallcycle_sub_stop(wcycle, ewcsNONBONDED); } where(); /* We only do non-bonded calculation with group scheme here, the verlet * calls are done from do_force_cutsVERLET(). */ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) { donb_flags = 0; /* Add short-range interactions */ donb_flags |= GMX_NONBONDED_DO_SR; if (flags & GMX_FORCE_FORCES) { donb_flags |= GMX_NONBONDED_DO_FORCE; } if (flags & GMX_FORCE_ENERGY) { donb_flags |= GMX_NONBONDED_DO_POTENTIAL; } if (flags & GMX_FORCE_DO_LR) { donb_flags |= GMX_NONBONDED_DO_LR; } wallcycle_sub_start(wcycle, ewcsNONBONDED); do_nonbonded(cr, fr, x, f, f_longrange, md, excl, &enerd->grpp, box_size, nrnb, lambda, dvdl_nb, -1, -1, donb_flags); /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) { for (i = 0; i < enerd->n_lambda; i++) { for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } reset_foreign_enerdata(enerd); do_nonbonded(cr, fr, x, f, f_longrange, md, excl, &(enerd->foreign_grpp), box_size, nrnb, lam_i, dvdl_dum, -1, -1, (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } wallcycle_sub_stop(wcycle, ewcsNONBONDED); where(); } /* If we are doing GB, calculate bonded forces and apply corrections * to the solvation forces */ /* MRS: Eventually, many need to include free energy contribution here! */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsBONDED); calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef, ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii, &pbc, graph, enerd); wallcycle_sub_stop(wcycle, ewcsBONDED); } #ifdef GMX_MPI if (TAKETIME) { t1 = MPI_Wtime(); fr->t_fnbf += t1-t0; } #endif if (fepvals->sc_alpha != 0) { enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; } else { enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; } if (fepvals->sc_alpha != 0) /* even though coulomb part is linear, we already added it, beacuse we need to go through the vdw calculation anyway */ { enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; } else { enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; } Vsr = 0; if (bSepDVDL) { for (i = 0; i < enerd->grpp.nener; i++) { Vsr += (fr->bBHAM ? enerd->grpp.ener[egBHAMSR][i] : enerd->grpp.ener[egLJSR][i]) + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; } dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum); } debug_gmx(); GMX_MPE_LOG(ev_do_fnbf_finish); if (debug) { pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); } /* Shift the coordinates. Must be done before bonded forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT * go when no bonded forces have to be evaluated. */ /* Here sometimes we would not need to shift with NBFonly, * but we do so anyhow for consistency of the returned coordinates. */ if (graph) { shift_self(graph, box, x); if (TRICLINIC(box)) { inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); } else { inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); } } /* Check whether we need to do bondeds or correct for exclusions */ if (fr->bMolPBC && ((flags & GMX_FORCE_BONDED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype))) { /* Since all atoms are in the rectangular or triclinic unit-cell, * only single box vector shifts (2 in x) are required. */ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); } debug_gmx(); if (flags & GMX_FORCE_BONDED) { GMX_MPE_LOG(ev_calc_bonds_start); wallcycle_sub_start(wcycle, ewcsBONDED); calc_bonds(fplog, cr->ms, idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, flags, fr->bSepDVDL && do_per_step(step, ir->nstlog), step); /* Check if we have to determine energy differences * at foreign lambda's. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && idef->ilsort != ilsortNO_FE) { if (idef->ilsort != ilsortFE_SORTED) { gmx_incons("The bonded interactions are not sorted for free energy"); } for (i = 0; i < enerd->n_lambda; i++) { reset_foreign_enerdata(enerd); for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } debug_gmx(); GMX_MPE_LOG(ev_calc_bonds_finish); wallcycle_sub_stop(wcycle, ewcsBONDED); } where(); *cycles_pme = 0; if (EEL_FULL(fr->eeltype)) { bSB = (ir->nwall == 2); if (bSB) { copy_mat(box, boxs); svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } clear_mat(fr->vir_el_recip); if (fr->bEwald) { Vcorr = 0; dvdl = 0; /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ /* The TPI molecule does not have exclusions with the rest * of the system and no intra-molecular PME grid contributions * will be calculated in gmx_pme_calc_energy. */ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0) { int nthreads, t; wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); if (fr->n_tpi > 0) { gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); } nthreads = gmx_omp_nthreads_get(emntBonded); #pragma omp parallel for num_threads(nthreads) schedule(static) for (t = 0; t < nthreads; t++) { int s, e, i; rvec *fnv; tensor *vir; real *Vcorrt, *dvdlt; if (t == 0) { fnv = fr->f_novirsum; vir = &fr->vir_el_recip; Vcorrt = &Vcorr; dvdlt = &dvdl; } else { fnv = fr->f_t[t].f; vir = &fr->f_t[t].vir; Vcorrt = &fr->f_t[t].Vcorr; dvdlt = &fr->f_t[t].dvdl[efptCOUL]; for (i = 0; i < fr->natoms_force; i++) { clear_rvec(fnv[i]); } clear_mat(*vir); } *dvdlt = 0; *Vcorrt = ewald_LRcorrection(fplog, fr->excl_load[t], fr->excl_load[t+1], cr, t, fr, md->chargeA, md->nChargePerturbed ? md->chargeB : NULL, ir->cutoff_scheme != ecutsVERLET, excl, x, bSB ? boxs : box, mu_tot, ir->ewald_geometry, ir->epsilon_surface, fnv, *vir, lambda[efptCOUL], dvdlt); } if (nthreads > 1) { reduce_thread_forces(fr->natoms_force, fr->f_novirsum, fr->vir_el_recip, &Vcorr, efptCOUL, &dvdl, nthreads, fr->f_t); } wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); } if (fr->n_tpi == 0) { Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, &dvdl, fr->vir_el_recip); } PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl); enerd->dvdl_lin[efptCOUL] += dvdl; } status = 0; Vlr = 0; dvdl = 0; switch (fr->eeltype) { case eelPME: case eelPMESWITCH: case eelPMEUSER: case eelPMEUSERSWITCH: case eelP3M_AD: if (cr->duty & DUTY_PME) { assert(fr->n_tpi >= 0); if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) { pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE; if (flags & GMX_FORCE_FORCES) { pme_flags |= GMX_PME_CALC_F; } if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)) { pme_flags |= GMX_PME_CALC_ENER_VIR; } if (fr->n_tpi > 0) { /* We don't calculate f, but we do want the potential */ pme_flags |= GMX_PME_CALC_POT; } wallcycle_start(wcycle, ewcPMEMESH); status = gmx_pme_do(fr->pmedata, md->start, md->homenr - fr->n_tpi, x, fr->f_novirsum, md->chargeA, md->chargeB, bSB ? boxs : box, cr, DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle, fr->vir_el_recip, fr->ewaldcoeff, &Vlr, lambda[efptCOUL], &dvdl, pme_flags); *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); /* We should try to do as little computation after * this as possible, because parallel PME synchronizes * the nodes, so we want all load imbalance of the rest * of the force calculation to be before the PME call. * DD load balancing is done on the whole time of * the force call (without PME). */ } if (fr->n_tpi > 0) { /* Determine the PME grid energy of the test molecule * with the PME grid potential of the other charges. */ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, x + md->homenr - fr->n_tpi, md->chargeA + md->homenr - fr->n_tpi, &Vlr); } PRINT_SEPDVDL("PME mesh", Vlr, dvdl); } break; case eelEWALD: Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum, md->chargeA, md->chargeB, box_size, cr, md->homenr, fr->vir_el_recip, fr->ewaldcoeff, lambda[efptCOUL], &dvdl, fr->ewald_table); PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl); break; default: gmx_fatal(FARGS, "No such electrostatics method implemented %s", eel_names[fr->eeltype]); } if (status != 0) { gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s", status, EELTYPE(fr->eeltype)); } /* Note that with separate PME nodes we get the real energies later */ enerd->dvdl_lin[efptCOUL] += dvdl; enerd->term[F_COUL_RECIP] = Vlr + Vcorr; if (debug) { fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n", Vlr, Vcorr, enerd->term[F_COUL_RECIP]); pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); } } else { if (EEL_RF(fr->eeltype)) { /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC) { dvdl = 0; enerd->term[F_RF_EXCL] = RF_excl_correction(fplog, fr, graph, md, excl, x, f, fr->fshift, &pbc, lambda[efptCOUL], &dvdl); } enerd->dvdl_lin[efptCOUL] += dvdl; PRINT_SEPDVDL("RF exclusion correction", enerd->term[F_RF_EXCL], dvdl); } } where(); debug_gmx(); if (debug) { print_nrnb(debug, nrnb); } debug_gmx(); #ifdef GMX_MPI if (TAKETIME) { t2 = MPI_Wtime(); MPI_Barrier(cr->mpi_comm_mygroup); t3 = MPI_Wtime(); fr->t_wait += t3-t2; if (fr->timesteps == 11) { fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", cr->nodeid, gmx_step_str(fr->timesteps, buf), 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); } fr->timesteps++; } #endif if (debug) { pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); } GMX_MPE_LOG(ev_force_finish); }
void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, rvec x[], history_t *hist, rvec f[], rvec f_longrange[], gmx_enerdata_t *enerd, t_fcdata *fcd, gmx_localtop_t *top, gmx_genborn_t *born, gmx_bool bBornRadii, matrix box, t_lambda *fepvals, real *lambda, t_graph *graph, t_blocka *excl, rvec mu_tot[], int flags, float *cycles_pme) { int i, j; int donb_flags; gmx_bool bSB; int pme_flags; matrix boxs; rvec box_size; t_pbc pbc; real dvdl_dum[efptNR], dvdl_nb[efptNR]; #ifdef GMX_MPI double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ #endif set_pbc(&pbc, fr->ePBC, box); /* reset free energy components */ for (i = 0; i < efptNR; i++) { dvdl_nb[i] = 0; dvdl_dum[i] = 0; } /* Reset box */ for (i = 0; (i < DIM); i++) { box_size[i] = box[i][i]; } debug_gmx(); /* do QMMM first if requested */ if (fr->bQMMM) { enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); } /* Call the short range functions all in one go. */ #ifdef GMX_MPI /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ #define TAKETIME FALSE if (TAKETIME) { MPI_Barrier(cr->mpi_comm_mygroup); t0 = MPI_Wtime(); } #endif if (ir->nwall) { /* foreign lambda component for walls */ real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], enerd->grpp.ener[egLJSR], nrnb); enerd->dvdl_lin[efptVDW] += dvdl_walls; } /* If doing GB, reset dvda and calculate the Born radii */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsNONBONDED); for (i = 0; i < born->nr; i++) { fr->dvda[i] = 0; } if (bBornRadii) { calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb); } wallcycle_sub_stop(wcycle, ewcsNONBONDED); } where(); /* We only do non-bonded calculation with group scheme here, the verlet * calls are done from do_force_cutsVERLET(). */ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) { donb_flags = 0; /* Add short-range interactions */ donb_flags |= GMX_NONBONDED_DO_SR; /* Currently all group scheme kernels always calculate (shift-)forces */ if (flags & GMX_FORCE_FORCES) { donb_flags |= GMX_NONBONDED_DO_FORCE; } if (flags & GMX_FORCE_VIRIAL) { donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; } if (flags & GMX_FORCE_ENERGY) { donb_flags |= GMX_NONBONDED_DO_POTENTIAL; } if (flags & GMX_FORCE_DO_LR) { donb_flags |= GMX_NONBONDED_DO_LR; } wallcycle_sub_start(wcycle, ewcsNONBONDED); do_nonbonded(fr, x, f, f_longrange, md, excl, &enerd->grpp, nrnb, lambda, dvdl_nb, -1, -1, donb_flags); /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) { for (i = 0; i < enerd->n_lambda; i++) { real lam_i[efptNR]; for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } reset_foreign_enerdata(enerd); do_nonbonded(fr, x, f, f_longrange, md, excl, &(enerd->foreign_grpp), nrnb, lam_i, dvdl_dum, -1, -1, (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } wallcycle_sub_stop(wcycle, ewcsNONBONDED); where(); } /* If we are doing GB, calculate bonded forces and apply corrections * to the solvation forces */ /* MRS: Eventually, many need to include free energy contribution here! */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsLISTED); calc_gb_forces(cr, md, born, top, x, f, fr, idef, ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); wallcycle_sub_stop(wcycle, ewcsLISTED); } #ifdef GMX_MPI if (TAKETIME) { t1 = MPI_Wtime(); fr->t_fnbf += t1-t0; } #endif if (fepvals->sc_alpha != 0) { enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; } else { enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; } if (fepvals->sc_alpha != 0) /* even though coulomb part is linear, we already added it, beacuse we need to go through the vdw calculation anyway */ { enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; } else { enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; } debug_gmx(); if (debug) { pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); } /* Shift the coordinates. Must be done before listed forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT * go when no listed forces have to be evaluated. * * The shifting and PBC code is deliberately not timed, since with * the Verlet scheme it only takes non-zero time with triclinic * boxes, and even then the time is around a factor of 100 less * than the next smallest counter. */ /* Here sometimes we would not need to shift with NBFonly, * but we do so anyhow for consistency of the returned coordinates. */ if (graph) { shift_self(graph, box, x); if (TRICLINIC(box)) { inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); } else { inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); } } /* Check whether we need to do listed interactions or correct for exclusions */ if (fr->bMolPBC && ((flags & GMX_FORCE_LISTED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) { /* TODO There are no electrostatics methods that require this transformation, when using the Verlet scheme, so update the above conditional. */ /* Since all atoms are in the rectangular or triclinic unit-cell, * only single box vector shifts (2 in x) are required. */ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); } debug_gmx(); do_force_listed(wcycle, box, ir->fepvals, cr->ms, idef, (const rvec *) x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, flags); where(); *cycles_pme = 0; clear_mat(fr->vir_el_recip); clear_mat(fr->vir_lj_recip); /* Do long-range electrostatics and/or LJ-PME, including related short-range * corrections. */ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) { int status = 0; real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; bSB = (ir->nwall == 2); if (bSB) { copy_mat(box, boxs); svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) { real dvdl_long_range_correction_q = 0; real dvdl_long_range_correction_lj = 0; /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ /* The TPI molecule does not have exclusions with the rest * of the system and no intra-molecular PME grid * contributions will be calculated in * gmx_pme_calc_energy. */ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0) { int nthreads, t; wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); if (fr->n_tpi > 0) { gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); } nthreads = fr->nthread_ewc; #pragma omp parallel for num_threads(nthreads) schedule(static) for (t = 0; t < nthreads; t++) { try { tensor *vir_q, *vir_lj; real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; if (t == 0) { vir_q = &fr->vir_el_recip; vir_lj = &fr->vir_lj_recip; Vcorrt_q = &Vcorr_q; Vcorrt_lj = &Vcorr_lj; dvdlt_q = &dvdl_long_range_correction_q; dvdlt_lj = &dvdl_long_range_correction_lj; } else { vir_q = &fr->ewc_t[t].vir_q; vir_lj = &fr->ewc_t[t].vir_lj; Vcorrt_q = &fr->ewc_t[t].Vcorr_q; Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj; dvdlt_q = &fr->ewc_t[t].dvdl[efptCOUL]; dvdlt_lj = &fr->ewc_t[t].dvdl[efptVDW]; clear_mat(*vir_q); clear_mat(*vir_lj); } *dvdlt_q = 0; *dvdlt_lj = 0; /* Threading is only supported with the Verlet cut-off * scheme and then only single particle forces (no * exclusion forces) are calculated, so we can store * the forces in the normal, single fr->f_novirsum array. */ ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], cr, t, fr, md->chargeA, md->chargeB, md->sqrt_c6A, md->sqrt_c6B, md->sigmaA, md->sigmaB, md->sigma3A, md->sigma3B, md->nChargePerturbed || md->nTypePerturbed, ir->cutoff_scheme != ecutsVERLET, excl, x, bSB ? boxs : box, mu_tot, ir->ewald_geometry, ir->epsilon_surface, fr->f_novirsum, *vir_q, *vir_lj, Vcorrt_q, Vcorrt_lj, lambda[efptCOUL], lambda[efptVDW], dvdlt_q, dvdlt_lj); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (nthreads > 1) { reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip, &Vcorr_q, &Vcorr_lj, &dvdl_long_range_correction_q, &dvdl_long_range_correction_lj, nthreads, fr->ewc_t); } wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); }
void do_force_listed(gmx_wallcycle *wcycle, matrix box, const t_lambda *fepvals, const gmx_multisim_t *ms, const t_idef *idef, const rvec x[], history_t *hist, rvec f[], t_forcerec *fr, const struct t_pbc *pbc, const struct t_graph *graph, gmx_enerdata_t *enerd, t_nrnb *nrnb, real *lambda, const t_mdatoms *md, t_fcdata *fcd, int *global_atom_index, int flags) { t_pbc pbc_full; /* Full PBC is needed for position restraints */ if (!(flags & GMX_FORCE_LISTED)) { return; } if ((idef->il[F_POSRES].nr > 0) || (idef->il[F_FBPOSRES].nr > 0)) { /* Not enough flops to bother counting */ set_pbc(&pbc_full, fr->ePBC, box); } calc_listed(ms, wcycle, idef, x, hist, f, fr, pbc, &pbc_full, graph, enerd, nrnb, lambda, md, fcd, global_atom_index, flags); /* Check if we have to determine energy differences * at foreign lambda's. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL)) { posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr); if (idef->ilsort != ilsortNO_FE) { wallcycle_sub_start(wcycle, ewcsLISTED_FEP); if (idef->ilsort != ilsortFE_SORTED) { gmx_incons("The bonded interactions are not sorted for free energy"); } for (int i = 0; i < enerd->n_lambda; i++) { real lam_i[efptNR]; reset_foreign_enerdata(enerd); for (int j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } calc_listed_lambda(idef, x, fr, pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, fcd, global_atom_index); sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } wallcycle_sub_stop(wcycle, ewcsLISTED_FEP); } } debug_gmx(); }
void calc_listed(const gmx_multisim_t *ms, gmx_wallcycle *wcycle, const t_idef *idef, const rvec x[], history_t *hist, rvec f[], t_forcerec *fr, const struct t_pbc *pbc, const struct t_pbc *pbc_full, const struct t_graph *g, gmx_enerdata_t *enerd, t_nrnb *nrnb, real *lambda, const t_mdatoms *md, t_fcdata *fcd, int *global_atom_index, int force_flags) { gmx_bool bCalcEnerVir; int i; real dvdl[efptNR]; /* The dummy array is to have a place to store the dhdl at other values of lambda, which will be thrown away in the end*/ const t_pbc *pbc_null; int thread; assert(fr->nthreads == idef->nthreads); bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)); for (i = 0; i < efptNR; i++) { dvdl[i] = 0.0; } if (fr->bMolPBC) { pbc_null = pbc; } else { pbc_null = NULL; } #ifdef DEBUG if (g && debug) { p_graph(debug, "Bondage is fun", g); } #endif if ((idef->il[F_POSRES].nr > 0) || (idef->il[F_FBPOSRES].nr > 0) || (idef->il[F_ORIRES].nr > 0) || (idef->il[F_DISRES].nr > 0)) { /* TODO Use of restraints triggers further function calls inside the loop over calc_one_bond(), but those are too awkward to account to this subtimer properly in the present code. We don't test / care much about performance with restraints, anyway. */ wallcycle_sub_start(wcycle, ewcsRESTRAINTS); if (idef->il[F_POSRES].nr > 0) { posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr); } if (idef->il[F_FBPOSRES].nr > 0) { fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr); } /* Do pre force calculation stuff which might require communication */ if (idef->il[F_ORIRES].nr > 0) { enerd->term[F_ORIRESDEV] = calc_orires_dev(ms, idef->il[F_ORIRES].nr, idef->il[F_ORIRES].iatoms, idef->iparams, md, x, pbc_null, fcd, hist); } if (idef->il[F_DISRES].nr) { calc_disres_R_6(idef->il[F_DISRES].nr, idef->il[F_DISRES].iatoms, idef->iparams, x, pbc_null, fcd, hist); #ifdef GMX_MPI if (fcd->disres.nsystems > 1) { gmx_sum_sim(2*fcd->disres.nres, fcd->disres.Rt_6, ms); } #endif } wallcycle_sub_stop(wcycle, ewcsRESTRAINTS); } wallcycle_sub_start(wcycle, ewcsLISTED); #pragma omp parallel for num_threads(fr->nthreads) schedule(static) for (thread = 0; thread < fr->nthreads; thread++) { int ftype; real *epot, v; /* thread stuff */ rvec *ft, *fshift; real *dvdlt; gmx_grppairener_t *grpp; if (thread == 0) { ft = f; fshift = fr->fshift; epot = enerd->term; grpp = &enerd->grpp; dvdlt = dvdl; } else { zero_thread_forces(&fr->f_t[thread], fr->natoms_force, fr->red_nblock, 1<<fr->red_ashift); ft = fr->f_t[thread].f; fshift = fr->f_t[thread].fshift; epot = fr->f_t[thread].ener; grpp = &fr->f_t[thread].grpp; dvdlt = fr->f_t[thread].dvdl; } /* Loop over all bonded force types to calculate the bonded forces */ for (ftype = 0; (ftype < F_NRE); ftype++) { if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype)) { v = calc_one_bond(thread, ftype, idef, x, ft, fshift, fr, pbc_null, g, grpp, nrnb, lambda, dvdlt, md, fcd, bCalcEnerVir, global_atom_index); epot[ftype] += v; } } } wallcycle_sub_stop(wcycle, ewcsLISTED); if (fr->nthreads > 1) { wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS); reduce_thread_forces(fr->natoms_force, f, fr->fshift, enerd->term, &enerd->grpp, dvdl, fr->nthreads, fr->f_t, fr->red_nblock, 1<<fr->red_ashift, bCalcEnerVir, force_flags & GMX_FORCE_DHDL); wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS); } /* Remaining code does not have enough flops to bother counting */ if (force_flags & GMX_FORCE_DHDL) { for (i = 0; i < efptNR; i++) { enerd->dvdl_nonlin[i] += dvdl[i]; } } /* Copy the sum of violations for the distance restraints from fcd */ if (fcd) { enerd->term[F_DISRESVIOL] = fcd->disres.sumviol; } }
void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, rvec x[], history_t *hist, rvec f[], rvec f_longrange[], gmx_enerdata_t *enerd, t_fcdata *fcd, gmx_localtop_t *top, gmx_genborn_t *born, gmx_bool bBornRadii, matrix box, t_lambda *fepvals, real *lambda, t_graph *graph, t_blocka *excl, rvec mu_tot[], int flags, float *cycles_pme) { int i, j; int donb_flags; gmx_bool bSB; int pme_flags; matrix boxs; rvec box_size; t_pbc pbc; real dvdl_dum[efptNR], dvdl_nb[efptNR]; #ifdef GMX_MPI double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ #endif set_pbc(&pbc, fr->ePBC, box); /* reset free energy components */ for (i = 0; i < efptNR; i++) { dvdl_nb[i] = 0; dvdl_dum[i] = 0; } /* Reset box */ for (i = 0; (i < DIM); i++) { box_size[i] = box[i][i]; } debug_gmx(); /* do QMMM first if requested */ if (fr->bQMMM) { enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); } /* Call the short range functions all in one go. */ #ifdef GMX_MPI /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ #define TAKETIME FALSE if (TAKETIME) { MPI_Barrier(cr->mpi_comm_mygroup); t0 = MPI_Wtime(); } #endif if (ir->nwall) { /* foreign lambda component for walls */ real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], enerd->grpp.ener[egLJSR], nrnb); enerd->dvdl_lin[efptVDW] += dvdl_walls; } /* If doing GB, reset dvda and calculate the Born radii */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsNONBONDED); for (i = 0; i < born->nr; i++) { fr->dvda[i] = 0; } if (bBornRadii) { calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb); } wallcycle_sub_stop(wcycle, ewcsNONBONDED); } where(); /* We only do non-bonded calculation with group scheme here, the verlet * calls are done from do_force_cutsVERLET(). */ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) { donb_flags = 0; /* Add short-range interactions */ donb_flags |= GMX_NONBONDED_DO_SR; /* Currently all group scheme kernels always calculate (shift-)forces */ if (flags & GMX_FORCE_FORCES) { donb_flags |= GMX_NONBONDED_DO_FORCE; } if (flags & GMX_FORCE_VIRIAL) { donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; } if (flags & GMX_FORCE_ENERGY) { donb_flags |= GMX_NONBONDED_DO_POTENTIAL; } if (flags & GMX_FORCE_DO_LR) { donb_flags |= GMX_NONBONDED_DO_LR; } wallcycle_sub_start(wcycle, ewcsNONBONDED); do_nonbonded(fr, x, f, f_longrange, md, excl, &enerd->grpp, nrnb, lambda, dvdl_nb, -1, -1, donb_flags); /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) { for (i = 0; i < enerd->n_lambda; i++) { real lam_i[efptNR]; for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } reset_foreign_enerdata(enerd); do_nonbonded(fr, x, f, f_longrange, md, excl, &(enerd->foreign_grpp), nrnb, lam_i, dvdl_dum, -1, -1, (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } wallcycle_sub_stop(wcycle, ewcsNONBONDED); where(); } /* If we are doing GB, calculate bonded forces and apply corrections * to the solvation forces */ /* MRS: Eventually, many need to include free energy contribution here! */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsLISTED); calc_gb_forces(cr, md, born, top, x, f, fr, idef, ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); wallcycle_sub_stop(wcycle, ewcsLISTED); } #ifdef GMX_MPI if (TAKETIME) { t1 = MPI_Wtime(); fr->t_fnbf += t1-t0; } #endif if (fepvals->sc_alpha != 0) { enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; } else { enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; } if (fepvals->sc_alpha != 0) /* even though coulomb part is linear, we already added it, beacuse we need to go through the vdw calculation anyway */ { enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; } else { enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; } debug_gmx(); if (debug) { pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); } /* Shift the coordinates. Must be done before listed forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT * go when no listed forces have to be evaluated. * * The shifting and PBC code is deliberately not timed, since with * the Verlet scheme it only takes non-zero time with triclinic * boxes, and even then the time is around a factor of 100 less * than the next smallest counter. */ /* Here sometimes we would not need to shift with NBFonly, * but we do so anyhow for consistency of the returned coordinates. */ if (graph) { shift_self(graph, box, x); if (TRICLINIC(box)) { inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); } else { inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); } } /* Check whether we need to do listed interactions or correct for exclusions */ if (fr->bMolPBC && ((flags & GMX_FORCE_LISTED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) { /* TODO There are no electrostatics methods that require this transformation, when using the Verlet scheme, so update the above conditional. */ /* Since all atoms are in the rectangular or triclinic unit-cell, * only single box vector shifts (2 in x) are required. */ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); } debug_gmx(); do_force_listed(wcycle, box, ir->fepvals, cr->ms, idef, (const rvec *) x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, flags); where(); *cycles_pme = 0; clear_mat(fr->vir_el_recip); clear_mat(fr->vir_lj_recip); /* Do long-range electrostatics and/or LJ-PME, including related short-range * corrections. */ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) { int status = 0; real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; bSB = (ir->nwall == 2); if (bSB) { copy_mat(box, boxs); svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) { real dvdl_long_range_correction_q = 0; real dvdl_long_range_correction_lj = 0; /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ /* The TPI molecule does not have exclusions with the rest * of the system and no intra-molecular PME grid * contributions will be calculated in * gmx_pme_calc_energy. */ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0) { int nthreads, t; wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); if (fr->n_tpi > 0) { gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); } nthreads = gmx_omp_nthreads_get(emntBonded); #pragma omp parallel for num_threads(nthreads) schedule(static) for (t = 0; t < nthreads; t++) { int i; rvec *fnv; tensor *vir_q, *vir_lj; real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; if (t == 0) { fnv = fr->f_novirsum; vir_q = &fr->vir_el_recip; vir_lj = &fr->vir_lj_recip; Vcorrt_q = &Vcorr_q; Vcorrt_lj = &Vcorr_lj; dvdlt_q = &dvdl_long_range_correction_q; dvdlt_lj = &dvdl_long_range_correction_lj; } else { fnv = fr->f_t[t].f; vir_q = &fr->f_t[t].vir_q; vir_lj = &fr->f_t[t].vir_lj; Vcorrt_q = &fr->f_t[t].Vcorr_q; Vcorrt_lj = &fr->f_t[t].Vcorr_lj; dvdlt_q = &fr->f_t[t].dvdl[efptCOUL]; dvdlt_lj = &fr->f_t[t].dvdl[efptVDW]; for (i = 0; i < fr->natoms_force; i++) { clear_rvec(fnv[i]); } clear_mat(*vir_q); clear_mat(*vir_lj); } *dvdlt_q = 0; *dvdlt_lj = 0; ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], cr, t, fr, md->chargeA, md->chargeB, md->sqrt_c6A, md->sqrt_c6B, md->sigmaA, md->sigmaB, md->sigma3A, md->sigma3B, md->nChargePerturbed || md->nTypePerturbed, ir->cutoff_scheme != ecutsVERLET, excl, x, bSB ? boxs : box, mu_tot, ir->ewald_geometry, ir->epsilon_surface, fnv, *vir_q, *vir_lj, Vcorrt_q, Vcorrt_lj, lambda[efptCOUL], lambda[efptVDW], dvdlt_q, dvdlt_lj); } if (nthreads > 1) { reduce_thread_forces(fr->natoms_force, fr->f_novirsum, fr->vir_el_recip, fr->vir_lj_recip, &Vcorr_q, &Vcorr_lj, &dvdl_long_range_correction_q, &dvdl_long_range_correction_lj, nthreads, fr->f_t); } wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); } if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0) { /* This is not in a subcounter because it takes a negligible and constant-sized amount of time */ Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, &dvdl_long_range_correction_q, fr->vir_el_recip); } enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q; enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj; if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME)) { /* Do reciprocal PME for Coulomb and/or LJ. */ assert(fr->n_tpi >= 0); if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) { pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; if (EEL_PME(fr->eeltype)) { pme_flags |= GMX_PME_DO_COULOMB; } if (EVDW_PME(fr->vdwtype)) { pme_flags |= GMX_PME_DO_LJ; } if (flags & GMX_FORCE_FORCES) { pme_flags |= GMX_PME_CALC_F; } if (flags & GMX_FORCE_VIRIAL) { pme_flags |= GMX_PME_CALC_ENER_VIR; } if (fr->n_tpi > 0) { /* We don't calculate f, but we do want the potential */ pme_flags |= GMX_PME_CALC_POT; } wallcycle_start(wcycle, ewcPMEMESH); status = gmx_pme_do(fr->pmedata, 0, md->homenr - fr->n_tpi, x, fr->f_novirsum, md->chargeA, md->chargeB, md->sqrt_c6A, md->sqrt_c6B, md->sigmaA, md->sigmaB, bSB ? boxs : box, cr, DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle, fr->vir_el_recip, fr->ewaldcoeff_q, fr->vir_lj_recip, fr->ewaldcoeff_lj, &Vlr_q, &Vlr_lj, lambda[efptCOUL], lambda[efptVDW], &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags); *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); if (status != 0) { gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); } /* We should try to do as little computation after * this as possible, because parallel PME synchronizes * the nodes, so we want all load imbalance of the * rest of the force calculation to be before the PME * call. DD load balancing is done on the whole time * of the force call (without PME). */ } if (fr->n_tpi > 0) { if (EVDW_PME(ir->vdwtype)) { gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); } /* Determine the PME grid energy of the test molecule * with the PME grid potential of the other charges. */ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, x + md->homenr - fr->n_tpi, md->chargeA + md->homenr - fr->n_tpi, &Vlr_q); } } } if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype)) { Vlr_q = do_ewald(ir, x, fr->f_novirsum, md->chargeA, md->chargeB, box_size, cr, md->homenr, fr->vir_el_recip, fr->ewaldcoeff_q, lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table); } /* Note that with separate PME nodes we get the real energies later */ enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q; enerd->dvdl_lin[efptVDW] += dvdl_long_range_lj; enerd->term[F_COUL_RECIP] = Vlr_q + Vcorr_q; enerd->term[F_LJ_RECIP] = Vlr_lj + Vcorr_lj; if (debug) { fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]); pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]); pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM); } } else { /* Is there a reaction-field exclusion correction needed? */ if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype) { /* With the Verlet scheme, exclusion forces are calculated * in the non-bonded kernel. */ if (ir->cutoff_scheme != ecutsVERLET) { real dvdl_rf_excl = 0; enerd->term[F_RF_EXCL] = RF_excl_correction(fr, graph, md, excl, x, f, fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; } } } where(); debug_gmx(); if (debug) { print_nrnb(debug, nrnb); } debug_gmx(); #ifdef GMX_MPI if (TAKETIME) { t2 = MPI_Wtime(); MPI_Barrier(cr->mpi_comm_mygroup); t3 = MPI_Wtime(); fr->t_wait += t3-t2; if (fr->timesteps == 11) { char buf[22]; fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", cr->nodeid, gmx_step_str(fr->timesteps, buf), 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); } fr->timesteps++; } #endif if (debug) { pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); } }
void calc_listed(const t_commrec *cr, const gmx_multisim_t *ms, struct gmx_wallcycle *wcycle, const t_idef *idef, const rvec x[], history_t *hist, rvec f[], gmx::ForceWithVirial *forceWithVirial, const t_forcerec *fr, const struct t_pbc *pbc, const struct t_pbc *pbc_full, const struct t_graph *g, gmx_enerdata_t *enerd, t_nrnb *nrnb, const real *lambda, const t_mdatoms *md, t_fcdata *fcd, int *global_atom_index, int force_flags) { gmx_bool bCalcEnerVir; const t_pbc *pbc_null; bonded_threading_t *bt = fr->bondedThreading; bCalcEnerVir = ((force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)) != 0); if (fr->bMolPBC) { pbc_null = pbc; } else { pbc_null = nullptr; } if ((idef->il[F_POSRES].nr > 0) || (idef->il[F_FBPOSRES].nr > 0) || fcd->orires.nr > 0 || fcd->disres.nres > 0) { /* TODO Use of restraints triggers further function calls inside the loop over calc_one_bond(), but those are too awkward to account to this subtimer properly in the present code. We don't test / care much about performance with restraints, anyway. */ wallcycle_sub_start(wcycle, ewcsRESTRAINTS); if (idef->il[F_POSRES].nr > 0) { posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr, forceWithVirial); } if (idef->il[F_FBPOSRES].nr > 0) { fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr, forceWithVirial); } /* Do pre force calculation stuff which might require communication */ if (fcd->orires.nr > 0) { /* This assertion is to ensure we have whole molecules. * Unfortunately we do not have an mdrun state variable that tells * us if molecules in x are not broken over PBC, so we have to make * do with checking graph!=nullptr, which should tell us if we made * molecules whole before calling the current function. */ GMX_RELEASE_ASSERT(fr->ePBC == epbcNONE || g != nullptr, "With orientation restraints molecules should be whole"); enerd->term[F_ORIRESDEV] = calc_orires_dev(ms, idef->il[F_ORIRES].nr, idef->il[F_ORIRES].iatoms, idef->iparams, md, x, pbc_null, fcd, hist); } if (fcd->disres.nres > 0) { calc_disres_R_6(cr, ms, idef->il[F_DISRES].nr, idef->il[F_DISRES].iatoms, x, pbc_null, fcd, hist); } wallcycle_sub_stop(wcycle, ewcsRESTRAINTS); } if (bt->haveBondeds) { wallcycle_sub_start(wcycle, ewcsLISTED); /* The dummy array is to have a place to store the dhdl at other values of lambda, which will be thrown away in the end */ real dvdl[efptNR] = {0}; calcBondedForces(idef, x, fr, pbc_null, g, enerd, nrnb, lambda, dvdl, md, fcd, bCalcEnerVir, global_atom_index); wallcycle_sub_stop(wcycle, ewcsLISTED); wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS); reduce_thread_output(fr->natoms_force, f, fr->fshift, enerd->term, &enerd->grpp, dvdl, bt, bCalcEnerVir, (force_flags & GMX_FORCE_DHDL) != 0); if (force_flags & GMX_FORCE_DHDL) { for (int i = 0; i < efptNR; i++) { enerd->dvdl_nonlin[i] += dvdl[i]; } } wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS); } /* Copy the sum of violations for the distance restraints from fcd */ if (fcd) { enerd->term[F_DISRESVIOL] = fcd->disres.sumviol; } }