void pme_gpu_launch_complex_transforms(gmx_pme_t *pme, gmx_wallcycle *wcycle) { PmeGpu *pmeGpu = pme->gpu; const bool computeEnergyAndVirial = (pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0; const bool performBackFFT = (pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0; const unsigned int gridIndex = 0; t_complex *cfftgrid = pme->cfftgrid[gridIndex]; if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD) { if (!pme_gpu_performs_FFT(pmeGpu)) { wallcycle_start(wcycle, ewcWAIT_GPU_PME_SPREAD); pme_gpu_sync_spread_grid(pme->gpu); wallcycle_stop(wcycle, ewcWAIT_GPU_PME_SPREAD); } } try { if (pmeGpu->settings.currentFlags & GMX_PME_SOLVE) { /* do R2C 3D-FFT */ parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_REAL_TO_COMPLEX, wcycle); /* solve in k-space for our local cells */ if (pme_gpu_performs_solve(pmeGpu)) { const auto gridOrdering = pme_gpu_uses_dd(pmeGpu) ? GridOrdering::YZX : GridOrdering::XYZ; wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME); pme_gpu_solve(pmeGpu, cfftgrid, gridOrdering, computeEnergyAndVirial); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); } else { wallcycle_start(wcycle, ewcPME_SOLVE_MIXED_MODE); #pragma omp parallel for num_threads(pme->nthread) schedule(static) for (int thread = 0; thread < pme->nthread; thread++) { solve_pme_yzx(pme, cfftgrid, pme->boxVolume, computeEnergyAndVirial, pme->nthread, thread); } wallcycle_stop(wcycle, ewcPME_SOLVE_MIXED_MODE); } } if (performBackFFT) { parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_COMPLEX_TO_REAL, wcycle); } } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; }
/*! \brief * A convenience wrapper for launching either the GPU or CPU FFT. * * \param[in] pme The PME structure. * \param[in] gridIndex The grid index - should currently always be 0. * \param[in] dir The FFT direction enum. * \param[in] wcycle The wallclock counter. */ void inline parallel_3dfft_execute_gpu_wrapper(gmx_pme_t *pme, const int gridIndex, enum gmx_fft_direction dir, gmx_wallcycle_t wcycle) { GMX_ASSERT(gridIndex == 0, "Only single grid supported"); if (pme_gpu_performs_FFT(pme->gpu)) { wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME); pme_gpu_3dfft(pme->gpu, dir, gridIndex); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); } else { wallcycle_start(wcycle, ewcPME_FFT_MIXED_MODE); #pragma omp parallel for num_threads(pme->nthread) schedule(static) for (int thread = 0; thread < pme->nthread; thread++) { gmx_parallel_3dfft_execute(pme->pfft_setup[gridIndex], dir, thread, wcycle); } wallcycle_stop(wcycle, ewcPME_FFT_MIXED_MODE); } }
void pme_gpu_launch_spread(gmx_pme_t *pme, const rvec *x, gmx_wallcycle *wcycle) { GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled."); PmeGpu *pmeGpu = pme->gpu; // The only spot of PME GPU where LAUNCH_GPU counter increases call-count wallcycle_start(wcycle, ewcLAUNCH_GPU); // The only spot of PME GPU where ewcsLAUNCH_GPU_PME subcounter increases call-count wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_PME); pme_gpu_copy_input_coordinates(pmeGpu, x); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); const unsigned int gridIndex = 0; real *fftgrid = pme->fftgrid[gridIndex]; if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD) { /* Spread the coefficients on a grid */ const bool computeSplines = true; const bool spreadCharges = true; wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME); pme_gpu_spread(pmeGpu, gridIndex, fftgrid, computeSplines, spreadCharges); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME); wallcycle_stop(wcycle, ewcLAUNCH_GPU); } }
void mdoutf_tng_close(gmx_mdoutf_t of) { if (of->tng || of->tng_low_prec) { wallcycle_start(of->wcycle, ewcTRAJ); gmx_tng_close(&of->tng); gmx_tng_close(&of->tng_low_prec); wallcycle_stop(of->wcycle, ewcTRAJ); } }
void wallcycle_start_nocount(gmx_wallcycle_t wc, int ewc) { if (wc == NULL) { return; } wallcycle_start(wc, ewc); wc->wcc[ewc].n--; }
static void reset_pmeonly_counters(gmx_wallcycle_t wcycle, gmx_walltime_accounting_t walltime_accounting, t_nrnb *nrnb, int64_t step, bool useGpuForPme) { /* Reset all the counters related to performance over the run */ wallcycle_stop(wcycle, ewcRUN); wallcycle_reset_all(wcycle); init_nrnb(nrnb); wallcycle_start(wcycle, ewcRUN); walltime_accounting_reset_time(walltime_accounting, step); if (useGpuForPme) { resetGpuProfiler(); } }
static void reset_pmeonly_counters(gmx_wallcycle_t wcycle, gmx_walltime_accounting_t walltime_accounting, t_nrnb *nrnb, t_inputrec *ir, gmx_int64_t step) { /* Reset all the counters related to performance over the run */ wallcycle_stop(wcycle, ewcRUN); wallcycle_reset_all(wcycle); init_nrnb(nrnb); if (ir->nsteps >= 0) { /* ir->nsteps is not used here, but we update it for consistency */ ir->nsteps -= step - ir->init_step; } ir->init_step = step; wallcycle_start(wcycle, ewcRUN); walltime_accounting_start(walltime_accounting); }
bool pme_gpu_try_finish_task(const gmx_pme_t *pme, gmx_wallcycle *wcycle, gmx::ArrayRef<const gmx::RVec> *forces, matrix virial, real *energy, GpuTaskCompletion completionKind) { GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled."); wallcycle_start_nocount(wcycle, ewcWAIT_GPU_PME_GATHER); if (completionKind == GpuTaskCompletion::Check) { // Query the PME stream for completion of all tasks enqueued and // if we're not done, stop the timer before early return. if (!pme_gpu_stream_query(pme->gpu)) { wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER); return false; } } else { // Synchronize the whole PME stream at once, including D2H result transfers. pme_gpu_synchronize(pme->gpu); } wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER); // Time the final staged data handling separately with a counting call to get // the call count right. wallcycle_start(wcycle, ewcWAIT_GPU_PME_GATHER); pme_gpu_update_timings(pme->gpu); pme_gpu_get_staged_results(pme, forces, virial, energy); wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER); return true; }
void do_md_trajectory_writing(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], gmx_int64_t step, gmx_int64_t step_rel, double t, t_inputrec *ir, t_state *state, t_state *state_global, gmx_mtop_t *top_global, t_forcerec *fr, gmx_mdoutf_t outf, t_mdebin *mdebin, gmx_ekindata_t *ekind, rvec *f, rvec *f_global, int *nchkpt, gmx_bool bCPT, gmx_bool bRerunMD, gmx_bool bLastStep, gmx_bool bDoConfOut, gmx_bool bSumEkinhOld ) { int mdof_flags; mdof_flags = 0; if (do_per_step(step, ir->nstxout)) { mdof_flags |= MDOF_X; } if (do_per_step(step, ir->nstvout)) { mdof_flags |= MDOF_V; } if (do_per_step(step, ir->nstfout)) { mdof_flags |= MDOF_F; } if (do_per_step(step, ir->nstxout_compressed)) { mdof_flags |= MDOF_X_COMPRESSED; } if (bCPT) { mdof_flags |= MDOF_CPT; } ; #if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP) if (bLastStep) { /* Enforce writing positions and velocities at end of run */ mdof_flags |= (MDOF_X | MDOF_V); } #endif #ifdef GMX_FAHCORE if (MASTER(cr)) { fcReportProgress( ir->nsteps, step ); } #if defined(__native_client__) fcCheckin(MASTER(cr)); #endif /* sync bCPT and fc record-keeping */ if (bCPT && MASTER(cr)) { fcRequestCheckPoint(); } #endif if (mdof_flags != 0) { wallcycle_start(mdoutf_get_wcycle(outf), ewcTRAJ); if (bCPT) { if (MASTER(cr)) { if (bSumEkinhOld) { state_global->ekinstate.bUpToDate = FALSE; } else { update_ekinstate(&state_global->ekinstate, ekind); state_global->ekinstate.bUpToDate = TRUE; } update_energyhistory(&state_global->enerhist, mdebin); } } mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global, step, t, state, state_global, f, f_global); if (bCPT) { (*nchkpt)++; } debug_gmx(); if (bLastStep && step_rel == ir->nsteps && bDoConfOut && MASTER(cr) && !bRerunMD) { /* x and v have been collected in mdoutf_write_to_trajectory_files, * because a checkpoint file will always be written * at the last step. */ fprintf(stderr, "\nWriting final coordinates.\n"); if (fr->bMolPBC) { /* Make molecules whole only for confout writing */ do_pbc_mtop(fplog, ir->ePBC, state->box, top_global, state_global->x); } write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm), *top_global->name, top_global, state_global->x, state_global->v, ir->ePBC, state->box); debug_gmx(); } wallcycle_stop(mdoutf_get_wcycle(outf), ewcTRAJ); } }
void compute_globals(FILE *fplog, gmx_global_stat_t gstat, t_commrec *cr, t_inputrec *ir, t_forcerec *fr, gmx_ekindata_t *ekind, t_state *state, t_state *state_global, t_mdatoms *mdatoms, t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle, gmx_enerdata_t *enerd, tensor force_vir, tensor shake_vir, tensor total_vir, tensor pres, rvec mu_tot, gmx_constr_t constr, globsig_t *gs, gmx_bool bInterSimGS, matrix box, gmx_mtop_t *top_global, gmx_bool *bSumEkinhOld, int flags) { int i, gsi; real gs_buf[eglsNR]; tensor corr_vir, corr_pres; gmx_bool bEner, bPres, bTemp; gmx_bool bStopCM, bGStat, bReadEkin, bEkinAveVel, bScaleEkin, bConstrain; real prescorr, enercorr, dvdlcorr, dvdl_ekin; /* translate CGLO flags to gmx_booleans */ bStopCM = flags & CGLO_STOPCM; bGStat = flags & CGLO_GSTAT; bReadEkin = (flags & CGLO_READEKIN); bScaleEkin = (flags & CGLO_SCALEEKIN); bEner = flags & CGLO_ENERGY; bTemp = flags & CGLO_TEMPERATURE; bPres = (flags & CGLO_PRESSURE); bConstrain = (flags & CGLO_CONSTRAINT); /* we calculate a full state kinetic energy either with full-step velocity verlet or half step where we need the pressure */ bEkinAveVel = (ir->eI == eiVV || (ir->eI == eiVVAK && bPres) || bReadEkin); /* in initalization, it sums the shake virial in vv, and to sums ekinh_old in leapfrog (or if we are calculating ekinh_old) for other reasons */ /* ########## Kinetic energy ############## */ if (bTemp) { /* Non-equilibrium MD: this is parallellized, but only does communication * when there really is NEMD. */ if (PAR(cr) && (ekind->bNEMD)) { accumulate_u(cr, &(ir->opts), ekind); } debug_gmx(); if (bReadEkin) { restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate); } else { calc_ke_part(state, &(ir->opts), mdatoms, ekind, nrnb, bEkinAveVel); } debug_gmx(); } /* Calculate center of mass velocity if necessary, also parallellized */ if (bStopCM) { calc_vcm_grp(0, mdatoms->homenr, mdatoms, state->x, state->v, vcm); } if (bTemp || bStopCM || bPres || bEner || bConstrain) { if (!bGStat) { /* We will not sum ekinh_old, * so signal that we still have to do it. */ *bSumEkinhOld = TRUE; } else { if (gs != NULL) { for (i = 0; i < eglsNR; i++) { gs_buf[i] = gs->sig[i]; } } if (PAR(cr)) { wallcycle_start(wcycle, ewcMoveE); global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot, ir, ekind, constr, bStopCM ? vcm : NULL, gs != NULL ? eglsNR : 0, gs_buf, top_global, state, *bSumEkinhOld, flags); wallcycle_stop(wcycle, ewcMoveE); } if (gs != NULL) { if (MULTISIM(cr) && bInterSimGS) { if (MASTER(cr)) { /* Communicate the signals between the simulations */ gmx_sum_sim(eglsNR, gs_buf, cr->ms); } /* Communicate the signals form the master to the others */ gmx_bcast(eglsNR*sizeof(gs_buf[0]), gs_buf, cr); } for (i = 0; i < eglsNR; i++) { if (bInterSimGS || gs_simlocal[i]) { /* Set the communicated signal only when it is non-zero, * since signals might not be processed at each MD step. */ gsi = (gs_buf[i] >= 0 ? (int)(gs_buf[i] + 0.5) : (int)(gs_buf[i] - 0.5)); if (gsi != 0) { gs->set[i] = gsi; } /* Turn off the local signal */ gs->sig[i] = 0; } } } *bSumEkinhOld = FALSE; } } if (!ekind->bNEMD && debug && bTemp && (vcm->nr > 0)) { correct_ekin(debug, 0, mdatoms->homenr, state->v, vcm->group_p[0], mdatoms->massT, mdatoms->tmass, ekind->ekin); } /* Do center of mass motion removal */ if (bStopCM) { check_cm_grp(fplog, vcm, ir, 1); do_stopcm_grp(0, mdatoms->homenr, mdatoms->cVCM, state->x, state->v, vcm); inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr); } if (bEner) { /* Calculate the amplitude of the cosine velocity profile */ ekind->cosacc.vcos = ekind->cosacc.mvcos/mdatoms->tmass; } if (bTemp) { /* Sum the kinetic energies of the groups & calc temp */ /* compute full step kinetic energies if vv, or if vv-avek and we are computing the pressure with IR_NPT_TROTTER */ /* three maincase: VV with AveVel (md-vv), vv with AveEkin (md-vv-avek), leap with AveEkin (md). Leap with AveVel is not supported; it's not clear that it will actually work. bEkinAveVel: If TRUE, we simply multiply ekin by ekinscale to get a full step kinetic energy. If FALSE, we average ekinh_old and ekinh*ekinscale_nhc to get an averaged half step kinetic energy. */ enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, &dvdl_ekin, bEkinAveVel, bScaleEkin); enerd->dvdl_lin[efptMASS] = (double) dvdl_ekin; enerd->term[F_EKIN] = trace(ekind->ekin); } /* ########## Long range energy information ###### */ if (bEner || bPres || bConstrain) { calc_dispcorr(ir, fr, top_global->natoms, box, state->lambda[efptVDW], corr_pres, corr_vir, &prescorr, &enercorr, &dvdlcorr); } if (bEner) { enerd->term[F_DISPCORR] = enercorr; enerd->term[F_EPOT] += enercorr; enerd->term[F_DVDL_VDW] += dvdlcorr; } /* ########## Now pressure ############## */ if (bPres || bConstrain) { m_add(force_vir, shake_vir, total_vir); /* Calculate pressure and apply LR correction if PPPM is used. * Use the box from last timestep since we already called update(). */ enerd->term[F_PRES] = calc_pres(fr->ePBC, ir->nwall, box, ekind->ekin, total_vir, pres); /* Calculate long range corrections to pressure and energy */ /* this adds to enerd->term[F_PRES] and enerd->term[F_ETOT], and computes enerd->term[F_DISPCORR]. Also modifies the total_vir and pres tesors */ m_add(total_vir, corr_vir, total_vir); m_add(pres, corr_pres, pres); enerd->term[F_PDISPCORR] = prescorr; enerd->term[F_PRES] += prescorr; } }
void do_md_trajectory_writing(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], gmx_int64_t step, gmx_int64_t step_rel, double t, t_inputrec *ir, t_state *state, t_state *state_global, gmx_mtop_t *top_global, t_forcerec *fr, gmx_mdoutf_t outf, t_mdebin *mdebin, gmx_ekindata_t *ekind, rvec *f, rvec *f_global, int *nchkpt, gmx_bool bCPT, gmx_bool bRerunMD, gmx_bool bLastStep, gmx_bool bDoConfOut, gmx_bool bSumEkinhOld ) { int mdof_flags; rvec *x_for_confout = NULL; mdof_flags = 0; if (do_per_step(step, ir->nstxout)) { mdof_flags |= MDOF_X; } if (do_per_step(step, ir->nstvout)) { mdof_flags |= MDOF_V; } if (do_per_step(step, ir->nstfout)) { mdof_flags |= MDOF_F; } if (do_per_step(step, ir->nstxout_compressed)) { mdof_flags |= MDOF_X_COMPRESSED; } if (bCPT) { mdof_flags |= MDOF_CPT; } ; #if defined(GMX_FAHCORE) if (bLastStep) { /* Enforce writing positions and velocities at end of run */ mdof_flags |= (MDOF_X | MDOF_V); } if (MASTER(cr)) { fcReportProgress( ir->nsteps, step ); } #if defined(__native_client__) fcCheckin(MASTER(cr)); #endif /* sync bCPT and fc record-keeping */ if (bCPT && MASTER(cr)) { fcRequestCheckPoint(); } #endif if (mdof_flags != 0) { wallcycle_start(mdoutf_get_wcycle(outf), ewcTRAJ); if (bCPT) { if (MASTER(cr)) { if (bSumEkinhOld) { state_global->ekinstate.bUpToDate = FALSE; } else { update_ekinstate(&state_global->ekinstate, ekind); state_global->ekinstate.bUpToDate = TRUE; } update_energyhistory(state_global->enerhist, mdebin); } } mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global, step, t, state, state_global, f, f_global); if (bCPT) { (*nchkpt)++; } debug_gmx(); if (bLastStep && step_rel == ir->nsteps && bDoConfOut && MASTER(cr) && !bRerunMD) { if (fr->bMolPBC && state->x == state_global->x) { /* This (single-rank) run needs to allocate a temporary array of size natoms so that any periodicity removal for mdrun -confout does not perturb the update and thus the final .edr output. This makes .cpt restarts look binary identical, and makes .edr restarts binary identical. */ snew(x_for_confout, state_global->natoms); copy_rvecn(state_global->x, x_for_confout, 0, state_global->natoms); } else { /* With DD, or no bMolPBC, it doesn't matter if we change state_global->x */ x_for_confout = state_global->x; } /* x and v have been collected in mdoutf_write_to_trajectory_files, * because a checkpoint file will always be written * at the last step. */ fprintf(stderr, "\nWriting final coordinates.\n"); if (fr->bMolPBC) { /* Make molecules whole only for confout writing */ do_pbc_mtop(fplog, ir->ePBC, state->box, top_global, x_for_confout); } write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm), *top_global->name, top_global, x_for_confout, state_global->v, ir->ePBC, state->box); if (fr->bMolPBC && state->x == state_global->x) { sfree(x_for_confout); } debug_gmx(); } wallcycle_stop(mdoutf_get_wcycle(outf), ewcTRAJ); } }
void do_force_lowlevel(FILE *fplog, gmx_large_int_t step, t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, t_grpopts *opts, rvec x[], history_t *hist, rvec f[], rvec f_longrange[], gmx_enerdata_t *enerd, t_fcdata *fcd, gmx_mtop_t *mtop, gmx_localtop_t *top, gmx_genborn_t *born, t_atomtypes *atype, gmx_bool bBornRadii, matrix box, t_lambda *fepvals, real *lambda, t_graph *graph, t_blocka *excl, rvec mu_tot[], int flags, float *cycles_pme) { int i, j, status; int donb_flags; gmx_bool bDoEpot, bSepDVDL, bSB; int pme_flags; matrix boxs; rvec box_size; real Vsr, Vlr, Vcorr = 0; t_pbc pbc; real dvdgb; char buf[22]; double clam_i, vlam_i; real dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR]; real dvdlsum; #ifdef GMX_MPI double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ #endif #define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) {fprintf(fplog, sepdvdlformat, s, v, dvdlambda); } GMX_MPE_LOG(ev_force_start); set_pbc(&pbc, fr->ePBC, box); /* reset free energy components */ for (i = 0; i < efptNR; i++) { dvdl_nb[i] = 0; dvdl_dum[i] = 0; } /* Reset box */ for (i = 0; (i < DIM); i++) { box_size[i] = box[i][i]; } bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); debug_gmx(); /* do QMMM first if requested */ if (fr->bQMMM) { enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md); } if (bSepDVDL) { fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n", gmx_step_str(step, buf), cr->nodeid); } /* Call the short range functions all in one go. */ GMX_MPE_LOG(ev_do_fnbf_start); #ifdef GMX_MPI /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ #define TAKETIME FALSE if (TAKETIME) { MPI_Barrier(cr->mpi_comm_mygroup); t0 = MPI_Wtime(); } #endif if (ir->nwall) { /* foreign lambda component for walls */ dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], enerd->grpp.ener[egLJSR], nrnb); PRINT_SEPDVDL("Walls", 0.0, dvdl); enerd->dvdl_lin[efptVDW] += dvdl; } /* If doing GB, reset dvda and calculate the Born radii */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsNONBONDED); for (i = 0; i < born->nr; i++) { fr->dvda[i] = 0; } if (bBornRadii) { calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb); } wallcycle_sub_stop(wcycle, ewcsNONBONDED); } where(); /* We only do non-bonded calculation with group scheme here, the verlet * calls are done from do_force_cutsVERLET(). */ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) { donb_flags = 0; /* Add short-range interactions */ donb_flags |= GMX_NONBONDED_DO_SR; if (flags & GMX_FORCE_FORCES) { donb_flags |= GMX_NONBONDED_DO_FORCE; } if (flags & GMX_FORCE_ENERGY) { donb_flags |= GMX_NONBONDED_DO_POTENTIAL; } if (flags & GMX_FORCE_DO_LR) { donb_flags |= GMX_NONBONDED_DO_LR; } wallcycle_sub_start(wcycle, ewcsNONBONDED); do_nonbonded(cr, fr, x, f, f_longrange, md, excl, &enerd->grpp, box_size, nrnb, lambda, dvdl_nb, -1, -1, donb_flags); /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) { for (i = 0; i < enerd->n_lambda; i++) { for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } reset_foreign_enerdata(enerd); do_nonbonded(cr, fr, x, f, f_longrange, md, excl, &(enerd->foreign_grpp), box_size, nrnb, lam_i, dvdl_dum, -1, -1, (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } wallcycle_sub_stop(wcycle, ewcsNONBONDED); where(); } /* If we are doing GB, calculate bonded forces and apply corrections * to the solvation forces */ /* MRS: Eventually, many need to include free energy contribution here! */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsBONDED); calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef, ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii, &pbc, graph, enerd); wallcycle_sub_stop(wcycle, ewcsBONDED); } #ifdef GMX_MPI if (TAKETIME) { t1 = MPI_Wtime(); fr->t_fnbf += t1-t0; } #endif if (fepvals->sc_alpha != 0) { enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; } else { enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; } if (fepvals->sc_alpha != 0) /* even though coulomb part is linear, we already added it, beacuse we need to go through the vdw calculation anyway */ { enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; } else { enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; } Vsr = 0; if (bSepDVDL) { for (i = 0; i < enerd->grpp.nener; i++) { Vsr += (fr->bBHAM ? enerd->grpp.ener[egBHAMSR][i] : enerd->grpp.ener[egLJSR][i]) + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; } dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum); } debug_gmx(); GMX_MPE_LOG(ev_do_fnbf_finish); if (debug) { pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); } /* Shift the coordinates. Must be done before bonded forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT * go when no bonded forces have to be evaluated. */ /* Here sometimes we would not need to shift with NBFonly, * but we do so anyhow for consistency of the returned coordinates. */ if (graph) { shift_self(graph, box, x); if (TRICLINIC(box)) { inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); } else { inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); } } /* Check whether we need to do bondeds or correct for exclusions */ if (fr->bMolPBC && ((flags & GMX_FORCE_BONDED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype))) { /* Since all atoms are in the rectangular or triclinic unit-cell, * only single box vector shifts (2 in x) are required. */ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); } debug_gmx(); if (flags & GMX_FORCE_BONDED) { GMX_MPE_LOG(ev_calc_bonds_start); wallcycle_sub_start(wcycle, ewcsBONDED); calc_bonds(fplog, cr->ms, idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, flags, fr->bSepDVDL && do_per_step(step, ir->nstlog), step); /* Check if we have to determine energy differences * at foreign lambda's. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && idef->ilsort != ilsortNO_FE) { if (idef->ilsort != ilsortFE_SORTED) { gmx_incons("The bonded interactions are not sorted for free energy"); } for (i = 0; i < enerd->n_lambda; i++) { reset_foreign_enerdata(enerd); for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } debug_gmx(); GMX_MPE_LOG(ev_calc_bonds_finish); wallcycle_sub_stop(wcycle, ewcsBONDED); } where(); *cycles_pme = 0; if (EEL_FULL(fr->eeltype)) { bSB = (ir->nwall == 2); if (bSB) { copy_mat(box, boxs); svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } clear_mat(fr->vir_el_recip); if (fr->bEwald) { Vcorr = 0; dvdl = 0; /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ /* The TPI molecule does not have exclusions with the rest * of the system and no intra-molecular PME grid contributions * will be calculated in gmx_pme_calc_energy. */ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0) { int nthreads, t; wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); if (fr->n_tpi > 0) { gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); } nthreads = gmx_omp_nthreads_get(emntBonded); #pragma omp parallel for num_threads(nthreads) schedule(static) for (t = 0; t < nthreads; t++) { int s, e, i; rvec *fnv; tensor *vir; real *Vcorrt, *dvdlt; if (t == 0) { fnv = fr->f_novirsum; vir = &fr->vir_el_recip; Vcorrt = &Vcorr; dvdlt = &dvdl; } else { fnv = fr->f_t[t].f; vir = &fr->f_t[t].vir; Vcorrt = &fr->f_t[t].Vcorr; dvdlt = &fr->f_t[t].dvdl[efptCOUL]; for (i = 0; i < fr->natoms_force; i++) { clear_rvec(fnv[i]); } clear_mat(*vir); } *dvdlt = 0; *Vcorrt = ewald_LRcorrection(fplog, fr->excl_load[t], fr->excl_load[t+1], cr, t, fr, md->chargeA, md->nChargePerturbed ? md->chargeB : NULL, ir->cutoff_scheme != ecutsVERLET, excl, x, bSB ? boxs : box, mu_tot, ir->ewald_geometry, ir->epsilon_surface, fnv, *vir, lambda[efptCOUL], dvdlt); } if (nthreads > 1) { reduce_thread_forces(fr->natoms_force, fr->f_novirsum, fr->vir_el_recip, &Vcorr, efptCOUL, &dvdl, nthreads, fr->f_t); } wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); } if (fr->n_tpi == 0) { Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, &dvdl, fr->vir_el_recip); } PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl); enerd->dvdl_lin[efptCOUL] += dvdl; } status = 0; Vlr = 0; dvdl = 0; switch (fr->eeltype) { case eelPME: case eelPMESWITCH: case eelPMEUSER: case eelPMEUSERSWITCH: case eelP3M_AD: if (cr->duty & DUTY_PME) { assert(fr->n_tpi >= 0); if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) { pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE; if (flags & GMX_FORCE_FORCES) { pme_flags |= GMX_PME_CALC_F; } if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)) { pme_flags |= GMX_PME_CALC_ENER_VIR; } if (fr->n_tpi > 0) { /* We don't calculate f, but we do want the potential */ pme_flags |= GMX_PME_CALC_POT; } wallcycle_start(wcycle, ewcPMEMESH); status = gmx_pme_do(fr->pmedata, md->start, md->homenr - fr->n_tpi, x, fr->f_novirsum, md->chargeA, md->chargeB, bSB ? boxs : box, cr, DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle, fr->vir_el_recip, fr->ewaldcoeff, &Vlr, lambda[efptCOUL], &dvdl, pme_flags); *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); /* We should try to do as little computation after * this as possible, because parallel PME synchronizes * the nodes, so we want all load imbalance of the rest * of the force calculation to be before the PME call. * DD load balancing is done on the whole time of * the force call (without PME). */ } if (fr->n_tpi > 0) { /* Determine the PME grid energy of the test molecule * with the PME grid potential of the other charges. */ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, x + md->homenr - fr->n_tpi, md->chargeA + md->homenr - fr->n_tpi, &Vlr); } PRINT_SEPDVDL("PME mesh", Vlr, dvdl); } break; case eelEWALD: Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum, md->chargeA, md->chargeB, box_size, cr, md->homenr, fr->vir_el_recip, fr->ewaldcoeff, lambda[efptCOUL], &dvdl, fr->ewald_table); PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl); break; default: gmx_fatal(FARGS, "No such electrostatics method implemented %s", eel_names[fr->eeltype]); } if (status != 0) { gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s", status, EELTYPE(fr->eeltype)); } /* Note that with separate PME nodes we get the real energies later */ enerd->dvdl_lin[efptCOUL] += dvdl; enerd->term[F_COUL_RECIP] = Vlr + Vcorr; if (debug) { fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n", Vlr, Vcorr, enerd->term[F_COUL_RECIP]); pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); } } else { if (EEL_RF(fr->eeltype)) { /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC) { dvdl = 0; enerd->term[F_RF_EXCL] = RF_excl_correction(fplog, fr, graph, md, excl, x, f, fr->fshift, &pbc, lambda[efptCOUL], &dvdl); } enerd->dvdl_lin[efptCOUL] += dvdl; PRINT_SEPDVDL("RF exclusion correction", enerd->term[F_RF_EXCL], dvdl); } } where(); debug_gmx(); if (debug) { print_nrnb(debug, nrnb); } debug_gmx(); #ifdef GMX_MPI if (TAKETIME) { t2 = MPI_Wtime(); MPI_Barrier(cr->mpi_comm_mygroup); t3 = MPI_Wtime(); fr->t_wait += t3-t2; if (fr->timesteps == 11) { fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", cr->nodeid, gmx_step_str(fr->timesteps, buf), 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); } fr->timesteps++; } #endif if (debug) { pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); } GMX_MPE_LOG(ev_force_finish); }
int gmx_pmeonly(struct gmx_pme_t *pme, t_commrec *cr, t_nrnb *mynrnb, gmx_wallcycle_t wcycle, gmx_walltime_accounting_t walltime_accounting, real ewaldcoeff_q, real ewaldcoeff_lj, t_inputrec *ir) { int npmedata; struct gmx_pme_t **pmedata; gmx_pme_pp_t pme_pp; int ret; int natoms; matrix box; rvec *x_pp = NULL, *f_pp = NULL; real *chargeA = NULL, *chargeB = NULL; real *c6A = NULL, *c6B = NULL; real *sigmaA = NULL, *sigmaB = NULL; real lambda_q = 0; real lambda_lj = 0; int maxshift_x = 0, maxshift_y = 0; real energy_q, energy_lj, dvdlambda_q, dvdlambda_lj; matrix vir_q, vir_lj; float cycles; int count; gmx_bool bEnerVir; int pme_flags; gmx_int64_t step, step_rel; ivec grid_switch; /* This data will only use with PME tuning, i.e. switching PME grids */ npmedata = 1; snew(pmedata, npmedata); pmedata[0] = pme; pme_pp = gmx_pme_pp_init(cr); init_nrnb(mynrnb); count = 0; do /****** this is a quasi-loop over time steps! */ { /* The reason for having a loop here is PME grid tuning/switching */ do { /* Domain decomposition */ ret = gmx_pme_recv_coeffs_coords(pme_pp, &natoms, &chargeA, &chargeB, &c6A, &c6B, &sigmaA, &sigmaB, box, &x_pp, &f_pp, &maxshift_x, &maxshift_y, &pme->bFEP_q, &pme->bFEP_lj, &lambda_q, &lambda_lj, &bEnerVir, &pme_flags, &step, grid_switch, &ewaldcoeff_q, &ewaldcoeff_lj); if (ret == pmerecvqxSWITCHGRID) { /* Switch the PME grid to grid_switch */ gmx_pmeonly_switch(&npmedata, &pmedata, grid_switch, cr, ir, &pme); } if (ret == pmerecvqxRESETCOUNTERS) { /* Reset the cycle and flop counters */ reset_pmeonly_counters(wcycle, walltime_accounting, mynrnb, ir, step); } } while (ret == pmerecvqxSWITCHGRID || ret == pmerecvqxRESETCOUNTERS); if (ret == pmerecvqxFINISH) { /* We should stop: break out of the loop */ break; } step_rel = step - ir->init_step; if (count == 0) { wallcycle_start(wcycle, ewcRUN); walltime_accounting_start(walltime_accounting); } wallcycle_start(wcycle, ewcPMEMESH); dvdlambda_q = 0; dvdlambda_lj = 0; clear_mat(vir_q); clear_mat(vir_lj); gmx_pme_do(pme, 0, natoms, x_pp, f_pp, chargeA, chargeB, c6A, c6B, sigmaA, sigmaB, box, cr, maxshift_x, maxshift_y, mynrnb, wcycle, vir_q, ewaldcoeff_q, vir_lj, ewaldcoeff_lj, &energy_q, &energy_lj, lambda_q, lambda_lj, &dvdlambda_q, &dvdlambda_lj, pme_flags | GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0)); cycles = wallcycle_stop(wcycle, ewcPMEMESH); gmx_pme_send_force_vir_ener(pme_pp, f_pp, vir_q, energy_q, vir_lj, energy_lj, dvdlambda_q, dvdlambda_lj, cycles); count++; } /***** end of quasi-loop, we stop with the break above */ while (TRUE); walltime_accounting_end(walltime_accounting); return 0; }
int gmx_pmeonly(struct gmx_pme_t *pme, const t_commrec *cr, t_nrnb *mynrnb, gmx_wallcycle *wcycle, gmx_walltime_accounting_t walltime_accounting, t_inputrec *ir, PmeRunMode runMode) { int ret; int natoms = 0; matrix box; real lambda_q = 0; real lambda_lj = 0; int maxshift_x = 0, maxshift_y = 0; real energy_q, energy_lj, dvdlambda_q, dvdlambda_lj; matrix vir_q, vir_lj; float cycles; int count; gmx_bool bEnerVir = FALSE; int64_t step; /* This data will only use with PME tuning, i.e. switching PME grids */ std::vector<gmx_pme_t *> pmedata; pmedata.push_back(pme); auto pme_pp = gmx_pme_pp_init(cr); //TODO the variable below should be queried from the task assignment info const bool useGpuForPme = (runMode == PmeRunMode::GPU) || (runMode == PmeRunMode::Mixed); if (useGpuForPme) { changePinningPolicy(&pme_pp->chargeA, pme_get_pinning_policy()); changePinningPolicy(&pme_pp->x, pme_get_pinning_policy()); } init_nrnb(mynrnb); count = 0; do /****** this is a quasi-loop over time steps! */ { /* The reason for having a loop here is PME grid tuning/switching */ do { /* Domain decomposition */ ivec newGridSize; bool atomSetChanged = false; real ewaldcoeff_q = 0, ewaldcoeff_lj = 0; ret = gmx_pme_recv_coeffs_coords(pme_pp.get(), &natoms, box, &maxshift_x, &maxshift_y, &lambda_q, &lambda_lj, &bEnerVir, &step, &newGridSize, &ewaldcoeff_q, &ewaldcoeff_lj, &atomSetChanged); if (ret == pmerecvqxSWITCHGRID) { /* Switch the PME grid to newGridSize */ pme = gmx_pmeonly_switch(&pmedata, newGridSize, ewaldcoeff_q, ewaldcoeff_lj, cr, ir); } if (atomSetChanged) { gmx_pme_reinit_atoms(pme, natoms, pme_pp->chargeA.data()); } if (ret == pmerecvqxRESETCOUNTERS) { /* Reset the cycle and flop counters */ reset_pmeonly_counters(wcycle, walltime_accounting, mynrnb, step, useGpuForPme); } } while (ret == pmerecvqxSWITCHGRID || ret == pmerecvqxRESETCOUNTERS); if (ret == pmerecvqxFINISH) { /* We should stop: break out of the loop */ break; } if (count == 0) { wallcycle_start(wcycle, ewcRUN); walltime_accounting_start_time(walltime_accounting); } wallcycle_start(wcycle, ewcPMEMESH); dvdlambda_q = 0; dvdlambda_lj = 0; clear_mat(vir_q); clear_mat(vir_lj); energy_q = 0; energy_lj = 0; // TODO Make a struct of array refs onto these per-atom fields // of pme_pp (maybe box, energy and virial, too; and likewise // from mdatoms for the other call to gmx_pme_do), so we have // fewer lines of code and less parameter passing. const int pmeFlags = GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0); gmx::ArrayRef<const gmx::RVec> forces; if (useGpuForPme) { const bool boxChanged = false; //TODO this should be set properly by gmx_pme_recv_coeffs_coords, // or maybe use inputrecDynamicBox(ir), at the very least - change this when this codepath is tested! pme_gpu_prepare_computation(pme, boxChanged, box, wcycle, pmeFlags); pme_gpu_launch_spread(pme, pme_pp->x.rvec_array(), wcycle); pme_gpu_launch_complex_transforms(pme, wcycle); pme_gpu_launch_gather(pme, wcycle, PmeForceOutputHandling::Set); pme_gpu_wait_finish_task(pme, wcycle, &forces, vir_q, &energy_q); pme_gpu_reinit_computation(pme, wcycle); } else { gmx_pme_do(pme, 0, natoms, pme_pp->x.rvec_array(), as_rvec_array(pme_pp->f.data()), pme_pp->chargeA.data(), pme_pp->chargeB.data(), pme_pp->sqrt_c6A.data(), pme_pp->sqrt_c6B.data(), pme_pp->sigmaA.data(), pme_pp->sigmaB.data(), box, cr, maxshift_x, maxshift_y, mynrnb, wcycle, vir_q, vir_lj, &energy_q, &energy_lj, lambda_q, lambda_lj, &dvdlambda_q, &dvdlambda_lj, pmeFlags); forces = pme_pp->f; } cycles = wallcycle_stop(wcycle, ewcPMEMESH); gmx_pme_send_force_vir_ener(pme_pp.get(), as_rvec_array(forces.data()), vir_q, energy_q, vir_lj, energy_lj, dvdlambda_q, dvdlambda_lj, cycles); count++; } /***** end of quasi-loop, we stop with the break above */ while (TRUE); walltime_accounting_end_time(walltime_accounting); return 0; }
double do_tpi(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, int gmx_unused nstglobalcomm, gmx_vsite_t gmx_unused *vsite, gmx_constr_t gmx_unused constr, int gmx_unused stepout, t_inputrec *inputrec, gmx_mtop_t *top_global, t_fcdata *fcd, t_state *state, t_mdatoms *mdatoms, t_nrnb *nrnb, gmx_wallcycle_t wcycle, gmx_edsam_t gmx_unused ed, t_forcerec *fr, int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, gmx_membed_t gmx_unused membed, real gmx_unused cpt_period, real gmx_unused max_hours, const char gmx_unused *deviceOptions, int gmx_unused imdport, unsigned long gmx_unused Flags, gmx_walltime_accounting_t walltime_accounting) { const char *TPI = "Test Particle Insertion"; gmx_localtop_t *top; gmx_groups_t *groups; gmx_enerdata_t *enerd; rvec *f; real lambda, t, temp, beta, drmax, epot; double embU, sum_embU, *sum_UgembU, V, V_all, VembU_all; t_trxstatus *status; t_trxframe rerun_fr; gmx_bool bDispCorr, bCharge, bRFExcl, bNotLastFrame, bStateChanged, bNS; tensor force_vir, shake_vir, vir, pres; int cg_tp, a_tp0, a_tp1, ngid, gid_tp, nener, e; rvec *x_mol; rvec mu_tot, x_init, dx, x_tp; int nnodes, frame; gmx_int64_t frame_step_prev, frame_step; gmx_int64_t nsteps, stepblocksize = 0, step; gmx_int64_t rnd_count_stride, rnd_count; gmx_int64_t seed; double rnd[4]; int i, start, end; FILE *fp_tpi = NULL; char *ptr, *dump_pdb, **leg, str[STRLEN], str2[STRLEN]; double dbl, dump_ener; gmx_bool bCavity; int nat_cavity = 0, d; real *mass_cavity = NULL, mass_tot; int nbin; double invbinw, *bin, refvolshift, logV, bUlogV; real dvdl, prescorr, enercorr, dvdlcorr; gmx_bool bEnergyOutOfBounds; const char *tpid_leg[2] = {"direct", "reweighted"}; /* Since there is no upper limit to the insertion energies, * we need to set an upper limit for the distribution output. */ real bU_bin_limit = 50; real bU_logV_bin_limit = bU_bin_limit + 10; nnodes = cr->nnodes; top = gmx_mtop_generate_local_top(top_global, inputrec); groups = &top_global->groups; bCavity = (inputrec->eI == eiTPIC); if (bCavity) { ptr = getenv("GMX_TPIC_MASSES"); if (ptr == NULL) { nat_cavity = 1; } else { /* Read (multiple) masses from env var GMX_TPIC_MASSES, * The center of mass of the last atoms is then used for TPIC. */ nat_cavity = 0; while (sscanf(ptr, "%lf%n", &dbl, &i) > 0) { srenew(mass_cavity, nat_cavity+1); mass_cavity[nat_cavity] = dbl; fprintf(fplog, "mass[%d] = %f\n", nat_cavity+1, mass_cavity[nat_cavity]); nat_cavity++; ptr += i; } if (nat_cavity == 0) { gmx_fatal(FARGS, "Found %d masses in GMX_TPIC_MASSES", nat_cavity); } } } /* init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot, state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/ /* We never need full pbc for TPI */ fr->ePBC = epbcXYZ; /* Determine the temperature for the Boltzmann weighting */ temp = inputrec->opts.ref_t[0]; if (fplog) { for (i = 1; (i < inputrec->opts.ngtc); i++) { if (inputrec->opts.ref_t[i] != temp) { fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); } } fprintf(fplog, "\n The temperature for test particle insertion is %.3f K\n\n", temp); } beta = 1.0/(BOLTZ*temp); /* Number of insertions per frame */ nsteps = inputrec->nsteps; /* Use the same neighborlist with more insertions points * in a sphere of radius drmax around the initial point */ /* This should be a proper mdp parameter */ drmax = inputrec->rtpi; /* An environment variable can be set to dump all configurations * to pdb with an insertion energy <= this value. */ dump_pdb = getenv("GMX_TPI_DUMP"); dump_ener = 0; if (dump_pdb) { sscanf(dump_pdb, "%lf", &dump_ener); } atoms2md(top_global, inputrec, 0, NULL, top_global->natoms, mdatoms); update_mdatoms(mdatoms, inputrec->fepvals->init_lambda); snew(enerd, 1); init_enerdata(groups->grps[egcENER].nr, inputrec->fepvals->n_lambda, enerd); snew(f, top_global->natoms); /* Print to log file */ walltime_accounting_start(walltime_accounting); wallcycle_start(wcycle, ewcRUN); print_start(fplog, cr, walltime_accounting, "Test Particle Insertion"); /* The last charge group is the group to be inserted */ cg_tp = top->cgs.nr - 1; a_tp0 = top->cgs.index[cg_tp]; a_tp1 = top->cgs.index[cg_tp+1]; if (debug) { fprintf(debug, "TPI cg %d, atoms %d-%d\n", cg_tp, a_tp0, a_tp1); } if (a_tp1 - a_tp0 > 1 && (inputrec->rlist < inputrec->rcoulomb || inputrec->rlist < inputrec->rvdw)) { gmx_fatal(FARGS, "Can not do TPI for multi-atom molecule with a twin-range cut-off"); } snew(x_mol, a_tp1-a_tp0); bDispCorr = (inputrec->eDispCorr != edispcNO); bCharge = FALSE; for (i = a_tp0; i < a_tp1; i++) { /* Copy the coordinates of the molecule to be insterted */ copy_rvec(state->x[i], x_mol[i-a_tp0]); /* Check if we need to print electrostatic energies */ bCharge |= (mdatoms->chargeA[i] != 0 || (mdatoms->chargeB && mdatoms->chargeB[i] != 0)); } bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype != eelRF_NEC); calc_cgcm(fplog, cg_tp, cg_tp+1, &(top->cgs), state->x, fr->cg_cm); if (bCavity) { if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog) { fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n"); fprintf(stderr, "WARNING: Your TPI molecule is not centered at 0,0,0\n"); } } else { /* Center the molecule to be inserted at zero */ for (i = 0; i < a_tp1-a_tp0; i++) { rvec_dec(x_mol[i], fr->cg_cm[cg_tp]); } } if (fplog) { fprintf(fplog, "\nWill insert %d atoms %s partial charges\n", a_tp1-a_tp0, bCharge ? "with" : "without"); fprintf(fplog, "\nWill insert %d times in each frame of %s\n", (int)nsteps, opt2fn("-rerun", nfile, fnm)); } if (!bCavity) { if (inputrec->nstlist > 1) { if (drmax == 0 && a_tp1-a_tp0 == 1) { gmx_fatal(FARGS, "Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense", inputrec->nstlist, drmax); } if (fplog) { fprintf(fplog, "Will use the same neighborlist for %d insertions in a sphere of radius %f\n", inputrec->nstlist, drmax); } } } else { if (fplog) { fprintf(fplog, "Will insert randomly in a sphere of radius %f around the center of the cavity\n", drmax); } } ngid = groups->grps[egcENER].nr; gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]); nener = 1 + ngid; if (bDispCorr) { nener += 1; } if (bCharge) { nener += ngid; if (bRFExcl) { nener += 1; } if (EEL_FULL(fr->eeltype)) { nener += 1; } } snew(sum_UgembU, nener); /* Copy the random seed set by the user */ seed = inputrec->ld_seed; /* We use the frame step number as one random counter. * The second counter use the insertion (step) count. But we * need multiple random numbers per insertion. This number is * not fixed, since we generate random locations in a sphere * by putting locations in a cube and some of these fail. * A count of 20 is already extremely unlikely, so 10000 is * a safe margin for random numbers per insertion. */ rnd_count_stride = 10000; if (MASTER(cr)) { fp_tpi = xvgropen(opt2fn("-tpi", nfile, fnm), "TPI energies", "Time (ps)", "(kJ mol\\S-1\\N) / (nm\\S3\\N)", oenv); xvgr_subtitle(fp_tpi, "f. are averages over one frame", oenv); snew(leg, 4+nener); e = 0; sprintf(str, "-kT log(<Ve\\S-\\betaU\\N>/<V>)"); leg[e++] = strdup(str); sprintf(str, "f. -kT log<e\\S-\\betaU\\N>"); leg[e++] = strdup(str); sprintf(str, "f. <e\\S-\\betaU\\N>"); leg[e++] = strdup(str); sprintf(str, "f. V"); leg[e++] = strdup(str); sprintf(str, "f. <Ue\\S-\\betaU\\N>"); leg[e++] = strdup(str); for (i = 0; i < ngid; i++) { sprintf(str, "f. <U\\sVdW %s\\Ne\\S-\\betaU\\N>", *(groups->grpname[groups->grps[egcENER].nm_ind[i]])); leg[e++] = strdup(str); } if (bDispCorr) { sprintf(str, "f. <U\\sdisp c\\Ne\\S-\\betaU\\N>"); leg[e++] = strdup(str); } if (bCharge) { for (i = 0; i < ngid; i++) { sprintf(str, "f. <U\\sCoul %s\\Ne\\S-\\betaU\\N>", *(groups->grpname[groups->grps[egcENER].nm_ind[i]])); leg[e++] = strdup(str); } if (bRFExcl) { sprintf(str, "f. <U\\sRF excl\\Ne\\S-\\betaU\\N>"); leg[e++] = strdup(str); } if (EEL_FULL(fr->eeltype)) { sprintf(str, "f. <U\\sCoul recip\\Ne\\S-\\betaU\\N>"); leg[e++] = strdup(str); } } xvgr_legend(fp_tpi, 4+nener, (const char**)leg, oenv); for (i = 0; i < 4+nener; i++) { sfree(leg[i]); } sfree(leg); } clear_rvec(x_init); V_all = 0; VembU_all = 0; invbinw = 10; nbin = 10; snew(bin, nbin); /* Avoid frame step numbers <= -1 */ frame_step_prev = -1; bNotLastFrame = read_first_frame(oenv, &status, opt2fn("-rerun", nfile, fnm), &rerun_fr, TRX_NEED_X); frame = 0; if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) != mdatoms->nr - (a_tp1 - a_tp0)) { gmx_fatal(FARGS, "Number of atoms in trajectory (%d)%s " "is not equal the number in the run input file (%d) " "minus the number of atoms to insert (%d)\n", rerun_fr.natoms, bCavity ? " minus one" : "", mdatoms->nr, a_tp1-a_tp0); } refvolshift = log(det(rerun_fr.box)); switch (inputrec->eI) { case eiTPI: stepblocksize = inputrec->nstlist; break; case eiTPIC: stepblocksize = 1; break; default: gmx_fatal(FARGS, "Unknown integrator %s", ei_names[inputrec->eI]); } #ifdef GMX_SIMD /* Make sure we don't detect SIMD overflow generated before this point */ gmx_simd_check_and_reset_overflow(); #endif while (bNotLastFrame) { frame_step = rerun_fr.step; if (frame_step <= frame_step_prev) { /* We don't have step number in the trajectory file, * or we have constant or decreasing step numbers. * Ensure we have increasing step numbers, since we use * the step numbers as a counter for random numbers. */ frame_step = frame_step_prev + 1; } frame_step_prev = frame_step; lambda = rerun_fr.lambda; t = rerun_fr.time; sum_embU = 0; for (e = 0; e < nener; e++) { sum_UgembU[e] = 0; } /* Copy the coordinates from the input trajectory */ for (i = 0; i < rerun_fr.natoms; i++) { copy_rvec(rerun_fr.x[i], state->x[i]); } copy_mat(rerun_fr.box, state->box); V = det(state->box); logV = log(V); bStateChanged = TRUE; bNS = TRUE; step = cr->nodeid*stepblocksize; while (step < nsteps) { /* Initialize the second counter for random numbers using * the insertion step index. This ensures that we get * the same random numbers independently of how many * MPI ranks we use. Also for the same seed, we get * the same initial random sequence for different nsteps. */ rnd_count = step*rnd_count_stride; if (!bCavity) { /* Random insertion in the whole volume */ bNS = (step % inputrec->nstlist == 0); if (bNS) { /* Generate a random position in the box */ gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); for (d = 0; d < DIM; d++) { x_init[d] = rnd[d]*state->box[d][d]; } } if (inputrec->nstlist == 1) { copy_rvec(x_init, x_tp); } else { /* Generate coordinates within |dx|=drmax of x_init */ do { gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); for (d = 0; d < DIM; d++) { dx[d] = (2*rnd[d] - 1)*drmax; } } while (norm2(dx) > drmax*drmax); rvec_add(x_init, dx, x_tp); } } else { /* Random insertion around a cavity location * given by the last coordinate of the trajectory. */ if (step == 0) { if (nat_cavity == 1) { /* Copy the location of the cavity */ copy_rvec(rerun_fr.x[rerun_fr.natoms-1], x_init); } else { /* Determine the center of mass of the last molecule */ clear_rvec(x_init); mass_tot = 0; for (i = 0; i < nat_cavity; i++) { for (d = 0; d < DIM; d++) { x_init[d] += mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d]; } mass_tot += mass_cavity[i]; } for (d = 0; d < DIM; d++) { x_init[d] /= mass_tot; } } } /* Generate coordinates within |dx|=drmax of x_init */ do { gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); for (d = 0; d < DIM; d++) { dx[d] = (2*rnd[d] - 1)*drmax; } } while (norm2(dx) > drmax*drmax); rvec_add(x_init, dx, x_tp); } if (a_tp1 - a_tp0 == 1) { /* Insert a single atom, just copy the insertion location */ copy_rvec(x_tp, state->x[a_tp0]); } else { /* Copy the coordinates from the top file */ for (i = a_tp0; i < a_tp1; i++) { copy_rvec(x_mol[i-a_tp0], state->x[i]); } /* Rotate the molecule randomly */ gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); rotate_conf(a_tp1-a_tp0, state->x+a_tp0, NULL, 2*M_PI*rnd[0], 2*M_PI*rnd[1], 2*M_PI*rnd[2]); /* Shift to the insertion location */ for (i = a_tp0; i < a_tp1; i++) { rvec_inc(state->x[i], x_tp); } } /* Clear some matrix variables */ clear_mat(force_vir); clear_mat(shake_vir); clear_mat(vir); clear_mat(pres); /* Set the charge group center of mass of the test particle */ copy_rvec(x_init, fr->cg_cm[top->cgs.nr-1]); /* Calc energy (no forces) on new positions. * Since we only need the intermolecular energy * and the RF exclusion terms of the inserted molecule occur * within a single charge group we can pass NULL for the graph. * This also avoids shifts that would move charge groups * out of the box. * * Some checks above ensure than we can not have * twin-range interactions together with nstlist > 1, * therefore we do not need to remember the LR energies. */ /* Make do_force do a single node force calculation */ cr->nnodes = 1; do_force(fplog, cr, inputrec, step, nrnb, wcycle, top, &top_global->groups, state->box, state->x, &state->hist, f, force_vir, mdatoms, enerd, fcd, state->lambda, NULL, fr, NULL, mu_tot, t, NULL, NULL, FALSE, GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY | (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DO_LR : 0) | (bStateChanged ? GMX_FORCE_STATECHANGED : 0)); cr->nnodes = nnodes; bStateChanged = FALSE; bNS = FALSE; /* Calculate long range corrections to pressure and energy */ calc_dispcorr(fplog, inputrec, fr, step, top_global->natoms, state->box, lambda, pres, vir, &prescorr, &enercorr, &dvdlcorr); /* figure out how to rearrange the next 4 lines MRS 8/4/2009 */ enerd->term[F_DISPCORR] = enercorr; enerd->term[F_EPOT] += enercorr; enerd->term[F_PRES] += prescorr; enerd->term[F_DVDL_VDW] += dvdlcorr; epot = enerd->term[F_EPOT]; bEnergyOutOfBounds = FALSE; #ifdef GMX_SIMD_X86_SSE2_OR_HIGHER /* With SSE the energy can overflow, check for this */ if (gmx_mm_check_and_reset_overflow()) { if (debug) { fprintf(debug, "Found an SSE overflow, assuming the energy is out of bounds\n"); } bEnergyOutOfBounds = TRUE; } #endif /* If the compiler doesn't optimize this check away * we catch the NAN energies. * The epot>GMX_REAL_MAX check catches inf values, * which should nicely result in embU=0 through the exp below, * but it does not hurt to check anyhow. */ /* Non-bonded Interaction usually diverge at r=0. * With tabulated interaction functions the first few entries * should be capped in a consistent fashion between * repulsion, dispersion and Coulomb to avoid accidental * negative values in the total energy. * The table generation code in tables.c does this. * With user tbales the user should take care of this. */ if (epot != epot || epot > GMX_REAL_MAX) { bEnergyOutOfBounds = TRUE; } if (bEnergyOutOfBounds) { if (debug) { fprintf(debug, "\n time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n", t, (int)step, epot); } embU = 0; } else { embU = exp(-beta*epot); sum_embU += embU; /* Determine the weighted energy contributions of each energy group */ e = 0; sum_UgembU[e++] += epot*embU; if (fr->bBHAM) { for (i = 0; i < ngid; i++) { sum_UgembU[e++] += (enerd->grpp.ener[egBHAMSR][GID(i, gid_tp, ngid)] + enerd->grpp.ener[egBHAMLR][GID(i, gid_tp, ngid)])*embU; } } else { for (i = 0; i < ngid; i++) { sum_UgembU[e++] += (enerd->grpp.ener[egLJSR][GID(i, gid_tp, ngid)] + enerd->grpp.ener[egLJLR][GID(i, gid_tp, ngid)])*embU; } } if (bDispCorr) { sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU; } if (bCharge) { for (i = 0; i < ngid; i++) { sum_UgembU[e++] += (enerd->grpp.ener[egCOULSR][GID(i, gid_tp, ngid)] + enerd->grpp.ener[egCOULLR][GID(i, gid_tp, ngid)])*embU; } if (bRFExcl) { sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU; } if (EEL_FULL(fr->eeltype)) { sum_UgembU[e++] += enerd->term[F_COUL_RECIP]*embU; } } } if (embU == 0 || beta*epot > bU_bin_limit) { bin[0]++; } else { i = (int)((bU_logV_bin_limit - (beta*epot - logV + refvolshift))*invbinw + 0.5); if (i < 0) { i = 0; } if (i >= nbin) { realloc_bins(&bin, &nbin, i+10); } bin[i]++; } if (debug) { fprintf(debug, "TPI %7d %12.5e %12.5f %12.5f %12.5f\n", (int)step, epot, x_tp[XX], x_tp[YY], x_tp[ZZ]); } if (dump_pdb && epot <= dump_ener) { sprintf(str, "t%g_step%d.pdb", t, (int)step); sprintf(str2, "t: %f step %d ener: %f", t, (int)step, epot); write_sto_conf_mtop(str, str2, top_global, state->x, state->v, inputrec->ePBC, state->box); } step++; if ((step/stepblocksize) % cr->nnodes != cr->nodeid) { /* Skip all steps assigned to the other MPI ranks */ step += (cr->nnodes - 1)*stepblocksize; } } if (PAR(cr)) { /* When running in parallel sum the energies over the processes */ gmx_sumd(1, &sum_embU, cr); gmx_sumd(nener, sum_UgembU, cr); } frame++; V_all += V; VembU_all += V*sum_embU/nsteps; if (fp_tpi) { if (bVerbose || frame%10 == 0 || frame < 10) { fprintf(stderr, "mu %10.3e <mu> %10.3e\n", -log(sum_embU/nsteps)/beta, -log(VembU_all/V_all)/beta); } fprintf(fp_tpi, "%10.3f %12.5e %12.5e %12.5e %12.5e", t, VembU_all == 0 ? 20/beta : -log(VembU_all/V_all)/beta, sum_embU == 0 ? 20/beta : -log(sum_embU/nsteps)/beta, sum_embU/nsteps, V); for (e = 0; e < nener; e++) { fprintf(fp_tpi, " %12.5e", sum_UgembU[e]/nsteps); } fprintf(fp_tpi, "\n"); fflush(fp_tpi); } bNotLastFrame = read_next_frame(oenv, status, &rerun_fr); } /* End of the loop */ walltime_accounting_end(walltime_accounting); close_trj(status); if (fp_tpi != NULL) { gmx_fio_fclose(fp_tpi); } if (fplog != NULL) { fprintf(fplog, "\n"); fprintf(fplog, " <V> = %12.5e nm^3\n", V_all/frame); fprintf(fplog, " <mu> = %12.5e kJ/mol\n", -log(VembU_all/V_all)/beta); } /* Write the Boltzmann factor histogram */ if (PAR(cr)) { /* When running in parallel sum the bins over the processes */ i = nbin; global_max(cr, &i); realloc_bins(&bin, &nbin, i); gmx_sumd(nbin, bin, cr); } if (MASTER(cr)) { fp_tpi = xvgropen(opt2fn("-tpid", nfile, fnm), "TPI energy distribution", "\\betaU - log(V/<V>)", "count", oenv); sprintf(str, "number \\betaU > %g: %9.3e", bU_bin_limit, bin[0]); xvgr_subtitle(fp_tpi, str, oenv); xvgr_legend(fp_tpi, 2, (const char **)tpid_leg, oenv); for (i = nbin-1; i > 0; i--) { bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame); fprintf(fp_tpi, "%6.2f %10d %12.5e\n", bUlogV, (int)(bin[i]+0.5), bin[i]*exp(-bUlogV)*V_all/VembU_all); } gmx_fio_fclose(fp_tpi); } sfree(bin); sfree(sum_UgembU); walltime_accounting_set_nsteps_done(walltime_accounting, frame*inputrec->nsteps); return 0; }
double do_md_openmm(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact, int nstglobalcomm, gmx_vsite_t *vsite,gmx_constr_t constr, int stepout,t_inputrec *ir, gmx_mtop_t *top_global, t_fcdata *fcd, t_state *state_global, t_mdatoms *mdatoms, t_nrnb *nrnb,gmx_wallcycle_t wcycle, gmx_edsam_t ed,t_forcerec *fr, int repl_ex_nst,int repl_ex_seed, real cpt_period,real max_hours, const char *deviceOptions, unsigned long Flags, gmx_runtime_t *runtime) { gmx_mdoutf_t *outf; gmx_large_int_t step,step_rel; double run_time; double t,t0,lam0; gmx_bool bSimAnn, bFirstStep,bStateFromTPX,bLastStep,bStartingFromCpt; gmx_bool bInitStep=TRUE; gmx_bool do_ene,do_log, do_verbose, bX,bV,bF,bCPT; tensor force_vir,shake_vir,total_vir,pres; int i,m; int mdof_flags; rvec mu_tot; t_vcm *vcm; int nchkpt=1; gmx_localtop_t *top; t_mdebin *mdebin=NULL; t_state *state=NULL; rvec *f_global=NULL; int n_xtc=-1; rvec *x_xtc=NULL; gmx_enerdata_t *enerd; rvec *f=NULL; gmx_global_stat_t gstat; gmx_update_t upd=NULL; t_graph *graph=NULL; globsig_t gs; gmx_groups_t *groups; gmx_ekindata_t *ekind, *ekind_save; gmx_bool bAppend; int a0,a1; matrix lastbox; real reset_counters=0,reset_counters_now=0; char sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE]; int handled_stop_condition=gmx_stop_cond_none; const char *ommOptions = NULL; void *openmmData; bAppend = (Flags & MD_APPENDFILES); check_ir_old_tpx_versions(cr,fplog,ir,top_global); groups = &top_global->groups; /* Initial values */ init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0, nrnb,top_global,&upd, nfile,fnm,&outf,&mdebin, force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags); clear_mat(total_vir); clear_mat(pres); /* Energy terms and groups */ snew(enerd,1); init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd); snew(f,top_global->natoms); /* Kinetic energy data */ snew(ekind,1); init_ekindata(fplog,top_global,&(ir->opts),ekind); /* needed for iteration of constraints */ snew(ekind_save,1); init_ekindata(fplog,top_global,&(ir->opts),ekind_save); /* Copy the cos acceleration to the groups struct */ ekind->cosacc.cos_accel = ir->cos_accel; gstat = global_stat_init(ir); debug_gmx(); { double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1); if ((io > 2000) && MASTER(cr)) fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io); } top = gmx_mtop_generate_local_top(top_global,ir); a0 = 0; a1 = top_global->natoms; state = partdec_init_local_state(cr,state_global); f_global = f; atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms); if (vsite) { set_vsite_top(vsite,top,mdatoms,cr); } if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) { graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE); } update_mdatoms(mdatoms,state->lambda); if (deviceOptions[0]=='\0') { /* empty options, which should default to OpenMM in this build */ ommOptions=deviceOptions; } else { if (gmx_strncasecmp(deviceOptions,"OpenMM",6)!=0) { gmx_fatal(FARGS, "This Gromacs version currently only works with OpenMM. Use -device \"OpenMM:<options>\""); } else { ommOptions=strchr(deviceOptions,':'); if (NULL!=ommOptions) { /* Increase the pointer to skip the colon */ ommOptions++; } } } openmmData = openmm_init(fplog, ommOptions, ir, top_global, top, mdatoms, fr, state); please_cite(fplog,"Friedrichs2009"); if (MASTER(cr)) { /* Update mdebin with energy history if appending to output files */ if ( Flags & MD_APPENDFILES ) { restore_energyhistory_from_state(mdebin,&state_global->enerhist); } /* Set the initial energy history in state to zero by updating once */ update_energyhistory(&state_global->enerhist,mdebin); } if (constr) { set_constraints(constr,top,ir,mdatoms,cr); } if (!ir->bContinuation) { if (mdatoms->cFREEZE && (state->flags & (1<<estV))) { /* Set the velocities of frozen particles to zero */ for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) { for (m=0; m<DIM; m++) { if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) { state->v[i][m] = 0; } } } } if (constr) { /* Constrain the initial coordinates and velocities */ do_constrain_first(fplog,constr,ir,mdatoms,state,f, graph,cr,nrnb,fr,top,shake_vir); } if (vsite) { /* Construct the virtual sites for the initial configuration */ construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL, top->idef.iparams,top->idef.il, fr->ePBC,fr->bMolPBC,graph,cr,state->box); } } debug_gmx(); if (MASTER(cr)) { char tbuf[20]; fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]); fprintf(stderr,"starting mdrun '%s'\n", *(top_global->name)); if (ir->nsteps >= 0) { sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t); } else { sprintf(tbuf,"%s","infinite"); } if (ir->init_step > 0) { fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n", gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf, gmx_step_str(ir->init_step,sbuf2), ir->init_step*ir->delta_t); } else { fprintf(stderr,"%s steps, %s ps.\n", gmx_step_str(ir->nsteps,sbuf),tbuf); } } fprintf(fplog,"\n"); /* Set and write start time */ runtime_start(runtime); print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime); wallcycle_start(wcycle,ewcRUN); if (fplog) fprintf(fplog,"\n"); /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */ debug_gmx(); /*********************************************************** * * Loop over MD steps * ************************************************************/ /* loop over MD steps or if rerunMD to end of input trajectory */ bFirstStep = TRUE; /* Skip the first Nose-Hoover integration when we get the state from tpx */ bStateFromTPX = !opt2bSet("-cpi",nfile,fnm); bInitStep = bFirstStep && bStateFromTPX; bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep; bLastStep = FALSE; init_global_signals(&gs,cr,ir,repl_ex_nst); step = ir->init_step; step_rel = 0; while (!bLastStep) { wallcycle_start(wcycle,ewcSTEP); GMX_MPE_LOG(ev_timestep1); bLastStep = (step_rel == ir->nsteps); t = t0 + step*ir->delta_t; if (gs.set[eglsSTOPCOND] != 0) { bLastStep = TRUE; } do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep; do_verbose = bVerbose && (step % stepout == 0 || bFirstStep || bLastStep); if (MASTER(cr) && do_log) { print_ebin_header(fplog,step,t,state->lambda); } clear_mat(force_vir); GMX_MPE_LOG(ev_timestep2); /* We write a checkpoint at this MD step when: * either when we signalled through gs (in OpenMM NS works different), * or at the last step (but not when we do not want confout), * but never at the first step. */ bCPT = ((gs.set[eglsCHKPT] || (bLastStep && (Flags & MD_CONFOUT))) && step > ir->init_step ); if (bCPT) { gs.set[eglsCHKPT] = 0; } /* Now we have the energies and forces corresponding to the * coordinates at time t. We must output all of this before * the update. * for RerunMD t is read from input trajectory */ GMX_MPE_LOG(ev_output_start); mdof_flags = 0; if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; } if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; } if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; } if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; } if (bCPT) { mdof_flags |= MDOF_CPT; }; do_ene = (do_per_step(step,ir->nstenergy) || bLastStep); if (mdof_flags != 0 || do_ene || do_log) { wallcycle_start(wcycle,ewcTRAJ); bF = (mdof_flags & MDOF_F); bX = (mdof_flags & (MDOF_X | MDOF_XTC | MDOF_CPT)); bV = (mdof_flags & (MDOF_V | MDOF_CPT)); openmm_copy_state(openmmData, state, &t, f, enerd, bX, bV, bF, do_ene); upd_mdebin(mdebin, FALSE,TRUE, t,mdatoms->tmass,enerd,state,lastbox, shake_vir,force_vir,total_vir,pres, ekind,mu_tot,constr); print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,do_log?fplog:NULL, step,t, eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts)); write_traj(fplog,cr,outf,mdof_flags,top_global, step,t,state,state_global,f,f_global,&n_xtc,&x_xtc); if (bCPT) { nchkpt++; bCPT = FALSE; } debug_gmx(); if (bLastStep && step_rel == ir->nsteps && (Flags & MD_CONFOUT) && MASTER(cr)) { /* x and v have been collected in write_traj, * because a checkpoint file will always be written * at the last step. */ fprintf(stderr,"\nWriting final coordinates.\n"); if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) { /* Make molecules whole only for confout writing */ do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x); } write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm), *top_global->name,top_global, state_global->x,state_global->v, ir->ePBC,state->box); debug_gmx(); } wallcycle_stop(wcycle,ewcTRAJ); } GMX_MPE_LOG(ev_output_finish); /* Determine the wallclock run time up till now */ run_time = gmx_gettime() - (double)runtime->real; /* Check whether everything is still allright */ if (((int)gmx_get_stop_condition() > handled_stop_condition) #ifdef GMX_THREADS && MASTER(cr) #endif ) { /* this is just make gs.sig compatible with the hack of sending signals around by MPI_Reduce with together with other floats */ /* NOTE: this only works for serial code. For code that allows MPI nodes to propagate their condition, see kernel/md.c*/ if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns ) gs.set[eglsSTOPCOND]=1; if ( gmx_get_stop_condition() == gmx_stop_cond_next ) gs.set[eglsSTOPCOND]=1; /* < 0 means stop at next step, > 0 means stop at next NS step */ if (fplog) { fprintf(fplog, "\n\nReceived the %s signal, stopping at the next %sstep\n\n", gmx_get_signal_name(), gs.sig[eglsSTOPCOND]==1 ? "NS " : ""); fflush(fplog); } fprintf(stderr, "\n\nReceived the %s signal, stopping at the next %sstep\n\n", gmx_get_signal_name(), gs.sig[eglsSTOPCOND]==1 ? "NS " : ""); fflush(stderr); handled_stop_condition=(int)gmx_get_stop_condition(); } else if (MASTER(cr) && (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) && gs.set[eglsSTOPCOND] == 0) { /* Signal to terminate the run */ gs.set[eglsSTOPCOND] = 1; if (fplog) { fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99); } fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99); } /* checkpoints */ if (MASTER(cr) && (cpt_period >= 0 && (cpt_period == 0 || run_time >= nchkpt*cpt_period*60.0)) && gs.set[eglsCHKPT] == 0) { gs.set[eglsCHKPT] = 1; } /* Time for performance */ if (((step % stepout) == 0) || bLastStep) { runtime_upd_proc(runtime); } if (do_per_step(step,ir->nstlog)) { if (fflush(fplog) != 0) { gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of quota?"); } } /* Remaining runtime */ if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() )) { print_time(stderr,runtime,step,ir,cr); } bFirstStep = FALSE; bInitStep = FALSE; bStartingFromCpt = FALSE; step++; step_rel++; openmm_take_one_step(openmmData); } /* End of main MD loop */ debug_gmx(); /* Stop the time */ runtime_end(runtime); if (MASTER(cr)) { if (ir->nstcalcenergy > 0) { print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t, eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts)); } } openmm_cleanup(fplog, openmmData); done_mdoutf(outf); debug_gmx(); runtime->nsteps_done = step_rel; return 0; }
void do_force(FILE *fplog,t_commrec *cr, t_inputrec *inputrec, int step,t_nrnb *nrnb,gmx_wallcycle_t wcycle, gmx_localtop_t *top, gmx_groups_t *groups, matrix box,rvec x[],history_t *hist, rvec f[],rvec buf[], tensor vir_force, t_mdatoms *mdatoms, gmx_enerdata_t *enerd,t_fcdata *fcd, real lambda,t_graph *graph, t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot, real t,FILE *field,gmx_edsam_t ed, int flags) { static rvec box_size; int cg0,cg1,i,j; int start,homenr; static double mu[2*DIM]; rvec mu_tot_AB[2]; bool bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS,bDoForces; matrix boxs; real e,v,dvdl; t_pbc pbc; float cycles_ppdpme,cycles_pme,cycles_force; start = mdatoms->start; homenr = mdatoms->homenr; bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog)); clear_mat(vir_force); if (PARTDECOMP(cr)) { pd_cg_range(cr,&cg0,&cg1); } else { cg0 = 0; if (DOMAINDECOMP(cr)) cg1 = cr->dd->ncg_tot; else cg1 = top->cgs.nr; if (fr->n_tpi > 0) cg1--; } bStateChanged = (flags & GMX_FORCE_STATECHANGED); bNS = (flags & GMX_FORCE_NS); bFillGrid = (bNS && bStateChanged); bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr)); bDoForces = (flags & GMX_FORCE_FORCES); if (bStateChanged) { update_forcerec(fplog,fr,box); /* Calculate total (local) dipole moment in a temporary common array. * This makes it possible to sum them over nodes faster. */ calc_mu(start,homenr, x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed, mu,mu+DIM); } if (fr->ePBC != epbcNONE) { /* Compute shift vectors every step, * because of pressure coupling or box deformation! */ if (DYNAMIC_BOX(*inputrec) && bStateChanged) calc_shifts(box,fr->shift_vec); if (bCalcCGCM) { put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box, &(top->cgs),x,fr->cg_cm); inc_nrnb(nrnb,eNR_CGCM,homenr); inc_nrnb(nrnb,eNR_RESETX,cg1-cg0); } else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) { unshift_self(graph,box,x); } } else if (bCalcCGCM) { calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm); inc_nrnb(nrnb,eNR_CGCM,homenr); } if (bCalcCGCM) { if (PAR(cr)) { move_cgcm(fplog,cr,fr->cg_cm); } if (gmx_debug_at) pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr); } #ifdef GMX_MPI if (!(cr->duty & DUTY_PME)) { /* Send particle coordinates to the pme nodes. * Since this is only implemented for domain decomposition * and domain decomposition does not use the graph, * we do not need to worry about shifting. */ wallcycle_start(wcycle,ewcPP_PMESENDX); GMX_MPE_LOG(ev_send_coordinates_start); bBS = (inputrec->nwall == 2); if (bBS) { copy_mat(box,boxs); svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]); } gmx_pme_send_x(cr,bBS ? boxs : box,x,mdatoms->nChargePerturbed,lambda); GMX_MPE_LOG(ev_send_coordinates_finish); wallcycle_stop(wcycle,ewcPP_PMESENDX); } #endif /* GMX_MPI */ /* Communicate coordinates and sum dipole if necessary */ if (PAR(cr)) { wallcycle_start(wcycle,ewcMOVEX); if (DOMAINDECOMP(cr)) { dd_move_x(cr->dd,box,x,buf); } else { move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb); } /* When we don't need the total dipole we sum it in global_stat */ if (NEED_MUTOT(*inputrec)) gmx_sumd(2*DIM,mu,cr); wallcycle_stop(wcycle,ewcMOVEX); } for(i=0; i<2; i++) for(j=0;j<DIM;j++) mu_tot_AB[i][j] = mu[i*DIM + j]; if (fr->efep == efepNO) copy_rvec(mu_tot_AB[0],mu_tot); else for(j=0; j<DIM; j++) mu_tot[j] = (1.0 - lambda)*mu_tot_AB[0][j] + lambda*mu_tot_AB[1][j]; /* Reset energies */ reset_energies(&(inputrec->opts),fr,bNS,enerd,MASTER(cr)); if (bNS) { wallcycle_start(wcycle,ewcNS); if (graph && bStateChanged) /* Calculate intramolecular shift vectors to make molecules whole */ mk_mshift(fplog,graph,fr->ePBC,box,x); /* Reset long range forces if necessary */ if (fr->bTwinRange) { clear_rvecs(fr->f_twin_n,fr->f_twin); clear_rvecs(SHIFTS,fr->fshift_twin); } /* Do the actual neighbour searching and if twin range electrostatics * also do the calculation of long range forces and energies. */ dvdl = 0; ns(fplog,fr,x,f,box,groups,&(inputrec->opts),top,mdatoms, cr,nrnb,step,lambda,&dvdl,&enerd->grpp,bFillGrid,bDoForces); if (bSepDVDL) fprintf(fplog,sepdvdlformat,"LR non-bonded",0,dvdl); enerd->dvdl_lr = dvdl; enerd->term[F_DVDL] += dvdl; wallcycle_stop(wcycle,ewcNS); } if (DOMAINDECOMP(cr)) { if (!(cr->duty & DUTY_PME)) { wallcycle_start(wcycle,ewcPPDURINGPME); dd_force_flop_start(cr->dd,nrnb); } } /* Start the force cycle counter. * This counter is stopped in do_forcelow_level. * No parallel communication should occur while this counter is running, * since that will interfere with the dynamic load balancing. */ wallcycle_start(wcycle,ewcFORCE); if (bDoForces) { /* Reset PME/Ewald forces if necessary */ if (fr->bF_NoVirSum) { GMX_BARRIER(cr->mpi_comm_mygroup); if (fr->bDomDec) clear_rvecs(fr->f_novirsum_n,fr->f_novirsum); else clear_rvecs(homenr,fr->f_novirsum+start); GMX_BARRIER(cr->mpi_comm_mygroup); } /* Copy long range forces into normal buffers */ if (fr->bTwinRange) { for(i=0; i<fr->f_twin_n; i++) copy_rvec(fr->f_twin[i],f[i]); for(i=0; i<SHIFTS; i++) copy_rvec(fr->fshift_twin[i],fr->fshift[i]); } else { if (DOMAINDECOMP(cr)) clear_rvecs(cr->dd->nat_tot,f); else clear_rvecs(mdatoms->nr,f); clear_rvecs(SHIFTS,fr->fshift); } clear_rvec(fr->vir_diag_posres); GMX_BARRIER(cr->mpi_comm_mygroup); } if (inputrec->ePull == epullCONSTRAINT) clear_pull_forces(inputrec->pull); /* update QMMMrec, if necessary */ if(fr->bQMMM) update_QMMMrec(cr,fr,x,mdatoms,box,top); if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) { /* Position restraints always require full pbc */ set_pbc(&pbc,inputrec->ePBC,box); v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms, top->idef.iparams_posres, (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres, inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda,&dvdl, fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB); if (bSepDVDL) { fprintf(fplog,sepdvdlformat, interaction_function[F_POSRES].longname,v,dvdl); } enerd->term[F_POSRES] += v; enerd->term[F_DVDL] += dvdl; inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2); } /* Compute the bonded and non-bonded forces */ do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef), cr,nrnb,wcycle,mdatoms,&(inputrec->opts), x,hist,f,enerd,fcd,box,lambda,graph,&(top->excls),mu_tot_AB, flags,&cycles_force); GMX_BARRIER(cr->mpi_comm_mygroup); if (ed) { do_flood(fplog,cr,x,f,ed,box,step); } if (DOMAINDECOMP(cr)) { dd_force_flop_stop(cr->dd,nrnb); if (wcycle) dd_cycles_add(cr->dd,cycles_force,ddCyclF); } if (bDoForces) { /* Compute forces due to electric field */ calc_f_el(MASTER(cr) ? field : NULL, start,homenr,mdatoms->chargeA,x,f,inputrec->ex,inputrec->et,t); /* When using PME/Ewald we compute the long range virial there. * otherwise we do it based on long range forces from twin range * cut-off based calculation (or not at all). */ /* Communicate the forces */ if (PAR(cr)) { wallcycle_start(wcycle,ewcMOVEF); if (DOMAINDECOMP(cr)) { dd_move_f(cr->dd,f,buf,fr->fshift); /* Position restraint do not introduce inter-cg forces */ if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl) dd_move_f(cr->dd,fr->f_novirsum,buf,NULL); } else { move_f(fplog,cr,GMX_LEFT,GMX_RIGHT,f,buf,nrnb); } wallcycle_stop(wcycle,ewcMOVEF); } } if (bDoForces) { if (vsite) { wallcycle_start(wcycle,ewcVSITESPREAD); spread_vsite_f(fplog,vsite,x,f,fr->fshift,nrnb, &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr); wallcycle_stop(wcycle,ewcVSITESPREAD); } /* Calculation of the virial must be done after vsites! */ calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f, vir_force,graph,box,nrnb,fr,inputrec->ePBC); } if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) { /* Calculate the center of mass forces, this requires communication, * which is why pull_potential is called close to other communication. * The virial contribution is calculated directly, * which is why we call pull_potential after calc_virial. */ set_pbc(&pbc,inputrec->ePBC,box); dvdl = 0; enerd->term[F_COM_PULL] = pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc, cr,t,lambda,x,f,vir_force,&dvdl); if (bSepDVDL) fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl); enerd->term[F_DVDL] += dvdl; } if (!(cr->duty & DUTY_PME)) { cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME); dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME); } #ifdef GMX_MPI if (PAR(cr) && !(cr->duty & DUTY_PME)) { /* In case of node-splitting, the PP nodes receive the long-range * forces, virial and energy from the PME nodes here. */ wallcycle_start(wcycle,ewcPP_PMEWAITRECVF); dvdl = 0; gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl, &cycles_pme); if (bSepDVDL) fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl); enerd->term[F_COUL_RECIP] += e; enerd->term[F_DVDL] += dvdl; if (wcycle) dd_cycles_add(cr->dd,cycles_pme,ddCyclPME); wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF); } #endif if (bDoForces && fr->bF_NoVirSum) { if (vsite) { /* Spread the mesh force on virtual sites to the other particles... * This is parallellized. MPI communication is performed * if the constructing atoms aren't local. */ wallcycle_start(wcycle,ewcVSITESPREAD); spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,nrnb, &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr); wallcycle_stop(wcycle,ewcVSITESPREAD); } /* Now add the forces, this is local */ if (fr->bDomDec) { sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum); } else { sum_forces(start,start+homenr,f,fr->f_novirsum); } if (EEL_FULL(fr->eeltype)) { /* Add the mesh contribution to the virial */ m_add(vir_force,fr->vir_el_recip,vir_force); } if (debug) pr_rvecs(debug,0,"vir_force",vir_force,DIM); } /* Sum the potential energy terms from group contributions */ sum_epot(&(inputrec->opts),enerd); if (fr->print_force >= 0 && bDoForces) print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f); }
void do_force_lowlevel(FILE *fplog, gmx_large_int_t step, t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, t_grpopts *opts, rvec x[], history_t *hist, rvec f[], gmx_enerdata_t *enerd, t_fcdata *fcd, gmx_mtop_t *mtop, gmx_localtop_t *top, gmx_genborn_t *born, t_atomtypes *atype, gmx_bool bBornRadii, matrix box, real lambda, t_graph *graph, t_blocka *excl, rvec mu_tot[], int flags, float *cycles_pme) { int i,status; int donb_flags; gmx_bool bDoEpot,bSepDVDL,bSB; int pme_flags; matrix boxs; rvec box_size; real dvdlambda,Vsr,Vlr,Vcorr=0,vdip,vcharge; t_pbc pbc; real dvdgb; char buf[22]; gmx_enerdata_t ed_lam; double lam_i; real dvdl_dum; #ifdef GMX_MPI double t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */ #endif #define PRINT_SEPDVDL(s,v,dvdl) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdl); GMX_MPE_LOG(ev_force_start); set_pbc(&pbc,fr->ePBC,box); /* Reset box */ for(i=0; (i<DIM); i++) { box_size[i]=box[i][i]; } bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog)); debug_gmx(); /* do QMMM first if requested */ if(fr->bQMMM) { enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md); } if (bSepDVDL) { fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n", gmx_step_str(step,buf),cr->nodeid); } /* Call the short range functions all in one go. */ GMX_MPE_LOG(ev_do_fnbf_start); dvdlambda = 0; #ifdef GMX_MPI /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ #define TAKETIME FALSE if (TAKETIME) { MPI_Barrier(cr->mpi_comm_mygroup); t0=MPI_Wtime(); } #endif if (ir->nwall) { dvdlambda = do_walls(ir,fr,box,md,x,f,lambda, enerd->grpp.ener[egLJSR],nrnb); PRINT_SEPDVDL("Walls",0.0,dvdlambda); enerd->dvdl_lin += dvdlambda; } /* If doing GB, reset dvda and calculate the Born radii */ if (ir->implicit_solvent) { /* wallcycle_start(wcycle,ewcGB); */ for(i=0; i<born->nr; i++) { fr->dvda[i]=0; } if(bBornRadii) { calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb); } /* wallcycle_stop(wcycle, ewcGB); */ } where(); donb_flags = 0; if (flags & GMX_FORCE_FORCES) { donb_flags |= GMX_DONB_FORCES; } do_nonbonded(cr,fr,x,f,md,excl, fr->bBHAM ? enerd->grpp.ener[egBHAMSR] : enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR], enerd->grpp.ener[egGB],box_size,nrnb, lambda,&dvdlambda,-1,-1,donb_flags); /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. */ if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) && ir->sc_alpha != 0) { init_enerdata(mtop->groups.grps[egcENER].nr,ir->n_flambda,&ed_lam); for(i=0; i<enerd->n_lambda; i++) { lam_i = (i==0 ? lambda : ir->flambda[i-1]); dvdl_dum = 0; reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE); do_nonbonded(cr,fr,x,f,md,excl, fr->bBHAM ? ed_lam.grpp.ener[egBHAMSR] : ed_lam.grpp.ener[egLJSR], ed_lam.grpp.ener[egCOULSR], enerd->grpp.ener[egGB], box_size,nrnb, lam_i,&dvdl_dum,-1,-1, GMX_DONB_FOREIGNLAMBDA); sum_epot(&ir->opts,&ed_lam); enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT]; } destroy_enerdata(&ed_lam); } where(); /* If we are doing GB, calculate bonded forces and apply corrections * to the solvation forces */ if (ir->implicit_solvent) { calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef, ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd); } #ifdef GMX_MPI if (TAKETIME) { t1=MPI_Wtime(); fr->t_fnbf += t1-t0; } #endif if (ir->sc_alpha != 0) { enerd->dvdl_nonlin += dvdlambda; } else { enerd->dvdl_lin += dvdlambda; } Vsr = 0; if (bSepDVDL) { for(i=0; i<enerd->grpp.nener; i++) { Vsr += (fr->bBHAM ? enerd->grpp.ener[egBHAMSR][i] : enerd->grpp.ener[egLJSR][i]) + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; } } PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlambda); debug_gmx(); GMX_MPE_LOG(ev_do_fnbf_finish); if (debug) { pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS); } /* Shift the coordinates. Must be done before bonded forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT * go when no bonded forces have to be evaluated. */ /* Here sometimes we would not need to shift with NBFonly, * but we do so anyhow for consistency of the returned coordinates. */ if (graph) { shift_self(graph,box,x); if (TRICLINIC(box)) { inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes); } else { inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes); } } /* Check whether we need to do bondeds or correct for exclusions */ if (fr->bMolPBC && ((flags & GMX_FORCE_BONDED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype))) { /* Since all atoms are in the rectangular or triclinic unit-cell, * only single box vector shifts (2 in x) are required. */ set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box); } debug_gmx(); if (flags & GMX_FORCE_BONDED) { GMX_MPE_LOG(ev_calc_bonds_start); calc_bonds(fplog,cr->ms, idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, fr->bSepDVDL && do_per_step(step,ir->nstlog),step); /* Check if we have to determine energy differences * at foreign lambda's. */ if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) && idef->ilsort != ilsortNO_FE) { if (idef->ilsort != ilsortFE_SORTED) { gmx_incons("The bonded interactions are not sorted for free energy"); } init_enerdata(mtop->groups.grps[egcENER].nr,ir->n_flambda,&ed_lam); for(i=0; i<enerd->n_lambda; i++) { lam_i = (i==0 ? lambda : ir->flambda[i-1]); dvdl_dum = 0; reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE); calc_bonds_lambda(fplog, idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); sum_epot(&ir->opts,&ed_lam); enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT]; } destroy_enerdata(&ed_lam); } debug_gmx(); GMX_MPE_LOG(ev_calc_bonds_finish); } where(); *cycles_pme = 0; if (EEL_FULL(fr->eeltype)) { bSB = (ir->nwall == 2); if (bSB) { copy_mat(box,boxs); svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } clear_mat(fr->vir_el_recip); if (fr->bEwald) { if (fr->n_tpi == 0) { dvdlambda = 0; Vcorr = ewald_LRcorrection(fplog,md->start,md->start+md->homenr, cr,fr, md->chargeA, md->nChargePerturbed ? md->chargeB : NULL, excl,x,bSB ? boxs : box,mu_tot, ir->ewald_geometry, ir->epsilon_surface, lambda,&dvdlambda,&vdip,&vcharge); PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdlambda); enerd->dvdl_lin += dvdlambda; } else { if (ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0) { gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); } /* The TPI molecule does not have exclusions with the rest * of the system and no intra-molecular PME grid contributions * will be calculated in gmx_pme_calc_energy. */ Vcorr = 0; } } else { Vcorr = shift_LRcorrection(fplog,md->start,md->homenr,cr,fr, md->chargeA,excl,x,TRUE,box, fr->vir_el_recip); } dvdlambda = 0; status = 0; switch (fr->eeltype) { case eelPPPM: status = gmx_pppm_do(fplog,fr->pmedata,FALSE,x,fr->f_novirsum, md->chargeA, box_size,fr->phi,cr,md->start,md->homenr, nrnb,ir->pme_order,&Vlr); break; case eelPME: case eelPMESWITCH: case eelPMEUSER: case eelPMEUSERSWITCH: if (cr->duty & DUTY_PME) { if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) { pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE; if (flags & GMX_FORCE_FORCES) { pme_flags |= GMX_PME_CALC_F; } if (flags & GMX_FORCE_VIRIAL) { pme_flags |= GMX_PME_CALC_ENER_VIR; } if (fr->n_tpi > 0) { /* We don't calculate f, but we do want the potential */ pme_flags |= GMX_PME_CALC_POT; } wallcycle_start(wcycle,ewcPMEMESH); status = gmx_pme_do(fr->pmedata, md->start,md->homenr - fr->n_tpi, x,fr->f_novirsum, md->chargeA,md->chargeB, bSB ? boxs : box,cr, DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb,wcycle, fr->vir_el_recip,fr->ewaldcoeff, &Vlr,lambda,&dvdlambda, pme_flags); *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH); /* We should try to do as little computation after * this as possible, because parallel PME synchronizes * the nodes, so we want all load imbalance of the rest * of the force calculation to be before the PME call. * DD load balancing is done on the whole time of * the force call (without PME). */ } if (fr->n_tpi > 0) { /* Determine the PME grid energy of the test molecule * with the PME grid potential of the other charges. */ gmx_pme_calc_energy(fr->pmedata,fr->n_tpi, x + md->homenr - fr->n_tpi, md->chargeA + md->homenr - fr->n_tpi, &Vlr); } PRINT_SEPDVDL("PME mesh",Vlr,dvdlambda); } else { /* Energies and virial are obtained later from the PME nodes */ /* but values have to be zeroed out here */ Vlr=0.0; } break; case eelEWALD: Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum, md->chargeA,md->chargeB, box_size,cr,md->homenr, fr->vir_el_recip,fr->ewaldcoeff, lambda,&dvdlambda,fr->ewald_table); PRINT_SEPDVDL("Ewald long-range",Vlr,dvdlambda); break; default: Vlr = 0; gmx_fatal(FARGS,"No such electrostatics method implemented %s", eel_names[fr->eeltype]); } if (status != 0) { gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s", status,EELTYPE(fr->eeltype)); } enerd->dvdl_lin += dvdlambda; enerd->term[F_COUL_RECIP] = Vlr + Vcorr; if (debug) { fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n", Vlr,Vcorr,enerd->term[F_COUL_RECIP]); pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM); pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS); } } else { if (EEL_RF(fr->eeltype)) { dvdlambda = 0; if (fr->eeltype != eelRF_NEC) { enerd->term[F_RF_EXCL] = RF_excl_correction(fplog,fr,graph,md,excl,x,f, fr->fshift,&pbc,lambda,&dvdlambda); } enerd->dvdl_lin += dvdlambda; PRINT_SEPDVDL("RF exclusion correction", enerd->term[F_RF_EXCL],dvdlambda); } } where(); debug_gmx(); if (debug) { print_nrnb(debug,nrnb); } debug_gmx(); #ifdef GMX_MPI if (TAKETIME) { t2=MPI_Wtime(); MPI_Barrier(cr->mpi_comm_mygroup); t3=MPI_Wtime(); fr->t_wait += t3-t2; if (fr->timesteps == 11) { fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", cr->nodeid, gmx_step_str(fr->timesteps,buf), 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); } fr->timesteps++; } #endif if (debug) { pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS); } GMX_MPE_LOG(ev_force_finish); }
/* TODO Specialize this routine into init-time and loop-time versions? e.g. bReadEkin is only true when restoring from checkpoint */ void compute_globals(FILE *fplog, gmx_global_stat *gstat, t_commrec *cr, t_inputrec *ir, t_forcerec *fr, gmx_ekindata_t *ekind, t_state *state, t_mdatoms *mdatoms, t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle, gmx_enerdata_t *enerd, tensor force_vir, tensor shake_vir, tensor total_vir, tensor pres, rvec mu_tot, gmx_constr_t constr, gmx::SimulationSignaller *signalCoordinator, matrix box, int *totalNumberOfBondedInteractions, gmx_bool *bSumEkinhOld, int flags) { tensor corr_vir, corr_pres; gmx_bool bEner, bPres, bTemp; gmx_bool bStopCM, bGStat, bReadEkin, bEkinAveVel, bScaleEkin, bConstrain; real prescorr, enercorr, dvdlcorr, dvdl_ekin; /* translate CGLO flags to gmx_booleans */ bStopCM = flags & CGLO_STOPCM; bGStat = flags & CGLO_GSTAT; bReadEkin = (flags & CGLO_READEKIN); bScaleEkin = (flags & CGLO_SCALEEKIN); bEner = flags & CGLO_ENERGY; bTemp = flags & CGLO_TEMPERATURE; bPres = (flags & CGLO_PRESSURE); bConstrain = (flags & CGLO_CONSTRAINT); /* we calculate a full state kinetic energy either with full-step velocity verlet or half step where we need the pressure */ bEkinAveVel = (ir->eI == eiVV || (ir->eI == eiVVAK && bPres) || bReadEkin); /* in initalization, it sums the shake virial in vv, and to sums ekinh_old in leapfrog (or if we are calculating ekinh_old) for other reasons */ /* ########## Kinetic energy ############## */ if (bTemp) { /* Non-equilibrium MD: this is parallellized, but only does communication * when there really is NEMD. */ if (PAR(cr) && (ekind->bNEMD)) { accumulate_u(cr, &(ir->opts), ekind); } if (!bReadEkin) { calc_ke_part(state, &(ir->opts), mdatoms, ekind, nrnb, bEkinAveVel); } } /* Calculate center of mass velocity if necessary, also parallellized */ if (bStopCM) { calc_vcm_grp(0, mdatoms->homenr, mdatoms, state->x, state->v, vcm); } if (bTemp || bStopCM || bPres || bEner || bConstrain) { if (!bGStat) { /* We will not sum ekinh_old, * so signal that we still have to do it. */ *bSumEkinhOld = TRUE; } else { gmx::ArrayRef<real> signalBuffer = signalCoordinator->getCommunicationBuffer(); if (PAR(cr)) { wallcycle_start(wcycle, ewcMoveE); global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot, ir, ekind, constr, bStopCM ? vcm : NULL, signalBuffer.size(), signalBuffer.data(), totalNumberOfBondedInteractions, *bSumEkinhOld, flags); wallcycle_stop(wcycle, ewcMoveE); } signalCoordinator->finalizeSignals(); *bSumEkinhOld = FALSE; } } if (!ekind->bNEMD && debug && bTemp && (vcm->nr > 0)) { correct_ekin(debug, 0, mdatoms->homenr, state->v, vcm->group_p[0], mdatoms->massT, mdatoms->tmass, ekind->ekin); } /* Do center of mass motion removal */ if (bStopCM) { check_cm_grp(fplog, vcm, ir, 1); do_stopcm_grp(0, mdatoms->homenr, mdatoms->cVCM, state->x, state->v, vcm); inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr); } if (bEner) { /* Calculate the amplitude of the cosine velocity profile */ ekind->cosacc.vcos = ekind->cosacc.mvcos/mdatoms->tmass; } if (bTemp) { /* Sum the kinetic energies of the groups & calc temp */ /* compute full step kinetic energies if vv, or if vv-avek and we are computing the pressure with inputrecNptTrotter */ /* three maincase: VV with AveVel (md-vv), vv with AveEkin (md-vv-avek), leap with AveEkin (md). Leap with AveVel is not supported; it's not clear that it will actually work. bEkinAveVel: If TRUE, we simply multiply ekin by ekinscale to get a full step kinetic energy. If FALSE, we average ekinh_old and ekinh*ekinscale_nhc to get an averaged half step kinetic energy. */ enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, &dvdl_ekin, bEkinAveVel, bScaleEkin); enerd->dvdl_lin[efptMASS] = (double) dvdl_ekin; enerd->term[F_EKIN] = trace(ekind->ekin); } /* ########## Long range energy information ###### */ if (bEner || bPres || bConstrain) { calc_dispcorr(ir, fr, box, state->lambda[efptVDW], corr_pres, corr_vir, &prescorr, &enercorr, &dvdlcorr); } if (bEner) { enerd->term[F_DISPCORR] = enercorr; enerd->term[F_EPOT] += enercorr; enerd->term[F_DVDL_VDW] += dvdlcorr; } /* ########## Now pressure ############## */ if (bPres || bConstrain) { m_add(force_vir, shake_vir, total_vir); /* Calculate pressure and apply LR correction if PPPM is used. * Use the box from last timestep since we already called update(). */ enerd->term[F_PRES] = calc_pres(fr->ePBC, ir->nwall, box, ekind->ekin, total_vir, pres); /* Calculate long range corrections to pressure and energy */ /* this adds to enerd->term[F_PRES] and enerd->term[F_ETOT], and computes enerd->term[F_DISPCORR]. Also modifies the total_vir and pres tesors */ m_add(total_vir, corr_vir, total_vir); m_add(pres, corr_pres, pres); enerd->term[F_PDISPCORR] = prescorr; enerd->term[F_PRES] += prescorr; } }
void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, rvec x[], history_t *hist, rvec f[], rvec f_longrange[], gmx_enerdata_t *enerd, t_fcdata *fcd, gmx_localtop_t *top, gmx_genborn_t *born, gmx_bool bBornRadii, matrix box, t_lambda *fepvals, real *lambda, t_graph *graph, t_blocka *excl, rvec mu_tot[], int flags, float *cycles_pme) { int i, j; int donb_flags; gmx_bool bSB; int pme_flags; matrix boxs; rvec box_size; t_pbc pbc; real dvdl_dum[efptNR], dvdl_nb[efptNR]; #ifdef GMX_MPI double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ #endif set_pbc(&pbc, fr->ePBC, box); /* reset free energy components */ for (i = 0; i < efptNR; i++) { dvdl_nb[i] = 0; dvdl_dum[i] = 0; } /* Reset box */ for (i = 0; (i < DIM); i++) { box_size[i] = box[i][i]; } debug_gmx(); /* do QMMM first if requested */ if (fr->bQMMM) { enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); } /* Call the short range functions all in one go. */ #ifdef GMX_MPI /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ #define TAKETIME FALSE if (TAKETIME) { MPI_Barrier(cr->mpi_comm_mygroup); t0 = MPI_Wtime(); } #endif if (ir->nwall) { /* foreign lambda component for walls */ real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], enerd->grpp.ener[egLJSR], nrnb); enerd->dvdl_lin[efptVDW] += dvdl_walls; } /* If doing GB, reset dvda and calculate the Born radii */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsNONBONDED); for (i = 0; i < born->nr; i++) { fr->dvda[i] = 0; } if (bBornRadii) { calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb); } wallcycle_sub_stop(wcycle, ewcsNONBONDED); } where(); /* We only do non-bonded calculation with group scheme here, the verlet * calls are done from do_force_cutsVERLET(). */ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) { donb_flags = 0; /* Add short-range interactions */ donb_flags |= GMX_NONBONDED_DO_SR; /* Currently all group scheme kernels always calculate (shift-)forces */ if (flags & GMX_FORCE_FORCES) { donb_flags |= GMX_NONBONDED_DO_FORCE; } if (flags & GMX_FORCE_VIRIAL) { donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; } if (flags & GMX_FORCE_ENERGY) { donb_flags |= GMX_NONBONDED_DO_POTENTIAL; } if (flags & GMX_FORCE_DO_LR) { donb_flags |= GMX_NONBONDED_DO_LR; } wallcycle_sub_start(wcycle, ewcsNONBONDED); do_nonbonded(fr, x, f, f_longrange, md, excl, &enerd->grpp, nrnb, lambda, dvdl_nb, -1, -1, donb_flags); /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) { for (i = 0; i < enerd->n_lambda; i++) { real lam_i[efptNR]; for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } reset_foreign_enerdata(enerd); do_nonbonded(fr, x, f, f_longrange, md, excl, &(enerd->foreign_grpp), nrnb, lam_i, dvdl_dum, -1, -1, (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } wallcycle_sub_stop(wcycle, ewcsNONBONDED); where(); } /* If we are doing GB, calculate bonded forces and apply corrections * to the solvation forces */ /* MRS: Eventually, many need to include free energy contribution here! */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsLISTED); calc_gb_forces(cr, md, born, top, x, f, fr, idef, ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); wallcycle_sub_stop(wcycle, ewcsLISTED); } #ifdef GMX_MPI if (TAKETIME) { t1 = MPI_Wtime(); fr->t_fnbf += t1-t0; } #endif if (fepvals->sc_alpha != 0) { enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; } else { enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; } if (fepvals->sc_alpha != 0) /* even though coulomb part is linear, we already added it, beacuse we need to go through the vdw calculation anyway */ { enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; } else { enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; } debug_gmx(); if (debug) { pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); } /* Shift the coordinates. Must be done before listed forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT * go when no listed forces have to be evaluated. * * The shifting and PBC code is deliberately not timed, since with * the Verlet scheme it only takes non-zero time with triclinic * boxes, and even then the time is around a factor of 100 less * than the next smallest counter. */ /* Here sometimes we would not need to shift with NBFonly, * but we do so anyhow for consistency of the returned coordinates. */ if (graph) { shift_self(graph, box, x); if (TRICLINIC(box)) { inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); } else { inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); } } /* Check whether we need to do listed interactions or correct for exclusions */ if (fr->bMolPBC && ((flags & GMX_FORCE_LISTED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) { /* TODO There are no electrostatics methods that require this transformation, when using the Verlet scheme, so update the above conditional. */ /* Since all atoms are in the rectangular or triclinic unit-cell, * only single box vector shifts (2 in x) are required. */ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); } debug_gmx(); do_force_listed(wcycle, box, ir->fepvals, cr->ms, idef, (const rvec *) x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, flags); where(); *cycles_pme = 0; clear_mat(fr->vir_el_recip); clear_mat(fr->vir_lj_recip); /* Do long-range electrostatics and/or LJ-PME, including related short-range * corrections. */ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) { int status = 0; real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; bSB = (ir->nwall == 2); if (bSB) { copy_mat(box, boxs); svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) { real dvdl_long_range_correction_q = 0; real dvdl_long_range_correction_lj = 0; /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ /* The TPI molecule does not have exclusions with the rest * of the system and no intra-molecular PME grid * contributions will be calculated in * gmx_pme_calc_energy. */ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0) { int nthreads, t; wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); if (fr->n_tpi > 0) { gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); } nthreads = gmx_omp_nthreads_get(emntBonded); #pragma omp parallel for num_threads(nthreads) schedule(static) for (t = 0; t < nthreads; t++) { int i; rvec *fnv; tensor *vir_q, *vir_lj; real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; if (t == 0) { fnv = fr->f_novirsum; vir_q = &fr->vir_el_recip; vir_lj = &fr->vir_lj_recip; Vcorrt_q = &Vcorr_q; Vcorrt_lj = &Vcorr_lj; dvdlt_q = &dvdl_long_range_correction_q; dvdlt_lj = &dvdl_long_range_correction_lj; } else { fnv = fr->f_t[t].f; vir_q = &fr->f_t[t].vir_q; vir_lj = &fr->f_t[t].vir_lj; Vcorrt_q = &fr->f_t[t].Vcorr_q; Vcorrt_lj = &fr->f_t[t].Vcorr_lj; dvdlt_q = &fr->f_t[t].dvdl[efptCOUL]; dvdlt_lj = &fr->f_t[t].dvdl[efptVDW]; for (i = 0; i < fr->natoms_force; i++) { clear_rvec(fnv[i]); } clear_mat(*vir_q); clear_mat(*vir_lj); } *dvdlt_q = 0; *dvdlt_lj = 0; ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], cr, t, fr, md->chargeA, md->chargeB, md->sqrt_c6A, md->sqrt_c6B, md->sigmaA, md->sigmaB, md->sigma3A, md->sigma3B, md->nChargePerturbed || md->nTypePerturbed, ir->cutoff_scheme != ecutsVERLET, excl, x, bSB ? boxs : box, mu_tot, ir->ewald_geometry, ir->epsilon_surface, fnv, *vir_q, *vir_lj, Vcorrt_q, Vcorrt_lj, lambda[efptCOUL], lambda[efptVDW], dvdlt_q, dvdlt_lj); } if (nthreads > 1) { reduce_thread_forces(fr->natoms_force, fr->f_novirsum, fr->vir_el_recip, fr->vir_lj_recip, &Vcorr_q, &Vcorr_lj, &dvdl_long_range_correction_q, &dvdl_long_range_correction_lj, nthreads, fr->f_t); } wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); } if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0) { /* This is not in a subcounter because it takes a negligible and constant-sized amount of time */ Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, &dvdl_long_range_correction_q, fr->vir_el_recip); } enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q; enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj; if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME)) { /* Do reciprocal PME for Coulomb and/or LJ. */ assert(fr->n_tpi >= 0); if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) { pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; if (EEL_PME(fr->eeltype)) { pme_flags |= GMX_PME_DO_COULOMB; } if (EVDW_PME(fr->vdwtype)) { pme_flags |= GMX_PME_DO_LJ; } if (flags & GMX_FORCE_FORCES) { pme_flags |= GMX_PME_CALC_F; } if (flags & GMX_FORCE_VIRIAL) { pme_flags |= GMX_PME_CALC_ENER_VIR; } if (fr->n_tpi > 0) { /* We don't calculate f, but we do want the potential */ pme_flags |= GMX_PME_CALC_POT; } wallcycle_start(wcycle, ewcPMEMESH); status = gmx_pme_do(fr->pmedata, 0, md->homenr - fr->n_tpi, x, fr->f_novirsum, md->chargeA, md->chargeB, md->sqrt_c6A, md->sqrt_c6B, md->sigmaA, md->sigmaB, bSB ? boxs : box, cr, DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle, fr->vir_el_recip, fr->ewaldcoeff_q, fr->vir_lj_recip, fr->ewaldcoeff_lj, &Vlr_q, &Vlr_lj, lambda[efptCOUL], lambda[efptVDW], &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags); *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); if (status != 0) { gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); } /* We should try to do as little computation after * this as possible, because parallel PME synchronizes * the nodes, so we want all load imbalance of the * rest of the force calculation to be before the PME * call. DD load balancing is done on the whole time * of the force call (without PME). */ } if (fr->n_tpi > 0) { if (EVDW_PME(ir->vdwtype)) { gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); } /* Determine the PME grid energy of the test molecule * with the PME grid potential of the other charges. */ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, x + md->homenr - fr->n_tpi, md->chargeA + md->homenr - fr->n_tpi, &Vlr_q); } } } if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype)) { Vlr_q = do_ewald(ir, x, fr->f_novirsum, md->chargeA, md->chargeB, box_size, cr, md->homenr, fr->vir_el_recip, fr->ewaldcoeff_q, lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table); } /* Note that with separate PME nodes we get the real energies later */ enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q; enerd->dvdl_lin[efptVDW] += dvdl_long_range_lj; enerd->term[F_COUL_RECIP] = Vlr_q + Vcorr_q; enerd->term[F_LJ_RECIP] = Vlr_lj + Vcorr_lj; if (debug) { fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]); pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]); pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM); } } else { /* Is there a reaction-field exclusion correction needed? */ if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype) { /* With the Verlet scheme, exclusion forces are calculated * in the non-bonded kernel. */ if (ir->cutoff_scheme != ecutsVERLET) { real dvdl_rf_excl = 0; enerd->term[F_RF_EXCL] = RF_excl_correction(fr, graph, md, excl, x, f, fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; } } } where(); debug_gmx(); if (debug) { print_nrnb(debug, nrnb); } debug_gmx(); #ifdef GMX_MPI if (TAKETIME) { t2 = MPI_Wtime(); MPI_Barrier(cr->mpi_comm_mygroup); t3 = MPI_Wtime(); fr->t_wait += t3-t2; if (fr->timesteps == 11) { char buf[22]; fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", cr->nodeid, gmx_step_str(fr->timesteps, buf), 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); } fr->timesteps++; } #endif if (debug) { pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); } }