/*! \brief
 * A convenience wrapper for launching either the GPU or CPU FFT.
 *
 * \param[in] pme        The PME structure.
 * \param[in] gridIndex  The grid index - should currently always be 0.
 * \param[in] dir        The FFT direction enum.
 * \param[in] wcycle     The wallclock counter.
 */
inline void parallel_3dfft_execute_gpu_wrapper(gmx_pme_t              *pme,
                                               const int               gridIndex,
                                               enum gmx_fft_direction  dir,
                                               gmx_wallcycle_t         wcycle)
{
    GMX_ASSERT(gridIndex == 0, "Only single grid supported");
    if (pme_gpu_performs_FFT(pme->gpu))
    {
        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
        pme_gpu_3dfft(pme->gpu, dir, gridIndex);
        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
    }
    else
    {
        wallcycle_start(wcycle, ewcPME_FFT_MIXED_MODE);
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
        for (int thread = 0; thread < pme->nthread; thread++)
        {
            gmx_parallel_3dfft_execute(pme->pfft_setup[gridIndex], dir, thread, wcycle);
        }
        wallcycle_stop(wcycle, ewcPME_FFT_MIXED_MODE);
    }
}
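/* The CPU fallback above splits the 3D-FFT across pme->nthread OpenMP threads, each
 * transforming its own slab of the grid. Below is a minimal, self-contained sketch of
 * that work-splitting pattern; doFftSlab is a hypothetical stand-in for
 * gmx_parallel_3dfft_execute, which in GROMACS operates on pre-partitioned FFT setups. */
#include <cstdio>

static void doFftSlab(int thread, int nthread) // stand-in for one thread's share of the FFT
{
    std::printf("thread %d of %d transforms its slab\n", thread, nthread);
}

static void runCpuFftFallback(int nthread) // corresponds to the else-branch above
{
#pragma omp parallel for num_threads(nthread) schedule(static)
    for (int thread = 0; thread < nthread; thread++)
    {
        doFftSlab(thread, nthread);
    }
}

int main()
{
    runCpuFftFallback(4); // e.g. pme->nthread == 4; runs serially if OpenMP is disabled
    return 0;
}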
void pme_gpu_launch_spread(gmx_pme_t     *pme,
                           const rvec    *x,
                           gmx_wallcycle *wcycle)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");

    PmeGpu *pmeGpu = pme->gpu;

    // The only spot of PME GPU where LAUNCH_GPU counter increases call-count
    wallcycle_start(wcycle, ewcLAUNCH_GPU);
    // The only spot of PME GPU where ewcsLAUNCH_GPU_PME subcounter increases call-count
    wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_PME);
    pme_gpu_copy_input_coordinates(pmeGpu, x);
    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
    wallcycle_stop(wcycle, ewcLAUNCH_GPU);

    const unsigned int gridIndex = 0;
    real              *fftgrid   = pme->fftgrid[gridIndex];
    if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD)
    {
        /* Spread the coefficients on a grid */
        const bool computeSplines = true;
        const bool spreadCharges  = true;
        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
        pme_gpu_spread(pmeGpu, gridIndex, fftgrid, computeSplines, spreadCharges);
        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
    }
}
void pme_gpu_launch_complex_transforms(gmx_pme_t     *pme,
                                       gmx_wallcycle *wcycle)
{
    PmeGpu            *pmeGpu                 = pme->gpu;
    const bool         computeEnergyAndVirial = (pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
    const bool         performBackFFT         = (pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
    const unsigned int gridIndex              = 0;
    t_complex         *cfftgrid               = pme->cfftgrid[gridIndex];

    if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD)
    {
        if (!pme_gpu_performs_FFT(pmeGpu))
        {
            wallcycle_start(wcycle, ewcWAIT_GPU_PME_SPREAD);
            pme_gpu_sync_spread_grid(pme->gpu);
            wallcycle_stop(wcycle, ewcWAIT_GPU_PME_SPREAD);
        }
    }

    try
    {
        if (pmeGpu->settings.currentFlags & GMX_PME_SOLVE)
        {
            /* do R2C 3D-FFT */
            parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_REAL_TO_COMPLEX, wcycle);

            /* solve in k-space for our local cells */
            if (pme_gpu_performs_solve(pmeGpu))
            {
                const auto gridOrdering = pme_gpu_uses_dd(pmeGpu) ? GridOrdering::YZX : GridOrdering::XYZ;
                wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
                wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
                pme_gpu_solve(pmeGpu, cfftgrid, gridOrdering, computeEnergyAndVirial);
                wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
                wallcycle_stop(wcycle, ewcLAUNCH_GPU);
            }
            else
            {
                wallcycle_start(wcycle, ewcPME_SOLVE_MIXED_MODE);
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
                for (int thread = 0; thread < pme->nthread; thread++)
                {
                    solve_pme_yzx(pme, cfftgrid, pme->boxVolume,
                                  computeEnergyAndVirial, pme->nthread, thread);
                }
                wallcycle_stop(wcycle, ewcPME_SOLVE_MIXED_MODE);
            }
        }

        if (performBackFFT)
        {
            parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_COMPLEX_TO_REAL, wcycle);
        }
    } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
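/* Everything in the pme_gpu_launch_* functions above only enqueues work on the GPU
 * stream; the blocking wait lives in pme_gpu_wait_finish_task / pme_gpu_try_finish_task.
 * Below is a minimal, self-contained sketch of why launch and wait are split: the CPU
 * can do other force work in between. The names here (gpuLikeTask, etc.) are
 * hypothetical stand-ins, not GROMACS API. */
#include <chrono>
#include <cstdio>
#include <future>
#include <thread>

static int gpuLikeTask() // stand-in for the enqueued PME GPU work
{
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    return 42;
}

int main()
{
    // "Launch": enqueue asynchronously, as the pme_gpu_launch_* calls do
    std::future<int> pending = std::async(std::launch::async, gpuLikeTask);

    // Overlap: the CPU computes e.g. bonded forces while the "GPU" task runs
    std::printf("CPU does other force work meanwhile\n");

    // "Wait/finish": the blocking counterpart of pme_gpu_wait_finish_task
    std::printf("result = %d\n", pending.get());
    return 0;
}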
void mdoutf_tng_close(gmx_mdoutf_t of)
{
    if (of->tng || of->tng_low_prec)
    {
        wallcycle_start(of->wcycle, ewcTRAJ);
        gmx_tng_close(&of->tng);
        gmx_tng_close(&of->tng_low_prec);
        wallcycle_stop(of->wcycle, ewcTRAJ);
    }
}
void pme_gpu_reinit_computation(const gmx_pme_t *pme,
                                gmx_wallcycle   *wcycle)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");

    wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
    wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);

    pme_gpu_clear_grids(pme->gpu);
    pme_gpu_clear_energy_virial(pme->gpu);

    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
    wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
bool pme_gpu_try_finish_task(const gmx_pme_t                *pme,
                             gmx_wallcycle                  *wcycle,
                             gmx::ArrayRef<const gmx::RVec> *forces,
                             matrix                          virial,
                             real                           *energy,
                             GpuTaskCompletion               completionKind)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");

    wallcycle_start_nocount(wcycle, ewcWAIT_GPU_PME_GATHER);

    if (completionKind == GpuTaskCompletion::Check)
    {
        // Query the PME stream for completion of all tasks enqueued and
        // if we're not done, stop the timer before early return.
        if (!pme_gpu_stream_query(pme->gpu))
        {
            wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER);
            return false;
        }
    }
    else
    {
        // Synchronize the whole PME stream at once, including D2H result transfers.
        pme_gpu_synchronize(pme->gpu);
    }
    wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER);

    // Time the final staged data handling separately with a counting call to get
    // the call count right.
    wallcycle_start(wcycle, ewcWAIT_GPU_PME_GATHER);
    pme_gpu_update_timings(pme->gpu);
    pme_gpu_get_staged_results(pme, forces, virial, energy);
    wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER);

    return true;
}
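/* pme_gpu_try_finish_task above supports both polling (GpuTaskCompletion::Check) and
 * blocking (::Wait) use. Below is a minimal, self-contained sketch of the polling
 * pattern, with an atomic flag as a hypothetical stand-in for pme_gpu_stream_query /
 * pme_gpu_synchronize; it is not GROMACS API. */
#include <atomic>
#include <chrono>
#include <cstdio>
#include <thread>

enum class GpuTaskCompletion { Check, Wait };

static std::atomic<bool> g_done{false}; // stand-in for the GPU stream's completion state

static bool tryFinish(GpuTaskCompletion kind)
{
    if (kind == GpuTaskCompletion::Check)
    {
        return g_done.load(); // query without blocking, like pme_gpu_stream_query
    }
    while (!g_done.load()) // blocking path, like pme_gpu_synchronize
    {
        std::this_thread::yield();
    }
    return true;
}

int main()
{
    std::thread gpu([] {
        std::this_thread::sleep_for(std::chrono::milliseconds(5));
        g_done = true;
    });
    while (!tryFinish(GpuTaskCompletion::Check))
    {
        std::printf("not done yet, doing other work\n"); // overlap useful CPU work
    }
    std::printf("PME-like task complete\n");
    gpu.join();
    return 0;
}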
static void reset_pmeonly_counters(gmx_wallcycle_t           wcycle,
                                   gmx_walltime_accounting_t walltime_accounting,
                                   t_nrnb                   *nrnb,
                                   int64_t                   step,
                                   bool                      useGpuForPme)
{
    /* Reset all the counters related to performance over the run */
    wallcycle_stop(wcycle, ewcRUN);
    wallcycle_reset_all(wcycle);
    init_nrnb(nrnb);
    wallcycle_start(wcycle, ewcRUN);
    walltime_accounting_reset_time(walltime_accounting, step);

    if (useGpuForPme)
    {
        resetGpuProfiler();
    }
}
static void reset_pmeonly_counters(gmx_wallcycle_t           wcycle,
                                   gmx_walltime_accounting_t walltime_accounting,
                                   t_nrnb                   *nrnb,
                                   t_inputrec               *ir,
                                   gmx_int64_t               step)
{
    /* Reset all the counters related to performance over the run */
    wallcycle_stop(wcycle, ewcRUN);
    wallcycle_reset_all(wcycle);
    init_nrnb(nrnb);
    if (ir->nsteps >= 0)
    {
        /* ir->nsteps is not used here, but we update it for consistency */
        ir->nsteps -= step - ir->init_step;
    }
    ir->init_step = step;
    wallcycle_start(wcycle, ewcRUN);
    walltime_accounting_start(walltime_accounting);
}
void pme_gpu_launch_gather(const gmx_pme_t          *pme,
                           gmx_wallcycle gmx_unused *wcycle,
                           PmeForceOutputHandling    forceTreatment)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");

    if (!pme_gpu_performs_gather(pme->gpu))
    {
        return;
    }

    wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
    wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
    const unsigned int gridIndex = 0;
    real              *fftgrid   = pme->fftgrid[gridIndex];
    pme_gpu_gather(pme->gpu, forceTreatment, reinterpret_cast<float *>(fftgrid));
    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
    wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
void pme_gpu_prepare_computation(gmx_pme_t     *pme,
                                 bool           needToUpdateBox,
                                 const matrix   box,
                                 gmx_wallcycle *wcycle,
                                 int            flags)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
    GMX_ASSERT(pme->nnodes > 0, "");
    GMX_ASSERT(pme->nnodes == 1 || pme->ndecompdim > 0, "");

    PmeGpu *pmeGpu = pme->gpu;
    pmeGpu->settings.currentFlags = flags;
    // TODO these flags are only here to honor the CPU PME code, and probably should be removed

    bool shouldUpdateBox = false;
    for (int i = 0; i < DIM; ++i)
    {
        for (int j = 0; j <= i; ++j)
        {
            shouldUpdateBox                  |= (pmeGpu->common->previousBox[i][j] != box[i][j]);
            pmeGpu->common->previousBox[i][j] = box[i][j];
        }
    }

    if (needToUpdateBox || shouldUpdateBox) // || is to make the first computation always update
    {
        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
        pme_gpu_update_input_box(pmeGpu, box);
        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
        wallcycle_stop(wcycle, ewcLAUNCH_GPU);

        if (!pme_gpu_performs_solve(pmeGpu))
        {
            // TODO remove code duplication and add test coverage
            matrix scaledBox;
            pmeGpu->common->boxScaler->scaleBox(box, scaledBox);
            gmx::invertBoxMatrix(scaledBox, pme->recipbox);
            pme->boxVolume = scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ];
        }
    }
}
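/* The box-update test above compares only the lower triangle (j <= i), since a GROMACS
 * box matrix is lower-triangular. Below is a minimal, self-contained sketch of that
 * detection logic; DIM and matrix are redefined locally here (in GROMACS they come from
 * the vector-type headers), so this is an illustration, not the library code. */
#include <cstdio>

constexpr int DIM = 3;
using matrix = float[DIM][DIM];

static bool boxChanged(matrix previous, const matrix current)
{
    bool changed = false;
    for (int i = 0; i < DIM; ++i)
    {
        for (int j = 0; j <= i; ++j) // the lower triangle is sufficient
        {
            changed     |= (previous[i][j] != current[i][j]);
            previous[i][j] = current[i][j]; // record for the next call
        }
    }
    return changed;
}

int main()
{
    matrix prev = {{10, 0, 0}, {0, 10, 0}, {0, 0, 10}};
    matrix next = {{10, 0, 0}, {0, 10, 0}, {0, 0, 11}};
    std::printf("changed: %d\n", boxChanged(prev, next)); // prints 1
    return 0;
}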
int gmx_pmeonly(struct gmx_pme_t *pme,
                t_commrec *cr, t_nrnb *mynrnb,
                gmx_wallcycle_t wcycle,
                gmx_walltime_accounting_t walltime_accounting,
                real ewaldcoeff_q, real ewaldcoeff_lj,
                t_inputrec *ir)
{
    int                npmedata;
    struct gmx_pme_t **pmedata;
    gmx_pme_pp_t       pme_pp;
    int                ret;
    int                natoms;
    matrix             box;
    rvec              *x_pp       = NULL, *f_pp = NULL;
    real              *chargeA    = NULL, *chargeB = NULL;
    real              *c6A        = NULL, *c6B = NULL;
    real              *sigmaA     = NULL, *sigmaB = NULL;
    real               lambda_q   = 0;
    real               lambda_lj  = 0;
    int                maxshift_x = 0, maxshift_y = 0;
    real               energy_q, energy_lj, dvdlambda_q, dvdlambda_lj;
    matrix             vir_q, vir_lj;
    float              cycles;
    int                count;
    gmx_bool           bEnerVir;
    int                pme_flags;
    gmx_int64_t        step, step_rel;
    ivec               grid_switch;

    /* This data will only be used with PME tuning, i.e. switching PME grids */
    npmedata   = 1;
    snew(pmedata, npmedata);
    pmedata[0] = pme;

    pme_pp = gmx_pme_pp_init(cr);

    init_nrnb(mynrnb);

    count = 0;
    do /****** this is a quasi-loop over time steps! */
    {
        /* The reason for having a loop here is PME grid tuning/switching */
        do
        {
            /* Domain decomposition */
            ret = gmx_pme_recv_coeffs_coords(pme_pp,
                                             &natoms,
                                             &chargeA, &chargeB,
                                             &c6A, &c6B,
                                             &sigmaA, &sigmaB,
                                             box, &x_pp, &f_pp,
                                             &maxshift_x, &maxshift_y,
                                             &pme->bFEP_q, &pme->bFEP_lj,
                                             &lambda_q, &lambda_lj,
                                             &bEnerVir,
                                             &pme_flags,
                                             &step,
                                             grid_switch, &ewaldcoeff_q, &ewaldcoeff_lj);

            if (ret == pmerecvqxSWITCHGRID)
            {
                /* Switch the PME grid to grid_switch */
                gmx_pmeonly_switch(&npmedata, &pmedata, grid_switch, cr, ir, &pme);
            }

            if (ret == pmerecvqxRESETCOUNTERS)
            {
                /* Reset the cycle and flop counters */
                reset_pmeonly_counters(wcycle, walltime_accounting, mynrnb, ir, step);
            }
        }
        while (ret == pmerecvqxSWITCHGRID || ret == pmerecvqxRESETCOUNTERS);

        if (ret == pmerecvqxFINISH)
        {
            /* We should stop: break out of the loop */
            break;
        }

        step_rel = step - ir->init_step;

        if (count == 0)
        {
            wallcycle_start(wcycle, ewcRUN);
            walltime_accounting_start(walltime_accounting);
        }

        wallcycle_start(wcycle, ewcPMEMESH);

        dvdlambda_q  = 0;
        dvdlambda_lj = 0;
        clear_mat(vir_q);
        clear_mat(vir_lj);

        gmx_pme_do(pme, 0, natoms, x_pp, f_pp,
                   chargeA, chargeB, c6A, c6B, sigmaA, sigmaB, box,
                   cr, maxshift_x, maxshift_y, mynrnb, wcycle,
                   vir_q, ewaldcoeff_q, vir_lj, ewaldcoeff_lj,
                   &energy_q, &energy_lj, lambda_q, lambda_lj, &dvdlambda_q, &dvdlambda_lj,
                   pme_flags | GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0));

        cycles = wallcycle_stop(wcycle, ewcPMEMESH);

        gmx_pme_send_force_vir_ener(pme_pp,
                                    f_pp, vir_q, energy_q, vir_lj, energy_lj,
                                    dvdlambda_q, dvdlambda_lj, cycles);

        count++;
    } /***** end of quasi-loop, we stop with the break above */
    while (TRUE);

    walltime_accounting_end(walltime_accounting);

    return 0;
}
int gmx_pmeonly(struct gmx_pme_t *pme,
                const t_commrec *cr, t_nrnb *mynrnb,
                gmx_wallcycle *wcycle,
                gmx_walltime_accounting_t walltime_accounting,
                t_inputrec *ir, PmeRunMode runMode)
{
    int      ret;
    int      natoms = 0;
    matrix   box;
    real     lambda_q   = 0;
    real     lambda_lj  = 0;
    int      maxshift_x = 0, maxshift_y = 0;
    real     energy_q, energy_lj, dvdlambda_q, dvdlambda_lj;
    matrix   vir_q, vir_lj;
    float    cycles;
    int      count;
    gmx_bool bEnerVir = FALSE;
    int64_t  step;

    /* This data will only be used with PME tuning, i.e. switching PME grids */
    std::vector<gmx_pme_t *> pmedata;
    pmedata.push_back(pme);

    auto       pme_pp       = gmx_pme_pp_init(cr);
    //TODO the variable below should be queried from the task assignment info
    const bool useGpuForPme = (runMode == PmeRunMode::GPU) || (runMode == PmeRunMode::Mixed);
    if (useGpuForPme)
    {
        changePinningPolicy(&pme_pp->chargeA, pme_get_pinning_policy());
        changePinningPolicy(&pme_pp->x, pme_get_pinning_policy());
    }

    init_nrnb(mynrnb);

    count = 0;
    do /****** this is a quasi-loop over time steps! */
    {
        /* The reason for having a loop here is PME grid tuning/switching */
        do
        {
            /* Domain decomposition */
            ivec newGridSize;
            bool atomSetChanged = false;
            real ewaldcoeff_q   = 0, ewaldcoeff_lj = 0;
            ret = gmx_pme_recv_coeffs_coords(pme_pp.get(),
                                             &natoms,
                                             box,
                                             &maxshift_x, &maxshift_y,
                                             &lambda_q, &lambda_lj,
                                             &bEnerVir,
                                             &step,
                                             &newGridSize,
                                             &ewaldcoeff_q, &ewaldcoeff_lj,
                                             &atomSetChanged);

            if (ret == pmerecvqxSWITCHGRID)
            {
                /* Switch the PME grid to newGridSize */
                pme = gmx_pmeonly_switch(&pmedata, newGridSize, ewaldcoeff_q, ewaldcoeff_lj, cr, ir);
            }

            if (atomSetChanged)
            {
                gmx_pme_reinit_atoms(pme, natoms, pme_pp->chargeA.data());
            }

            if (ret == pmerecvqxRESETCOUNTERS)
            {
                /* Reset the cycle and flop counters */
                reset_pmeonly_counters(wcycle, walltime_accounting, mynrnb, step, useGpuForPme);
            }
        }
        while (ret == pmerecvqxSWITCHGRID || ret == pmerecvqxRESETCOUNTERS);

        if (ret == pmerecvqxFINISH)
        {
            /* We should stop: break out of the loop */
            break;
        }

        if (count == 0)
        {
            wallcycle_start(wcycle, ewcRUN);
            walltime_accounting_start_time(walltime_accounting);
        }

        wallcycle_start(wcycle, ewcPMEMESH);

        dvdlambda_q  = 0;
        dvdlambda_lj = 0;
        clear_mat(vir_q);
        clear_mat(vir_lj);
        energy_q  = 0;
        energy_lj = 0;

        // TODO Make a struct of array refs onto these per-atom fields
        // of pme_pp (maybe box, energy and virial, too; and likewise
        // from mdatoms for the other call to gmx_pme_do), so we have
        // fewer lines of code and less parameter passing.
        const int pmeFlags = GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0);
        gmx::ArrayRef<const gmx::RVec> forces;
        if (useGpuForPme)
        {
            const bool boxChanged = false;
            //TODO this should be set properly by gmx_pme_recv_coeffs_coords,
            // or maybe use inputrecDynamicBox(ir), at the very least - change this when this codepath is tested!
            pme_gpu_prepare_computation(pme, boxChanged, box, wcycle, pmeFlags);
            pme_gpu_launch_spread(pme, pme_pp->x.rvec_array(), wcycle);
            pme_gpu_launch_complex_transforms(pme, wcycle);
            pme_gpu_launch_gather(pme, wcycle, PmeForceOutputHandling::Set);
            pme_gpu_wait_finish_task(pme, wcycle, &forces, vir_q, &energy_q);
            pme_gpu_reinit_computation(pme, wcycle);
        }
        else
        {
            gmx_pme_do(pme, 0, natoms, pme_pp->x.rvec_array(), as_rvec_array(pme_pp->f.data()),
                       pme_pp->chargeA.data(), pme_pp->chargeB.data(),
                       pme_pp->sqrt_c6A.data(), pme_pp->sqrt_c6B.data(),
                       pme_pp->sigmaA.data(), pme_pp->sigmaB.data(), box,
                       cr, maxshift_x, maxshift_y, mynrnb, wcycle,
                       vir_q, vir_lj,
                       &energy_q, &energy_lj, lambda_q, lambda_lj, &dvdlambda_q, &dvdlambda_lj,
                       pmeFlags);
            forces = pme_pp->f;
        }

        cycles = wallcycle_stop(wcycle, ewcPMEMESH);

        gmx_pme_send_force_vir_ener(pme_pp.get(), as_rvec_array(forces.data()),
                                    vir_q, energy_q, vir_lj, energy_lj,
                                    dvdlambda_q, dvdlambda_lj, cycles);

        count++;
    } /***** end of quasi-loop, we stop with the break above */
    while (TRUE);

    walltime_accounting_end_time(walltime_accounting);

    return 0;
}
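/* Both versions of gmx_pmeonly above are organized as a nested receive loop: the inner
 * loop consumes control messages (grid switch, counter reset) until a compute or finish
 * message arrives. Below is a minimal, self-contained sketch of that dispatch structure;
 * the message codes are hypothetical stand-ins for the pmerecvqx* values, and the queue
 * plays the role of the PP-to-PME MPI channel. */
#include <cstdio>
#include <deque>
#include <queue>

enum Msg { MsgSwitchGrid, MsgResetCounters, MsgCompute, MsgFinish };

int main()
{
    std::queue<Msg> inbox(std::deque<Msg>{MsgSwitchGrid, MsgCompute,
                                          MsgResetCounters, MsgCompute, MsgFinish});
    auto recv = [&inbox] { Msg m = inbox.front(); inbox.pop(); return m; };

    for (;;) // quasi-loop over time steps, as in gmx_pmeonly
    {
        Msg ret;
        do // inner loop: handle control messages before computing
        {
            ret = recv();
            if (ret == MsgSwitchGrid)    { std::printf("switch PME grid\n"); }
            if (ret == MsgResetCounters) { std::printf("reset counters\n"); }
        }
        while (ret == MsgSwitchGrid || ret == MsgResetCounters);

        if (ret == MsgFinish)
        {
            break; // we should stop
        }
        std::printf("do one PME step and send forces back\n");
    }
    return 0;
}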
void do_force_lowlevel(FILE *fplog, gmx_large_int_t step, t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, t_grpopts *opts, rvec x[], history_t *hist, rvec f[], gmx_enerdata_t *enerd, t_fcdata *fcd, gmx_mtop_t *mtop, gmx_localtop_t *top, gmx_genborn_t *born, t_atomtypes *atype, gmx_bool bBornRadii, matrix box, real lambda, t_graph *graph, t_blocka *excl, rvec mu_tot[], int flags, float *cycles_pme) { int i,status; int donb_flags; gmx_bool bDoEpot,bSepDVDL,bSB; int pme_flags; matrix boxs; rvec box_size; real dvdlambda,Vsr,Vlr,Vcorr=0,vdip,vcharge; t_pbc pbc; real dvdgb; char buf[22]; gmx_enerdata_t ed_lam; double lam_i; real dvdl_dum; #ifdef GMX_MPI double t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */ #endif #define PRINT_SEPDVDL(s,v,dvdl) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdl); GMX_MPE_LOG(ev_force_start); set_pbc(&pbc,fr->ePBC,box); /* Reset box */ for(i=0; (i<DIM); i++) { box_size[i]=box[i][i]; } bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog)); debug_gmx(); /* do QMMM first if requested */ if(fr->bQMMM) { enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md); } if (bSepDVDL) { fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n", gmx_step_str(step,buf),cr->nodeid); } /* Call the short range functions all in one go. */ GMX_MPE_LOG(ev_do_fnbf_start); dvdlambda = 0; #ifdef GMX_MPI /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ #define TAKETIME FALSE if (TAKETIME) { MPI_Barrier(cr->mpi_comm_mygroup); t0=MPI_Wtime(); } #endif if (ir->nwall) { dvdlambda = do_walls(ir,fr,box,md,x,f,lambda, enerd->grpp.ener[egLJSR],nrnb); PRINT_SEPDVDL("Walls",0.0,dvdlambda); enerd->dvdl_lin += dvdlambda; } /* If doing GB, reset dvda and calculate the Born radii */ if (ir->implicit_solvent) { /* wallcycle_start(wcycle,ewcGB); */ for(i=0; i<born->nr; i++) { fr->dvda[i]=0; } if(bBornRadii) { calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb); } /* wallcycle_stop(wcycle, ewcGB); */ } where(); donb_flags = 0; if (flags & GMX_FORCE_FORCES) { donb_flags |= GMX_DONB_FORCES; } do_nonbonded(cr,fr,x,f,md,excl, fr->bBHAM ? enerd->grpp.ener[egBHAMSR] : enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR], enerd->grpp.ener[egGB],box_size,nrnb, lambda,&dvdlambda,-1,-1,donb_flags); /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. */ if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) && ir->sc_alpha != 0) { init_enerdata(mtop->groups.grps[egcENER].nr,ir->n_flambda,&ed_lam); for(i=0; i<enerd->n_lambda; i++) { lam_i = (i==0 ? lambda : ir->flambda[i-1]); dvdl_dum = 0; reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE); do_nonbonded(cr,fr,x,f,md,excl, fr->bBHAM ? 
ed_lam.grpp.ener[egBHAMSR] : ed_lam.grpp.ener[egLJSR], ed_lam.grpp.ener[egCOULSR], enerd->grpp.ener[egGB], box_size,nrnb, lam_i,&dvdl_dum,-1,-1, GMX_DONB_FOREIGNLAMBDA); sum_epot(&ir->opts,&ed_lam); enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT]; } destroy_enerdata(&ed_lam); } where(); /* If we are doing GB, calculate bonded forces and apply corrections * to the solvation forces */ if (ir->implicit_solvent) { calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef, ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd); } #ifdef GMX_MPI if (TAKETIME) { t1=MPI_Wtime(); fr->t_fnbf += t1-t0; } #endif if (ir->sc_alpha != 0) { enerd->dvdl_nonlin += dvdlambda; } else { enerd->dvdl_lin += dvdlambda; } Vsr = 0; if (bSepDVDL) { for(i=0; i<enerd->grpp.nener; i++) { Vsr += (fr->bBHAM ? enerd->grpp.ener[egBHAMSR][i] : enerd->grpp.ener[egLJSR][i]) + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; } } PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlambda); debug_gmx(); GMX_MPE_LOG(ev_do_fnbf_finish); if (debug) { pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS); } /* Shift the coordinates. Must be done before bonded forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT * go when no bonded forces have to be evaluated. */ /* Here sometimes we would not need to shift with NBFonly, * but we do so anyhow for consistency of the returned coordinates. */ if (graph) { shift_self(graph,box,x); if (TRICLINIC(box)) { inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes); } else { inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes); } } /* Check whether we need to do bondeds or correct for exclusions */ if (fr->bMolPBC && ((flags & GMX_FORCE_BONDED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype))) { /* Since all atoms are in the rectangular or triclinic unit-cell, * only single box vector shifts (2 in x) are required. */ set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box); } debug_gmx(); if (flags & GMX_FORCE_BONDED) { GMX_MPE_LOG(ev_calc_bonds_start); calc_bonds(fplog,cr->ms, idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, fr->bSepDVDL && do_per_step(step,ir->nstlog),step); /* Check if we have to determine energy differences * at foreign lambda's. */ if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) && idef->ilsort != ilsortNO_FE) { if (idef->ilsort != ilsortFE_SORTED) { gmx_incons("The bonded interactions are not sorted for free energy"); } init_enerdata(mtop->groups.grps[egcENER].nr,ir->n_flambda,&ed_lam); for(i=0; i<enerd->n_lambda; i++) { lam_i = (i==0 ? lambda : ir->flambda[i-1]); dvdl_dum = 0; reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE); calc_bonds_lambda(fplog, idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); sum_epot(&ir->opts,&ed_lam); enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT]; } destroy_enerdata(&ed_lam); } debug_gmx(); GMX_MPE_LOG(ev_calc_bonds_finish); } where(); *cycles_pme = 0; if (EEL_FULL(fr->eeltype)) { bSB = (ir->nwall == 2); if (bSB) { copy_mat(box,boxs); svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } clear_mat(fr->vir_el_recip); if (fr->bEwald) { if (fr->n_tpi == 0) { dvdlambda = 0; Vcorr = ewald_LRcorrection(fplog,md->start,md->start+md->homenr, cr,fr, md->chargeA, md->nChargePerturbed ? md->chargeB : NULL, excl,x,bSB ? boxs : box,mu_tot, ir->ewald_geometry, ir->epsilon_surface, lambda,&dvdlambda,&vdip,&vcharge); PRINT_SEPDVDL("Ewald excl./charge/dip. 
corr.",Vcorr,dvdlambda); enerd->dvdl_lin += dvdlambda; } else { if (ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0) { gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); } /* The TPI molecule does not have exclusions with the rest * of the system and no intra-molecular PME grid contributions * will be calculated in gmx_pme_calc_energy. */ Vcorr = 0; } } else { Vcorr = shift_LRcorrection(fplog,md->start,md->homenr,cr,fr, md->chargeA,excl,x,TRUE,box, fr->vir_el_recip); } dvdlambda = 0; status = 0; switch (fr->eeltype) { case eelPPPM: status = gmx_pppm_do(fplog,fr->pmedata,FALSE,x,fr->f_novirsum, md->chargeA, box_size,fr->phi,cr,md->start,md->homenr, nrnb,ir->pme_order,&Vlr); break; case eelPME: case eelPMESWITCH: case eelPMEUSER: case eelPMEUSERSWITCH: if (cr->duty & DUTY_PME) { if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) { pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE; if (flags & GMX_FORCE_FORCES) { pme_flags |= GMX_PME_CALC_F; } if (flags & GMX_FORCE_VIRIAL) { pme_flags |= GMX_PME_CALC_ENER_VIR; } if (fr->n_tpi > 0) { /* We don't calculate f, but we do want the potential */ pme_flags |= GMX_PME_CALC_POT; } wallcycle_start(wcycle,ewcPMEMESH); status = gmx_pme_do(fr->pmedata, md->start,md->homenr - fr->n_tpi, x,fr->f_novirsum, md->chargeA,md->chargeB, bSB ? boxs : box,cr, DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb,wcycle, fr->vir_el_recip,fr->ewaldcoeff, &Vlr,lambda,&dvdlambda, pme_flags); *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH); /* We should try to do as little computation after * this as possible, because parallel PME synchronizes * the nodes, so we want all load imbalance of the rest * of the force calculation to be before the PME call. * DD load balancing is done on the whole time of * the force call (without PME). */ } if (fr->n_tpi > 0) { /* Determine the PME grid energy of the test molecule * with the PME grid potential of the other charges. 
*/ gmx_pme_calc_energy(fr->pmedata,fr->n_tpi, x + md->homenr - fr->n_tpi, md->chargeA + md->homenr - fr->n_tpi, &Vlr); } PRINT_SEPDVDL("PME mesh",Vlr,dvdlambda); } else { /* Energies and virial are obtained later from the PME nodes */ /* but values have to be zeroed out here */ Vlr=0.0; } break; case eelEWALD: Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum, md->chargeA,md->chargeB, box_size,cr,md->homenr, fr->vir_el_recip,fr->ewaldcoeff, lambda,&dvdlambda,fr->ewald_table); PRINT_SEPDVDL("Ewald long-range",Vlr,dvdlambda); break; default: Vlr = 0; gmx_fatal(FARGS,"No such electrostatics method implemented %s", eel_names[fr->eeltype]); } if (status != 0) { gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s", status,EELTYPE(fr->eeltype)); } enerd->dvdl_lin += dvdlambda; enerd->term[F_COUL_RECIP] = Vlr + Vcorr; if (debug) { fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n", Vlr,Vcorr,enerd->term[F_COUL_RECIP]); pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM); pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS); } } else { if (EEL_RF(fr->eeltype)) { dvdlambda = 0; if (fr->eeltype != eelRF_NEC) { enerd->term[F_RF_EXCL] = RF_excl_correction(fplog,fr,graph,md,excl,x,f, fr->fshift,&pbc,lambda,&dvdlambda); } enerd->dvdl_lin += dvdlambda; PRINT_SEPDVDL("RF exclusion correction", enerd->term[F_RF_EXCL],dvdlambda); } } where(); debug_gmx(); if (debug) { print_nrnb(debug,nrnb); } debug_gmx(); #ifdef GMX_MPI if (TAKETIME) { t2=MPI_Wtime(); MPI_Barrier(cr->mpi_comm_mygroup); t3=MPI_Wtime(); fr->t_wait += t3-t2; if (fr->timesteps == 11) { fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", cr->nodeid, gmx_step_str(fr->timesteps,buf), 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); } fr->timesteps++; } #endif if (debug) { pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS); } GMX_MPE_LOG(ev_force_finish); }
void compute_globals(FILE *fplog, gmx_global_stat_t gstat, t_commrec *cr, t_inputrec *ir, t_forcerec *fr, gmx_ekindata_t *ekind, t_state *state, t_state *state_global, t_mdatoms *mdatoms, t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle, gmx_enerdata_t *enerd, tensor force_vir, tensor shake_vir, tensor total_vir, tensor pres, rvec mu_tot, gmx_constr_t constr, globsig_t *gs, gmx_bool bInterSimGS, matrix box, gmx_mtop_t *top_global, gmx_bool *bSumEkinhOld, int flags) { int i, gsi; real gs_buf[eglsNR]; tensor corr_vir, corr_pres; gmx_bool bEner, bPres, bTemp; gmx_bool bStopCM, bGStat, bReadEkin, bEkinAveVel, bScaleEkin, bConstrain; real prescorr, enercorr, dvdlcorr, dvdl_ekin; /* translate CGLO flags to gmx_booleans */ bStopCM = flags & CGLO_STOPCM; bGStat = flags & CGLO_GSTAT; bReadEkin = (flags & CGLO_READEKIN); bScaleEkin = (flags & CGLO_SCALEEKIN); bEner = flags & CGLO_ENERGY; bTemp = flags & CGLO_TEMPERATURE; bPres = (flags & CGLO_PRESSURE); bConstrain = (flags & CGLO_CONSTRAINT); /* we calculate a full state kinetic energy either with full-step velocity verlet or half step where we need the pressure */ bEkinAveVel = (ir->eI == eiVV || (ir->eI == eiVVAK && bPres) || bReadEkin); /* in initalization, it sums the shake virial in vv, and to sums ekinh_old in leapfrog (or if we are calculating ekinh_old) for other reasons */ /* ########## Kinetic energy ############## */ if (bTemp) { /* Non-equilibrium MD: this is parallellized, but only does communication * when there really is NEMD. */ if (PAR(cr) && (ekind->bNEMD)) { accumulate_u(cr, &(ir->opts), ekind); } debug_gmx(); if (bReadEkin) { restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate); } else { calc_ke_part(state, &(ir->opts), mdatoms, ekind, nrnb, bEkinAveVel); } debug_gmx(); } /* Calculate center of mass velocity if necessary, also parallellized */ if (bStopCM) { calc_vcm_grp(0, mdatoms->homenr, mdatoms, state->x, state->v, vcm); } if (bTemp || bStopCM || bPres || bEner || bConstrain) { if (!bGStat) { /* We will not sum ekinh_old, * so signal that we still have to do it. */ *bSumEkinhOld = TRUE; } else { if (gs != NULL) { for (i = 0; i < eglsNR; i++) { gs_buf[i] = gs->sig[i]; } } if (PAR(cr)) { wallcycle_start(wcycle, ewcMoveE); global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot, ir, ekind, constr, bStopCM ? vcm : NULL, gs != NULL ? eglsNR : 0, gs_buf, top_global, state, *bSumEkinhOld, flags); wallcycle_stop(wcycle, ewcMoveE); } if (gs != NULL) { if (MULTISIM(cr) && bInterSimGS) { if (MASTER(cr)) { /* Communicate the signals between the simulations */ gmx_sum_sim(eglsNR, gs_buf, cr->ms); } /* Communicate the signals form the master to the others */ gmx_bcast(eglsNR*sizeof(gs_buf[0]), gs_buf, cr); } for (i = 0; i < eglsNR; i++) { if (bInterSimGS || gs_simlocal[i]) { /* Set the communicated signal only when it is non-zero, * since signals might not be processed at each MD step. */ gsi = (gs_buf[i] >= 0 ? 
(int)(gs_buf[i] + 0.5) : (int)(gs_buf[i] - 0.5)); if (gsi != 0) { gs->set[i] = gsi; } /* Turn off the local signal */ gs->sig[i] = 0; } } } *bSumEkinhOld = FALSE; } } if (!ekind->bNEMD && debug && bTemp && (vcm->nr > 0)) { correct_ekin(debug, 0, mdatoms->homenr, state->v, vcm->group_p[0], mdatoms->massT, mdatoms->tmass, ekind->ekin); } /* Do center of mass motion removal */ if (bStopCM) { check_cm_grp(fplog, vcm, ir, 1); do_stopcm_grp(0, mdatoms->homenr, mdatoms->cVCM, state->x, state->v, vcm); inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr); } if (bEner) { /* Calculate the amplitude of the cosine velocity profile */ ekind->cosacc.vcos = ekind->cosacc.mvcos/mdatoms->tmass; } if (bTemp) { /* Sum the kinetic energies of the groups & calc temp */ /* compute full step kinetic energies if vv, or if vv-avek and we are computing the pressure with IR_NPT_TROTTER */ /* three maincase: VV with AveVel (md-vv), vv with AveEkin (md-vv-avek), leap with AveEkin (md). Leap with AveVel is not supported; it's not clear that it will actually work. bEkinAveVel: If TRUE, we simply multiply ekin by ekinscale to get a full step kinetic energy. If FALSE, we average ekinh_old and ekinh*ekinscale_nhc to get an averaged half step kinetic energy. */ enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, &dvdl_ekin, bEkinAveVel, bScaleEkin); enerd->dvdl_lin[efptMASS] = (double) dvdl_ekin; enerd->term[F_EKIN] = trace(ekind->ekin); } /* ########## Long range energy information ###### */ if (bEner || bPres || bConstrain) { calc_dispcorr(ir, fr, top_global->natoms, box, state->lambda[efptVDW], corr_pres, corr_vir, &prescorr, &enercorr, &dvdlcorr); } if (bEner) { enerd->term[F_DISPCORR] = enercorr; enerd->term[F_EPOT] += enercorr; enerd->term[F_DVDL_VDW] += dvdlcorr; } /* ########## Now pressure ############## */ if (bPres || bConstrain) { m_add(force_vir, shake_vir, total_vir); /* Calculate pressure and apply LR correction if PPPM is used. * Use the box from last timestep since we already called update(). */ enerd->term[F_PRES] = calc_pres(fr->ePBC, ir->nwall, box, ekind->ekin, total_vir, pres); /* Calculate long range corrections to pressure and energy */ /* this adds to enerd->term[F_PRES] and enerd->term[F_ETOT], and computes enerd->term[F_DISPCORR]. Also modifies the total_vir and pres tesors */ m_add(total_vir, corr_vir, total_vir); m_add(pres, corr_pres, pres); enerd->term[F_PDISPCORR] = prescorr; enerd->term[F_PRES] += prescorr; } }
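/* The signal handling in compute_globals above ships integer signals between ranks as
 * floats (so they can ride along in the same global reduction as the energies) and
 * rounds them back half-away-from-zero. Below is a minimal, self-contained sketch of
 * that round trip; roundSignal is a hypothetical helper mirroring the cast expression
 * in the code above, not a GROMACS function. */
#include <cstdio>

static int roundSignal(float s) // round half away from zero, as compute_globals does
{
    return (s >= 0) ? (int)(s + 0.5f) : (int)(s - 0.5f);
}

int main()
{
    const float received[] = {0.0f, 1.0f, -1.0f, 0.9999f, -1.0001f};
    for (float s : received)
    {
        std::printf("%g -> %d\n", s, roundSignal(s));
    }
    return 0;
}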
void do_force_lowlevel(FILE *fplog, gmx_large_int_t step, t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, t_grpopts *opts, rvec x[], history_t *hist, rvec f[], rvec f_longrange[], gmx_enerdata_t *enerd, t_fcdata *fcd, gmx_mtop_t *mtop, gmx_localtop_t *top, gmx_genborn_t *born, t_atomtypes *atype, gmx_bool bBornRadii, matrix box, t_lambda *fepvals, real *lambda, t_graph *graph, t_blocka *excl, rvec mu_tot[], int flags, float *cycles_pme) { int i, j, status; int donb_flags; gmx_bool bDoEpot, bSepDVDL, bSB; int pme_flags; matrix boxs; rvec box_size; real Vsr, Vlr, Vcorr = 0; t_pbc pbc; real dvdgb; char buf[22]; double clam_i, vlam_i; real dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR]; real dvdlsum; #ifdef GMX_MPI double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ #endif #define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) {fprintf(fplog, sepdvdlformat, s, v, dvdlambda); } GMX_MPE_LOG(ev_force_start); set_pbc(&pbc, fr->ePBC, box); /* reset free energy components */ for (i = 0; i < efptNR; i++) { dvdl_nb[i] = 0; dvdl_dum[i] = 0; } /* Reset box */ for (i = 0; (i < DIM); i++) { box_size[i] = box[i][i]; } bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); debug_gmx(); /* do QMMM first if requested */ if (fr->bQMMM) { enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md); } if (bSepDVDL) { fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n", gmx_step_str(step, buf), cr->nodeid); } /* Call the short range functions all in one go. */ GMX_MPE_LOG(ev_do_fnbf_start); #ifdef GMX_MPI /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ #define TAKETIME FALSE if (TAKETIME) { MPI_Barrier(cr->mpi_comm_mygroup); t0 = MPI_Wtime(); } #endif if (ir->nwall) { /* foreign lambda component for walls */ dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], enerd->grpp.ener[egLJSR], nrnb); PRINT_SEPDVDL("Walls", 0.0, dvdl); enerd->dvdl_lin[efptVDW] += dvdl; } /* If doing GB, reset dvda and calculate the Born radii */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsNONBONDED); for (i = 0; i < born->nr; i++) { fr->dvda[i] = 0; } if (bBornRadii) { calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb); } wallcycle_sub_stop(wcycle, ewcsNONBONDED); } where(); /* We only do non-bonded calculation with group scheme here, the verlet * calls are done from do_force_cutsVERLET(). */ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) { donb_flags = 0; /* Add short-range interactions */ donb_flags |= GMX_NONBONDED_DO_SR; if (flags & GMX_FORCE_FORCES) { donb_flags |= GMX_NONBONDED_DO_FORCE; } if (flags & GMX_FORCE_ENERGY) { donb_flags |= GMX_NONBONDED_DO_POTENTIAL; } if (flags & GMX_FORCE_DO_LR) { donb_flags |= GMX_NONBONDED_DO_LR; } wallcycle_sub_start(wcycle, ewcsNONBONDED); do_nonbonded(cr, fr, x, f, f_longrange, md, excl, &enerd->grpp, box_size, nrnb, lambda, dvdl_nb, -1, -1, donb_flags); /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) { for (i = 0; i < enerd->n_lambda; i++) { for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? 
lambda[j] : fepvals->all_lambda[j][i-1]); } reset_foreign_enerdata(enerd); do_nonbonded(cr, fr, x, f, f_longrange, md, excl, &(enerd->foreign_grpp), box_size, nrnb, lam_i, dvdl_dum, -1, -1, (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } wallcycle_sub_stop(wcycle, ewcsNONBONDED); where(); } /* If we are doing GB, calculate bonded forces and apply corrections * to the solvation forces */ /* MRS: Eventually, many need to include free energy contribution here! */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsBONDED); calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef, ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii, &pbc, graph, enerd); wallcycle_sub_stop(wcycle, ewcsBONDED); } #ifdef GMX_MPI if (TAKETIME) { t1 = MPI_Wtime(); fr->t_fnbf += t1-t0; } #endif if (fepvals->sc_alpha != 0) { enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; } else { enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; } if (fepvals->sc_alpha != 0) /* even though coulomb part is linear, we already added it, beacuse we need to go through the vdw calculation anyway */ { enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; } else { enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; } Vsr = 0; if (bSepDVDL) { for (i = 0; i < enerd->grpp.nener; i++) { Vsr += (fr->bBHAM ? enerd->grpp.ener[egBHAMSR][i] : enerd->grpp.ener[egLJSR][i]) + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; } dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum); } debug_gmx(); GMX_MPE_LOG(ev_do_fnbf_finish); if (debug) { pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); } /* Shift the coordinates. Must be done before bonded forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT * go when no bonded forces have to be evaluated. */ /* Here sometimes we would not need to shift with NBFonly, * but we do so anyhow for consistency of the returned coordinates. */ if (graph) { shift_self(graph, box, x); if (TRICLINIC(box)) { inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); } else { inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); } } /* Check whether we need to do bondeds or correct for exclusions */ if (fr->bMolPBC && ((flags & GMX_FORCE_BONDED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype))) { /* Since all atoms are in the rectangular or triclinic unit-cell, * only single box vector shifts (2 in x) are required. */ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); } debug_gmx(); if (flags & GMX_FORCE_BONDED) { GMX_MPE_LOG(ev_calc_bonds_start); wallcycle_sub_start(wcycle, ewcsBONDED); calc_bonds(fplog, cr->ms, idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, flags, fr->bSepDVDL && do_per_step(step, ir->nstlog), step); /* Check if we have to determine energy differences * at foreign lambda's. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && idef->ilsort != ilsortNO_FE) { if (idef->ilsort != ilsortFE_SORTED) { gmx_incons("The bonded interactions are not sorted for free energy"); } for (i = 0; i < enerd->n_lambda; i++) { reset_foreign_enerdata(enerd); for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, fcd, DOMAINDECOMP(cr) ? 
cr->dd->gatindex : NULL); sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } debug_gmx(); GMX_MPE_LOG(ev_calc_bonds_finish); wallcycle_sub_stop(wcycle, ewcsBONDED); } where(); *cycles_pme = 0; if (EEL_FULL(fr->eeltype)) { bSB = (ir->nwall == 2); if (bSB) { copy_mat(box, boxs); svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } clear_mat(fr->vir_el_recip); if (fr->bEwald) { Vcorr = 0; dvdl = 0; /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ /* The TPI molecule does not have exclusions with the rest * of the system and no intra-molecular PME grid contributions * will be calculated in gmx_pme_calc_energy. */ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0) { int nthreads, t; wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); if (fr->n_tpi > 0) { gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); } nthreads = gmx_omp_nthreads_get(emntBonded); #pragma omp parallel for num_threads(nthreads) schedule(static) for (t = 0; t < nthreads; t++) { int s, e, i; rvec *fnv; tensor *vir; real *Vcorrt, *dvdlt; if (t == 0) { fnv = fr->f_novirsum; vir = &fr->vir_el_recip; Vcorrt = &Vcorr; dvdlt = &dvdl; } else { fnv = fr->f_t[t].f; vir = &fr->f_t[t].vir; Vcorrt = &fr->f_t[t].Vcorr; dvdlt = &fr->f_t[t].dvdl[efptCOUL]; for (i = 0; i < fr->natoms_force; i++) { clear_rvec(fnv[i]); } clear_mat(*vir); } *dvdlt = 0; *Vcorrt = ewald_LRcorrection(fplog, fr->excl_load[t], fr->excl_load[t+1], cr, t, fr, md->chargeA, md->nChargePerturbed ? md->chargeB : NULL, ir->cutoff_scheme != ecutsVERLET, excl, x, bSB ? boxs : box, mu_tot, ir->ewald_geometry, ir->epsilon_surface, fnv, *vir, lambda[efptCOUL], dvdlt); } if (nthreads > 1) { reduce_thread_forces(fr->natoms_force, fr->f_novirsum, fr->vir_el_recip, &Vcorr, efptCOUL, &dvdl, nthreads, fr->f_t); } wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); } if (fr->n_tpi == 0) { Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, &dvdl, fr->vir_el_recip); } PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl); enerd->dvdl_lin[efptCOUL] += dvdl; } status = 0; Vlr = 0; dvdl = 0; switch (fr->eeltype) { case eelPME: case eelPMESWITCH: case eelPMEUSER: case eelPMEUSERSWITCH: case eelP3M_AD: if (cr->duty & DUTY_PME) { assert(fr->n_tpi >= 0); if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) { pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE; if (flags & GMX_FORCE_FORCES) { pme_flags |= GMX_PME_CALC_F; } if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)) { pme_flags |= GMX_PME_CALC_ENER_VIR; } if (fr->n_tpi > 0) { /* We don't calculate f, but we do want the potential */ pme_flags |= GMX_PME_CALC_POT; } wallcycle_start(wcycle, ewcPMEMESH); status = gmx_pme_do(fr->pmedata, md->start, md->homenr - fr->n_tpi, x, fr->f_novirsum, md->chargeA, md->chargeB, bSB ? boxs : box, cr, DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle, fr->vir_el_recip, fr->ewaldcoeff, &Vlr, lambda[efptCOUL], &dvdl, pme_flags); *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); /* We should try to do as little computation after * this as possible, because parallel PME synchronizes * the nodes, so we want all load imbalance of the rest * of the force calculation to be before the PME call. * DD load balancing is done on the whole time of * the force call (without PME). 
*/ } if (fr->n_tpi > 0) { /* Determine the PME grid energy of the test molecule * with the PME grid potential of the other charges. */ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, x + md->homenr - fr->n_tpi, md->chargeA + md->homenr - fr->n_tpi, &Vlr); } PRINT_SEPDVDL("PME mesh", Vlr, dvdl); } break; case eelEWALD: Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum, md->chargeA, md->chargeB, box_size, cr, md->homenr, fr->vir_el_recip, fr->ewaldcoeff, lambda[efptCOUL], &dvdl, fr->ewald_table); PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl); break; default: gmx_fatal(FARGS, "No such electrostatics method implemented %s", eel_names[fr->eeltype]); } if (status != 0) { gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s", status, EELTYPE(fr->eeltype)); } /* Note that with separate PME nodes we get the real energies later */ enerd->dvdl_lin[efptCOUL] += dvdl; enerd->term[F_COUL_RECIP] = Vlr + Vcorr; if (debug) { fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n", Vlr, Vcorr, enerd->term[F_COUL_RECIP]); pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); } } else { if (EEL_RF(fr->eeltype)) { /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC) { dvdl = 0; enerd->term[F_RF_EXCL] = RF_excl_correction(fplog, fr, graph, md, excl, x, f, fr->fshift, &pbc, lambda[efptCOUL], &dvdl); } enerd->dvdl_lin[efptCOUL] += dvdl; PRINT_SEPDVDL("RF exclusion correction", enerd->term[F_RF_EXCL], dvdl); } } where(); debug_gmx(); if (debug) { print_nrnb(debug, nrnb); } debug_gmx(); #ifdef GMX_MPI if (TAKETIME) { t2 = MPI_Wtime(); MPI_Barrier(cr->mpi_comm_mygroup); t3 = MPI_Wtime(); fr->t_wait += t3-t2; if (fr->timesteps == 11) { fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", cr->nodeid, gmx_step_str(fr->timesteps, buf), 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); } fr->timesteps++; } #endif if (debug) { pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); } GMX_MPE_LOG(ev_force_finish); }
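/* The threaded Ewald exclusion correction in do_force_lowlevel above gives each OpenMP
 * thread its own force buffer and scalar accumulators, then reduces them afterwards
 * (reduce_thread_forces). Below is a minimal, self-contained sketch of that
 * accumulate-then-reduce pattern on a scalar; it requires OpenMP (-fopenmp) and the
 * names are illustrative, not GROMACS API. */
#include <cstdio>
#include <vector>
#include <omp.h>

int main()
{
    const int           nthreads = 4;
    std::vector<double> vcorrPerThread(nthreads, 0.0); // one accumulator per thread
#pragma omp parallel num_threads(nthreads)
    {
        const int t = omp_get_thread_num();
        for (int i = t; i < 1000; i += nthreads) // each thread handles its own slice
        {
            vcorrPerThread[t] += 1.0 / (1 + i);
        }
    }
    double vcorr = 0; // serial reduction, like reduce_thread_forces does for forces
    for (double v : vcorrPerThread)
    {
        vcorr += v;
    }
    std::printf("Vcorr = %.6f\n", vcorr);
    return 0;
}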
double do_md_openmm(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact, int nstglobalcomm, gmx_vsite_t *vsite,gmx_constr_t constr, int stepout,t_inputrec *ir, gmx_mtop_t *top_global, t_fcdata *fcd, t_state *state_global, t_mdatoms *mdatoms, t_nrnb *nrnb,gmx_wallcycle_t wcycle, gmx_edsam_t ed,t_forcerec *fr, int repl_ex_nst,int repl_ex_seed, real cpt_period,real max_hours, const char *deviceOptions, unsigned long Flags, gmx_runtime_t *runtime) { gmx_mdoutf_t *outf; gmx_large_int_t step,step_rel; double run_time; double t,t0,lam0; gmx_bool bSimAnn, bFirstStep,bStateFromTPX,bLastStep,bStartingFromCpt; gmx_bool bInitStep=TRUE; gmx_bool do_ene,do_log, do_verbose, bX,bV,bF,bCPT; tensor force_vir,shake_vir,total_vir,pres; int i,m; int mdof_flags; rvec mu_tot; t_vcm *vcm; int nchkpt=1; gmx_localtop_t *top; t_mdebin *mdebin=NULL; t_state *state=NULL; rvec *f_global=NULL; int n_xtc=-1; rvec *x_xtc=NULL; gmx_enerdata_t *enerd; rvec *f=NULL; gmx_global_stat_t gstat; gmx_update_t upd=NULL; t_graph *graph=NULL; globsig_t gs; gmx_groups_t *groups; gmx_ekindata_t *ekind, *ekind_save; gmx_bool bAppend; int a0,a1; matrix lastbox; real reset_counters=0,reset_counters_now=0; char sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE]; int handled_stop_condition=gmx_stop_cond_none; const char *ommOptions = NULL; void *openmmData; bAppend = (Flags & MD_APPENDFILES); check_ir_old_tpx_versions(cr,fplog,ir,top_global); groups = &top_global->groups; /* Initial values */ init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0, nrnb,top_global,&upd, nfile,fnm,&outf,&mdebin, force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags); clear_mat(total_vir); clear_mat(pres); /* Energy terms and groups */ snew(enerd,1); init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd); snew(f,top_global->natoms); /* Kinetic energy data */ snew(ekind,1); init_ekindata(fplog,top_global,&(ir->opts),ekind); /* needed for iteration of constraints */ snew(ekind_save,1); init_ekindata(fplog,top_global,&(ir->opts),ekind_save); /* Copy the cos acceleration to the groups struct */ ekind->cosacc.cos_accel = ir->cos_accel; gstat = global_stat_init(ir); debug_gmx(); { double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1); if ((io > 2000) && MASTER(cr)) fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io); } top = gmx_mtop_generate_local_top(top_global,ir); a0 = 0; a1 = top_global->natoms; state = partdec_init_local_state(cr,state_global); f_global = f; atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms); if (vsite) { set_vsite_top(vsite,top,mdatoms,cr); } if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) { graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE); } update_mdatoms(mdatoms,state->lambda); if (deviceOptions[0]=='\0') { /* empty options, which should default to OpenMM in this build */ ommOptions=deviceOptions; } else { if (gmx_strncasecmp(deviceOptions,"OpenMM",6)!=0) { gmx_fatal(FARGS, "This Gromacs version currently only works with OpenMM. 
Use -device \"OpenMM:<options>\""); } else { ommOptions=strchr(deviceOptions,':'); if (NULL!=ommOptions) { /* Increase the pointer to skip the colon */ ommOptions++; } } } openmmData = openmm_init(fplog, ommOptions, ir, top_global, top, mdatoms, fr, state); please_cite(fplog,"Friedrichs2009"); if (MASTER(cr)) { /* Update mdebin with energy history if appending to output files */ if ( Flags & MD_APPENDFILES ) { restore_energyhistory_from_state(mdebin,&state_global->enerhist); } /* Set the initial energy history in state to zero by updating once */ update_energyhistory(&state_global->enerhist,mdebin); } if (constr) { set_constraints(constr,top,ir,mdatoms,cr); } if (!ir->bContinuation) { if (mdatoms->cFREEZE && (state->flags & (1<<estV))) { /* Set the velocities of frozen particles to zero */ for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) { for (m=0; m<DIM; m++) { if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) { state->v[i][m] = 0; } } } } if (constr) { /* Constrain the initial coordinates and velocities */ do_constrain_first(fplog,constr,ir,mdatoms,state,f, graph,cr,nrnb,fr,top,shake_vir); } if (vsite) { /* Construct the virtual sites for the initial configuration */ construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL, top->idef.iparams,top->idef.il, fr->ePBC,fr->bMolPBC,graph,cr,state->box); } } debug_gmx(); if (MASTER(cr)) { char tbuf[20]; fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]); fprintf(stderr,"starting mdrun '%s'\n", *(top_global->name)); if (ir->nsteps >= 0) { sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t); } else { sprintf(tbuf,"%s","infinite"); } if (ir->init_step > 0) { fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n", gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf, gmx_step_str(ir->init_step,sbuf2), ir->init_step*ir->delta_t); } else { fprintf(stderr,"%s steps, %s ps.\n", gmx_step_str(ir->nsteps,sbuf),tbuf); } } fprintf(fplog,"\n"); /* Set and write start time */ runtime_start(runtime); print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime); wallcycle_start(wcycle,ewcRUN); if (fplog) fprintf(fplog,"\n"); /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */ debug_gmx(); /*********************************************************** * * Loop over MD steps * ************************************************************/ /* loop over MD steps or if rerunMD to end of input trajectory */ bFirstStep = TRUE; /* Skip the first Nose-Hoover integration when we get the state from tpx */ bStateFromTPX = !opt2bSet("-cpi",nfile,fnm); bInitStep = bFirstStep && bStateFromTPX; bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep; bLastStep = FALSE; init_global_signals(&gs,cr,ir,repl_ex_nst); step = ir->init_step; step_rel = 0; while (!bLastStep) { wallcycle_start(wcycle,ewcSTEP); GMX_MPE_LOG(ev_timestep1); bLastStep = (step_rel == ir->nsteps); t = t0 + step*ir->delta_t; if (gs.set[eglsSTOPCOND] != 0) { bLastStep = TRUE; } do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep; do_verbose = bVerbose && (step % stepout == 0 || bFirstStep || bLastStep); if (MASTER(cr) && do_log) { print_ebin_header(fplog,step,t,state->lambda); } clear_mat(force_vir); GMX_MPE_LOG(ev_timestep2); /* We write a checkpoint at this MD step when: * either when we signalled through gs (in OpenMM NS works different), * or at the last step (but not when we do not want confout), * but never at the first step. 
*/ bCPT = ((gs.set[eglsCHKPT] || (bLastStep && (Flags & MD_CONFOUT))) && step > ir->init_step ); if (bCPT) { gs.set[eglsCHKPT] = 0; } /* Now we have the energies and forces corresponding to the * coordinates at time t. We must output all of this before * the update. * for RerunMD t is read from input trajectory */ GMX_MPE_LOG(ev_output_start); mdof_flags = 0; if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; } if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; } if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; } if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; } if (bCPT) { mdof_flags |= MDOF_CPT; }; do_ene = (do_per_step(step,ir->nstenergy) || bLastStep); if (mdof_flags != 0 || do_ene || do_log) { wallcycle_start(wcycle,ewcTRAJ); bF = (mdof_flags & MDOF_F); bX = (mdof_flags & (MDOF_X | MDOF_XTC | MDOF_CPT)); bV = (mdof_flags & (MDOF_V | MDOF_CPT)); openmm_copy_state(openmmData, state, &t, f, enerd, bX, bV, bF, do_ene); upd_mdebin(mdebin, FALSE,TRUE, t,mdatoms->tmass,enerd,state,lastbox, shake_vir,force_vir,total_vir,pres, ekind,mu_tot,constr); print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,do_log?fplog:NULL, step,t, eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts)); write_traj(fplog,cr,outf,mdof_flags,top_global, step,t,state,state_global,f,f_global,&n_xtc,&x_xtc); if (bCPT) { nchkpt++; bCPT = FALSE; } debug_gmx(); if (bLastStep && step_rel == ir->nsteps && (Flags & MD_CONFOUT) && MASTER(cr)) { /* x and v have been collected in write_traj, * because a checkpoint file will always be written * at the last step. */ fprintf(stderr,"\nWriting final coordinates.\n"); if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) { /* Make molecules whole only for confout writing */ do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x); } write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm), *top_global->name,top_global, state_global->x,state_global->v, ir->ePBC,state->box); debug_gmx(); } wallcycle_stop(wcycle,ewcTRAJ); } GMX_MPE_LOG(ev_output_finish); /* Determine the wallclock run time up till now */ run_time = gmx_gettime() - (double)runtime->real; /* Check whether everything is still allright */ if (((int)gmx_get_stop_condition() > handled_stop_condition) #ifdef GMX_THREADS && MASTER(cr) #endif ) { /* this is just make gs.sig compatible with the hack of sending signals around by MPI_Reduce with together with other floats */ /* NOTE: this only works for serial code. For code that allows MPI nodes to propagate their condition, see kernel/md.c*/ if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns ) gs.set[eglsSTOPCOND]=1; if ( gmx_get_stop_condition() == gmx_stop_cond_next ) gs.set[eglsSTOPCOND]=1; /* < 0 means stop at next step, > 0 means stop at next NS step */ if (fplog) { fprintf(fplog, "\n\nReceived the %s signal, stopping at the next %sstep\n\n", gmx_get_signal_name(), gs.sig[eglsSTOPCOND]==1 ? "NS " : ""); fflush(fplog); } fprintf(stderr, "\n\nReceived the %s signal, stopping at the next %sstep\n\n", gmx_get_signal_name(), gs.sig[eglsSTOPCOND]==1 ? 
"NS " : ""); fflush(stderr); handled_stop_condition=(int)gmx_get_stop_condition(); } else if (MASTER(cr) && (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) && gs.set[eglsSTOPCOND] == 0) { /* Signal to terminate the run */ gs.set[eglsSTOPCOND] = 1; if (fplog) { fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99); } fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99); } /* checkpoints */ if (MASTER(cr) && (cpt_period >= 0 && (cpt_period == 0 || run_time >= nchkpt*cpt_period*60.0)) && gs.set[eglsCHKPT] == 0) { gs.set[eglsCHKPT] = 1; } /* Time for performance */ if (((step % stepout) == 0) || bLastStep) { runtime_upd_proc(runtime); } if (do_per_step(step,ir->nstlog)) { if (fflush(fplog) != 0) { gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of quota?"); } } /* Remaining runtime */ if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() )) { print_time(stderr,runtime,step,ir,cr); } bFirstStep = FALSE; bInitStep = FALSE; bStartingFromCpt = FALSE; step++; step_rel++; openmm_take_one_step(openmmData); } /* End of main MD loop */ debug_gmx(); /* Stop the time */ runtime_end(runtime); if (MASTER(cr)) { if (ir->nstcalcenergy > 0) { print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t, eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts)); } } openmm_cleanup(fplog, openmmData); done_mdoutf(outf); debug_gmx(); runtime->nsteps_done = step_rel; return 0; }
void do_force(FILE *fplog,t_commrec *cr, t_inputrec *inputrec, int step,t_nrnb *nrnb,gmx_wallcycle_t wcycle, gmx_localtop_t *top, gmx_groups_t *groups, matrix box,rvec x[],history_t *hist, rvec f[],rvec buf[], tensor vir_force, t_mdatoms *mdatoms, gmx_enerdata_t *enerd,t_fcdata *fcd, real lambda,t_graph *graph, t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot, real t,FILE *field,gmx_edsam_t ed, int flags) { static rvec box_size; int cg0,cg1,i,j; int start,homenr; static double mu[2*DIM]; rvec mu_tot_AB[2]; bool bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS,bDoForces; matrix boxs; real e,v,dvdl; t_pbc pbc; float cycles_ppdpme,cycles_pme,cycles_force; start = mdatoms->start; homenr = mdatoms->homenr; bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog)); clear_mat(vir_force); if (PARTDECOMP(cr)) { pd_cg_range(cr,&cg0,&cg1); } else { cg0 = 0; if (DOMAINDECOMP(cr)) cg1 = cr->dd->ncg_tot; else cg1 = top->cgs.nr; if (fr->n_tpi > 0) cg1--; } bStateChanged = (flags & GMX_FORCE_STATECHANGED); bNS = (flags & GMX_FORCE_NS); bFillGrid = (bNS && bStateChanged); bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr)); bDoForces = (flags & GMX_FORCE_FORCES); if (bStateChanged) { update_forcerec(fplog,fr,box); /* Calculate total (local) dipole moment in a temporary common array. * This makes it possible to sum them over nodes faster. */ calc_mu(start,homenr, x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed, mu,mu+DIM); } if (fr->ePBC != epbcNONE) { /* Compute shift vectors every step, * because of pressure coupling or box deformation! */ if (DYNAMIC_BOX(*inputrec) && bStateChanged) calc_shifts(box,fr->shift_vec); if (bCalcCGCM) { put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box, &(top->cgs),x,fr->cg_cm); inc_nrnb(nrnb,eNR_CGCM,homenr); inc_nrnb(nrnb,eNR_RESETX,cg1-cg0); } else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) { unshift_self(graph,box,x); } } else if (bCalcCGCM) { calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm); inc_nrnb(nrnb,eNR_CGCM,homenr); } if (bCalcCGCM) { if (PAR(cr)) { move_cgcm(fplog,cr,fr->cg_cm); } if (gmx_debug_at) pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr); } #ifdef GMX_MPI if (!(cr->duty & DUTY_PME)) { /* Send particle coordinates to the pme nodes. * Since this is only implemented for domain decomposition * and domain decomposition does not use the graph, * we do not need to worry about shifting. */ wallcycle_start(wcycle,ewcPP_PMESENDX); GMX_MPE_LOG(ev_send_coordinates_start); bBS = (inputrec->nwall == 2); if (bBS) { copy_mat(box,boxs); svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]); } gmx_pme_send_x(cr,bBS ? 
boxs : box,x,mdatoms->nChargePerturbed,lambda); GMX_MPE_LOG(ev_send_coordinates_finish); wallcycle_stop(wcycle,ewcPP_PMESENDX); } #endif /* GMX_MPI */ /* Communicate coordinates and sum dipole if necessary */ if (PAR(cr)) { wallcycle_start(wcycle,ewcMOVEX); if (DOMAINDECOMP(cr)) { dd_move_x(cr->dd,box,x,buf); } else { move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb); } /* When we don't need the total dipole we sum it in global_stat */ if (NEED_MUTOT(*inputrec)) gmx_sumd(2*DIM,mu,cr); wallcycle_stop(wcycle,ewcMOVEX); } for(i=0; i<2; i++) for(j=0;j<DIM;j++) mu_tot_AB[i][j] = mu[i*DIM + j]; if (fr->efep == efepNO) copy_rvec(mu_tot_AB[0],mu_tot); else for(j=0; j<DIM; j++) mu_tot[j] = (1.0 - lambda)*mu_tot_AB[0][j] + lambda*mu_tot_AB[1][j]; /* Reset energies */ reset_energies(&(inputrec->opts),fr,bNS,enerd,MASTER(cr)); if (bNS) { wallcycle_start(wcycle,ewcNS); if (graph && bStateChanged) /* Calculate intramolecular shift vectors to make molecules whole */ mk_mshift(fplog,graph,fr->ePBC,box,x); /* Reset long range forces if necessary */ if (fr->bTwinRange) { clear_rvecs(fr->f_twin_n,fr->f_twin); clear_rvecs(SHIFTS,fr->fshift_twin); } /* Do the actual neighbour searching and if twin range electrostatics * also do the calculation of long range forces and energies. */ dvdl = 0; ns(fplog,fr,x,f,box,groups,&(inputrec->opts),top,mdatoms, cr,nrnb,step,lambda,&dvdl,&enerd->grpp,bFillGrid,bDoForces); if (bSepDVDL) fprintf(fplog,sepdvdlformat,"LR non-bonded",0,dvdl); enerd->dvdl_lr = dvdl; enerd->term[F_DVDL] += dvdl; wallcycle_stop(wcycle,ewcNS); } if (DOMAINDECOMP(cr)) { if (!(cr->duty & DUTY_PME)) { wallcycle_start(wcycle,ewcPPDURINGPME); dd_force_flop_start(cr->dd,nrnb); } } /* Start the force cycle counter. * This counter is stopped in do_forcelow_level. * No parallel communication should occur while this counter is running, * since that will interfere with the dynamic load balancing. */ wallcycle_start(wcycle,ewcFORCE); if (bDoForces) { /* Reset PME/Ewald forces if necessary */ if (fr->bF_NoVirSum) { GMX_BARRIER(cr->mpi_comm_mygroup); if (fr->bDomDec) clear_rvecs(fr->f_novirsum_n,fr->f_novirsum); else clear_rvecs(homenr,fr->f_novirsum+start); GMX_BARRIER(cr->mpi_comm_mygroup); } /* Copy long range forces into normal buffers */ if (fr->bTwinRange) { for(i=0; i<fr->f_twin_n; i++) copy_rvec(fr->f_twin[i],f[i]); for(i=0; i<SHIFTS; i++) copy_rvec(fr->fshift_twin[i],fr->fshift[i]); } else { if (DOMAINDECOMP(cr)) clear_rvecs(cr->dd->nat_tot,f); else clear_rvecs(mdatoms->nr,f); clear_rvecs(SHIFTS,fr->fshift); } clear_rvec(fr->vir_diag_posres); GMX_BARRIER(cr->mpi_comm_mygroup); } if (inputrec->ePull == epullCONSTRAINT) clear_pull_forces(inputrec->pull); /* update QMMMrec, if necessary */ if(fr->bQMMM) update_QMMMrec(cr,fr,x,mdatoms,box,top); if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) { /* Position restraints always require full pbc */ set_pbc(&pbc,inputrec->ePBC,box); v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms, top->idef.iparams_posres, (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres, inputrec->ePBC==epbcNONE ? 
NULL : &pbc,lambda,&dvdl, fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB); if (bSepDVDL) { fprintf(fplog,sepdvdlformat, interaction_function[F_POSRES].longname,v,dvdl); } enerd->term[F_POSRES] += v; enerd->term[F_DVDL] += dvdl; inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2); } /* Compute the bonded and non-bonded forces */ do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef), cr,nrnb,wcycle,mdatoms,&(inputrec->opts), x,hist,f,enerd,fcd,box,lambda,graph,&(top->excls),mu_tot_AB, flags,&cycles_force); GMX_BARRIER(cr->mpi_comm_mygroup); if (ed) { do_flood(fplog,cr,x,f,ed,box,step); } if (DOMAINDECOMP(cr)) { dd_force_flop_stop(cr->dd,nrnb); if (wcycle) dd_cycles_add(cr->dd,cycles_force,ddCyclF); } if (bDoForces) { /* Compute forces due to electric field */ calc_f_el(MASTER(cr) ? field : NULL, start,homenr,mdatoms->chargeA,x,f,inputrec->ex,inputrec->et,t); /* When using PME/Ewald we compute the long range virial there. * Otherwise we do it based on long range forces from the twin range * cut-off based calculation (or not at all). */ /* Communicate the forces */ if (PAR(cr)) { wallcycle_start(wcycle,ewcMOVEF); if (DOMAINDECOMP(cr)) { dd_move_f(cr->dd,f,buf,fr->fshift); /* Position restraints do not introduce inter-cg forces */ if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl) dd_move_f(cr->dd,fr->f_novirsum,buf,NULL); } else { move_f(fplog,cr,GMX_LEFT,GMX_RIGHT,f,buf,nrnb); } wallcycle_stop(wcycle,ewcMOVEF); } } if (bDoForces) { if (vsite) { wallcycle_start(wcycle,ewcVSITESPREAD); spread_vsite_f(fplog,vsite,x,f,fr->fshift,nrnb, &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr); wallcycle_stop(wcycle,ewcVSITESPREAD); } /* Calculation of the virial must be done after vsites! */ calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f, vir_force,graph,box,nrnb,fr,inputrec->ePBC); } if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) { /* Calculate the center of mass forces, this requires communication, * which is why pull_potential is called close to other communication. * The virial contribution is calculated directly, * which is why we call pull_potential after calc_virial. */ set_pbc(&pbc,inputrec->ePBC,box); dvdl = 0; enerd->term[F_COM_PULL] = pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc, cr,t,lambda,x,f,vir_force,&dvdl); if (bSepDVDL) fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl); enerd->term[F_DVDL] += dvdl; } if (!(cr->duty & DUTY_PME)) { cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME); dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME); } #ifdef GMX_MPI if (PAR(cr) && !(cr->duty & DUTY_PME)) { /* In case of node-splitting, the PP nodes receive the long-range * forces, virial and energy from the PME nodes here. */ wallcycle_start(wcycle,ewcPP_PMEWAITRECVF); dvdl = 0; gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl, &cycles_pme); if (bSepDVDL) fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl); enerd->term[F_COUL_RECIP] += e; enerd->term[F_DVDL] += dvdl; if (wcycle) dd_cycles_add(cr->dd,cycles_pme,ddCyclPME); wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF); } #endif if (bDoForces && fr->bF_NoVirSum) { if (vsite) { /* Spread the mesh force on virtual sites to the other particles... * This is parallelized. MPI communication is performed * if the constructing atoms aren't local.
*/ wallcycle_start(wcycle,ewcVSITESPREAD); spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,nrnb, &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr); wallcycle_stop(wcycle,ewcVSITESPREAD); } /* Now add the forces, this is local */ if (fr->bDomDec) { sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum); } else { sum_forces(start,start+homenr,f,fr->f_novirsum); } if (EEL_FULL(fr->eeltype)) { /* Add the mesh contribution to the virial */ m_add(vir_force,fr->vir_el_recip,vir_force); } if (debug) pr_rvecs(debug,0,"vir_force",vir_force,DIM); } /* Sum the potential energy terms from group contributions */ sum_epot(&(inputrec->opts),enerd); if (fr->print_force >= 0 && bDoForces) print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f); }
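/*
 * A worked example of the free-energy dipole handling in do_force() above:
 * with a perturbed charge topology the total dipole is interpolated linearly
 * between the A and B states, mu_tot = (1 - lambda)*mu_A + lambda*mu_B.
 * This is a minimal stand-alone sketch of that step using the GROMACS rvec
 * and real types; interpolate_mu_tot() is a hypothetical helper, not an
 * existing GROMACS function.
 */
static void interpolate_mu_tot(const rvec muA, const rvec muB, real lambda,
                               rvec muTot)
{
    int j;

    for (j = 0; j < DIM; j++)
    {
        /* Linear mixing of the A- and B-state dipole components */
        muTot[j] = (1.0 - lambda)*muA[j] + lambda*muB[j];
    }
}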
void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, rvec x[], history_t *hist, rvec f[], rvec f_longrange[], gmx_enerdata_t *enerd, t_fcdata *fcd, gmx_localtop_t *top, gmx_genborn_t *born, gmx_bool bBornRadii, matrix box, t_lambda *fepvals, real *lambda, t_graph *graph, t_blocka *excl, rvec mu_tot[], int flags, float *cycles_pme) { int i, j; int donb_flags; gmx_bool bSB; int pme_flags; matrix boxs; rvec box_size; t_pbc pbc; real dvdl_dum[efptNR], dvdl_nb[efptNR]; #ifdef GMX_MPI double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ #endif set_pbc(&pbc, fr->ePBC, box); /* reset free energy components */ for (i = 0; i < efptNR; i++) { dvdl_nb[i] = 0; dvdl_dum[i] = 0; } /* Reset box */ for (i = 0; (i < DIM); i++) { box_size[i] = box[i][i]; } debug_gmx(); /* do QMMM first if requested */ if (fr->bQMMM) { enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); } /* Call the short range functions all in one go. */ #ifdef GMX_MPI /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ #define TAKETIME FALSE if (TAKETIME) { MPI_Barrier(cr->mpi_comm_mygroup); t0 = MPI_Wtime(); } #endif if (ir->nwall) { /* foreign lambda component for walls */ real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], enerd->grpp.ener[egLJSR], nrnb); enerd->dvdl_lin[efptVDW] += dvdl_walls; } /* If doing GB, reset dvda and calculate the Born radii */ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsNONBONDED); for (i = 0; i < born->nr; i++) { fr->dvda[i] = 0; } if (bBornRadii) { calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb); } wallcycle_sub_stop(wcycle, ewcsNONBONDED); } where(); /* We only do non-bonded calculation with group scheme here, the verlet * calls are done from do_force_cutsVERLET(). */ if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) { donb_flags = 0; /* Add short-range interactions */ donb_flags |= GMX_NONBONDED_DO_SR; /* Currently all group scheme kernels always calculate (shift-)forces */ if (flags & GMX_FORCE_FORCES) { donb_flags |= GMX_NONBONDED_DO_FORCE; } if (flags & GMX_FORCE_VIRIAL) { donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; } if (flags & GMX_FORCE_ENERGY) { donb_flags |= GMX_NONBONDED_DO_POTENTIAL; } if (flags & GMX_FORCE_DO_LR) { donb_flags |= GMX_NONBONDED_DO_LR; } wallcycle_sub_start(wcycle, ewcsNONBONDED); do_nonbonded(fr, x, f, f_longrange, md, excl, &enerd->grpp, nrnb, lambda, dvdl_nb, -1, -1, donb_flags); /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energy contributions. */ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) { for (i = 0; i < enerd->n_lambda; i++) { real lam_i[efptNR]; for (j = 0; j < efptNR; j++) { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } reset_foreign_enerdata(enerd); do_nonbonded(fr, x, f, f_longrange, md, excl, &(enerd->foreign_grpp), nrnb, lam_i, dvdl_dum, -1, -1, (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; } } wallcycle_sub_stop(wcycle, ewcsNONBONDED); where(); } /* If we are doing GB, calculate bonded forces and apply corrections * to the solvation forces */ /* MRS: Eventually, may need to include free energy contribution here!
*/ if (ir->implicit_solvent) { wallcycle_sub_start(wcycle, ewcsLISTED); calc_gb_forces(cr, md, born, top, x, f, fr, idef, ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); wallcycle_sub_stop(wcycle, ewcsLISTED); } #ifdef GMX_MPI if (TAKETIME) { t1 = MPI_Wtime(); fr->t_fnbf += t1-t0; } #endif if (fepvals->sc_alpha != 0) { enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; } else { enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; } if (fepvals->sc_alpha != 0) /* even though the Coulomb part is linear, we already added it, because we need to go through the vdw calculation anyway */ { enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; } else { enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; } debug_gmx(); if (debug) { pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); } /* Shift the coordinates. Must be done before listed forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT * go when no listed forces have to be evaluated. * * The shifting and PBC code is deliberately not timed, since with * the Verlet scheme it only takes non-zero time with triclinic * boxes, and even then the time is around a factor of 100 less * than the next smallest counter. */ /* Here sometimes we would not need to shift with NBFonly, * but we do so anyhow for consistency of the returned coordinates. */ if (graph) { shift_self(graph, box, x); if (TRICLINIC(box)) { inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); } else { inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); } } /* Check whether we need to do listed interactions or correct for exclusions */ if (fr->bMolPBC && ((flags & GMX_FORCE_LISTED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) { /* TODO There are no electrostatics methods that require this transformation, when using the Verlet scheme, so update the above conditional. */ /* Since all atoms are in the rectangular or triclinic unit-cell, * only single box vector shifts (2 in x) are required. */ set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); } debug_gmx(); do_force_listed(wcycle, box, ir->fepvals, cr->ms, idef, (const rvec *) x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, flags); where(); *cycles_pme = 0; clear_mat(fr->vir_el_recip); clear_mat(fr->vir_lj_recip); /* Do long-range electrostatics and/or LJ-PME, including related short-range * corrections. */ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) { int status = 0; real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; bSB = (ir->nwall == 2); if (bSB) { copy_mat(box, boxs); svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) { real dvdl_long_range_correction_q = 0; real dvdl_long_range_correction_lj = 0; /* With the Verlet scheme exclusion forces are calculated * in the non-bonded kernel. */ /* The TPI molecule does not have exclusions with the rest * of the system and no intra-molecular PME grid * contributions will be calculated in * gmx_pme_calc_energy.
*/ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0) { int nthreads, t; wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); if (fr->n_tpi > 0) { gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); } nthreads = gmx_omp_nthreads_get(emntBonded); #pragma omp parallel for num_threads(nthreads) schedule(static) for (t = 0; t < nthreads; t++) { int i; rvec *fnv; tensor *vir_q, *vir_lj; real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; if (t == 0) { fnv = fr->f_novirsum; vir_q = &fr->vir_el_recip; vir_lj = &fr->vir_lj_recip; Vcorrt_q = &Vcorr_q; Vcorrt_lj = &Vcorr_lj; dvdlt_q = &dvdl_long_range_correction_q; dvdlt_lj = &dvdl_long_range_correction_lj; } else { fnv = fr->f_t[t].f; vir_q = &fr->f_t[t].vir_q; vir_lj = &fr->f_t[t].vir_lj; Vcorrt_q = &fr->f_t[t].Vcorr_q; Vcorrt_lj = &fr->f_t[t].Vcorr_lj; dvdlt_q = &fr->f_t[t].dvdl[efptCOUL]; dvdlt_lj = &fr->f_t[t].dvdl[efptVDW]; for (i = 0; i < fr->natoms_force; i++) { clear_rvec(fnv[i]); } clear_mat(*vir_q); clear_mat(*vir_lj); } *dvdlt_q = 0; *dvdlt_lj = 0; ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], cr, t, fr, md->chargeA, md->chargeB, md->sqrt_c6A, md->sqrt_c6B, md->sigmaA, md->sigmaB, md->sigma3A, md->sigma3B, md->nChargePerturbed || md->nTypePerturbed, ir->cutoff_scheme != ecutsVERLET, excl, x, bSB ? boxs : box, mu_tot, ir->ewald_geometry, ir->epsilon_surface, fnv, *vir_q, *vir_lj, Vcorrt_q, Vcorrt_lj, lambda[efptCOUL], lambda[efptVDW], dvdlt_q, dvdlt_lj); } if (nthreads > 1) { reduce_thread_forces(fr->natoms_force, fr->f_novirsum, fr->vir_el_recip, fr->vir_lj_recip, &Vcorr_q, &Vcorr_lj, &dvdl_long_range_correction_q, &dvdl_long_range_correction_lj, nthreads, fr->f_t); } wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); } if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0) { /* This is not in a subcounter because it takes a negligible and constant-sized amount of time */ Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, &dvdl_long_range_correction_q, fr->vir_el_recip); } enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q; enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj; if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME)) { /* Do reciprocal PME for Coulomb and/or LJ. */ assert(fr->n_tpi >= 0); if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) { pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; if (EEL_PME(fr->eeltype)) { pme_flags |= GMX_PME_DO_COULOMB; } if (EVDW_PME(fr->vdwtype)) { pme_flags |= GMX_PME_DO_LJ; } if (flags & GMX_FORCE_FORCES) { pme_flags |= GMX_PME_CALC_F; } if (flags & GMX_FORCE_VIRIAL) { pme_flags |= GMX_PME_CALC_ENER_VIR; } if (fr->n_tpi > 0) { /* We don't calculate f, but we do want the potential */ pme_flags |= GMX_PME_CALC_POT; } wallcycle_start(wcycle, ewcPMEMESH); status = gmx_pme_do(fr->pmedata, 0, md->homenr - fr->n_tpi, x, fr->f_novirsum, md->chargeA, md->chargeB, md->sqrt_c6A, md->sqrt_c6B, md->sigmaA, md->sigmaB, bSB ? boxs : box, cr, DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, DOMAINDECOMP(cr) ? 
dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle, fr->vir_el_recip, fr->ewaldcoeff_q, fr->vir_lj_recip, fr->ewaldcoeff_lj, &Vlr_q, &Vlr_lj, lambda[efptCOUL], lambda[efptVDW], &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags); *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); if (status != 0) { gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); } /* We should try to do as little computation after * this as possible, because parallel PME synchronizes * the nodes, so we want all load imbalance of the * rest of the force calculation to be before the PME * call. DD load balancing is done on the whole time * of the force call (without PME). */ } if (fr->n_tpi > 0) { if (EVDW_PME(ir->vdwtype)) { gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); } /* Determine the PME grid energy of the test molecule * with the PME grid potential of the other charges. */ gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, x + md->homenr - fr->n_tpi, md->chargeA + md->homenr - fr->n_tpi, &Vlr_q); } } } if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype)) { Vlr_q = do_ewald(ir, x, fr->f_novirsum, md->chargeA, md->chargeB, box_size, cr, md->homenr, fr->vir_el_recip, fr->ewaldcoeff_q, lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table); } /* Note that with separate PME nodes we get the real energies later */ enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q; enerd->dvdl_lin[efptVDW] += dvdl_long_range_lj; enerd->term[F_COUL_RECIP] = Vlr_q + Vcorr_q; enerd->term[F_LJ_RECIP] = Vlr_lj + Vcorr_lj; if (debug) { fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]); pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]); pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM); } } else { /* Is there a reaction-field exclusion correction needed? */ if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype) { /* With the Verlet scheme, exclusion forces are calculated * in the non-bonded kernel. */ if (ir->cutoff_scheme != ecutsVERLET) { real dvdl_rf_excl = 0; enerd->term[F_RF_EXCL] = RF_excl_correction(fr, graph, md, excl, x, f, fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; } } } where(); debug_gmx(); if (debug) { print_nrnb(debug, nrnb); } debug_gmx(); #ifdef GMX_MPI if (TAKETIME) { t2 = MPI_Wtime(); MPI_Barrier(cr->mpi_comm_mygroup); t3 = MPI_Wtime(); fr->t_wait += t3-t2; if (fr->timesteps == 11) { char buf[22]; fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", cr->nodeid, gmx_step_str(fr->timesteps, buf), 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); } fr->timesteps++; } #endif if (debug) { pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); } }
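/*
 * do_force_lowlevel() above runs ewald_LRcorrection() over OpenMP threads
 * with thread 0 writing straight into the output buffers while the other
 * threads fill private scratch that is reduced afterwards; this avoids a
 * redundant copy in the common single-threaded case. The sketch below shows
 * only that accumulate-then-reduce pattern under the same assumption; the
 * names sum_correction() and term_of_thread are illustrative, not GROMACS
 * API.
 */
#include <omp.h>

static double sum_correction(int nthreads, double (*term_of_thread)(int),
                             double *scratch)
{
    double total = 0; /* thread 0 accumulates here directly */
    int    t;

#pragma omp parallel for num_threads(nthreads) schedule(static)
    for (t = 0; t < nthreads; t++)
    {
        double v = term_of_thread(t);

        if (t == 0)
        {
            total += v; /* no scratch slot needed for thread 0 */
        }
        else
        {
            scratch[t] = v; /* private slot, reduced serially below */
        }
    }
    for (t = 1; t < nthreads; t++)
    {
        total += scratch[t];
    }
    return total;
}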
void do_md_trajectory_writing(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], gmx_int64_t step, gmx_int64_t step_rel, double t, t_inputrec *ir, t_state *state, t_state *state_global, gmx_mtop_t *top_global, t_forcerec *fr, gmx_mdoutf_t outf, t_mdebin *mdebin, gmx_ekindata_t *ekind, rvec *f, rvec *f_global, int *nchkpt, gmx_bool bCPT, gmx_bool bRerunMD, gmx_bool bLastStep, gmx_bool bDoConfOut, gmx_bool bSumEkinhOld ) { int mdof_flags; rvec *x_for_confout = NULL; mdof_flags = 0; if (do_per_step(step, ir->nstxout)) { mdof_flags |= MDOF_X; } if (do_per_step(step, ir->nstvout)) { mdof_flags |= MDOF_V; } if (do_per_step(step, ir->nstfout)) { mdof_flags |= MDOF_F; } if (do_per_step(step, ir->nstxout_compressed)) { mdof_flags |= MDOF_X_COMPRESSED; } if (bCPT) { mdof_flags |= MDOF_CPT; } ; #if defined(GMX_FAHCORE) if (bLastStep) { /* Enforce writing positions and velocities at end of run */ mdof_flags |= (MDOF_X | MDOF_V); } if (MASTER(cr)) { fcReportProgress( ir->nsteps, step ); } #if defined(__native_client__) fcCheckin(MASTER(cr)); #endif /* sync bCPT and fc record-keeping */ if (bCPT && MASTER(cr)) { fcRequestCheckPoint(); } #endif if (mdof_flags != 0) { wallcycle_start(mdoutf_get_wcycle(outf), ewcTRAJ); if (bCPT) { if (MASTER(cr)) { if (bSumEkinhOld) { state_global->ekinstate.bUpToDate = FALSE; } else { update_ekinstate(&state_global->ekinstate, ekind); state_global->ekinstate.bUpToDate = TRUE; } update_energyhistory(state_global->enerhist, mdebin); } } mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global, step, t, state, state_global, f, f_global); if (bCPT) { (*nchkpt)++; } debug_gmx(); if (bLastStep && step_rel == ir->nsteps && bDoConfOut && MASTER(cr) && !bRerunMD) { if (fr->bMolPBC && state->x == state_global->x) { /* This (single-rank) run needs to allocate a temporary array of size natoms so that any periodicity removal for mdrun -confout does not perturb the update and thus the final .edr output. This makes .cpt restarts look binary identical, and makes .edr restarts binary identical. */ snew(x_for_confout, state_global->natoms); copy_rvecn(state_global->x, x_for_confout, 0, state_global->natoms); } else { /* With DD, or no bMolPBC, it doesn't matter if we change state_global->x */ x_for_confout = state_global->x; } /* x and v have been collected in mdoutf_write_to_trajectory_files, * because a checkpoint file will always be written * at the last step. */ fprintf(stderr, "\nWriting final coordinates.\n"); if (fr->bMolPBC) { /* Make molecules whole only for confout writing */ do_pbc_mtop(fplog, ir->ePBC, state->box, top_global, x_for_confout); } write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm), *top_global->name, top_global, x_for_confout, state_global->v, ir->ePBC, state->box); if (fr->bMolPBC && state->x == state_global->x) { sfree(x_for_confout); } debug_gmx(); } wallcycle_stop(mdoutf_get_wcycle(outf), ewcTRAJ); } }
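/*
 * The mdof_flags mask in do_md_trajectory_writing() above is assembled from
 * do_per_step() tests, one bit per kind of output, so a single integer
 * decides everything that is written this step. A minimal sketch of that
 * pattern; the SKETCH_* bits and both helper names are hypothetical, and
 * do_per_step() is assumed to be the simple periodicity test shown.
 */
#define SKETCH_X (1<<0) /* positions */
#define SKETCH_V (1<<1) /* velocities */
#define SKETCH_F (1<<2) /* forces */

static gmx_bool do_per_step_sketch(gmx_int64_t step, gmx_int64_t nstep)
{
    /* TRUE when output with interval nstep is due at this step */
    return (nstep > 0 && step % nstep == 0);
}

static int output_flags_sketch(gmx_int64_t step,
                               int nstxout, int nstvout, int nstfout)
{
    int flags = 0;

    if (do_per_step_sketch(step, nstxout)) { flags |= SKETCH_X; }
    if (do_per_step_sketch(step, nstvout)) { flags |= SKETCH_V; }
    if (do_per_step_sketch(step, nstfout)) { flags |= SKETCH_F; }
    return flags;
}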
/* TODO Specialize this routine into init-time and loop-time versions? e.g. bReadEkin is only true when restoring from checkpoint */ void compute_globals(FILE *fplog, gmx_global_stat *gstat, t_commrec *cr, t_inputrec *ir, t_forcerec *fr, gmx_ekindata_t *ekind, t_state *state, t_mdatoms *mdatoms, t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle, gmx_enerdata_t *enerd, tensor force_vir, tensor shake_vir, tensor total_vir, tensor pres, rvec mu_tot, gmx_constr_t constr, gmx::SimulationSignaller *signalCoordinator, matrix box, int *totalNumberOfBondedInteractions, gmx_bool *bSumEkinhOld, int flags) { tensor corr_vir, corr_pres; gmx_bool bEner, bPres, bTemp; gmx_bool bStopCM, bGStat, bReadEkin, bEkinAveVel, bScaleEkin, bConstrain; real prescorr, enercorr, dvdlcorr, dvdl_ekin; /* translate CGLO flags to gmx_booleans */ bStopCM = flags & CGLO_STOPCM; bGStat = flags & CGLO_GSTAT; bReadEkin = (flags & CGLO_READEKIN); bScaleEkin = (flags & CGLO_SCALEEKIN); bEner = flags & CGLO_ENERGY; bTemp = flags & CGLO_TEMPERATURE; bPres = (flags & CGLO_PRESSURE); bConstrain = (flags & CGLO_CONSTRAINT); /* we calculate a full state kinetic energy either with full-step velocity verlet or half step where we need the pressure */ bEkinAveVel = (ir->eI == eiVV || (ir->eI == eiVVAK && bPres) || bReadEkin); /* in initialization, it sums the shake virial in vv, and sums ekinh_old in leapfrog (or if we are calculating ekinh_old for other reasons) */ /* ########## Kinetic energy ############## */ if (bTemp) { /* Non-equilibrium MD: this is parallelized, but only does communication * when there really is NEMD. */ if (PAR(cr) && (ekind->bNEMD)) { accumulate_u(cr, &(ir->opts), ekind); } if (!bReadEkin) { calc_ke_part(state, &(ir->opts), mdatoms, ekind, nrnb, bEkinAveVel); } } /* Calculate center of mass velocity if necessary, also parallelized */ if (bStopCM) { calc_vcm_grp(0, mdatoms->homenr, mdatoms, state->x, state->v, vcm); } if (bTemp || bStopCM || bPres || bEner || bConstrain) { if (!bGStat) { /* We will not sum ekinh_old, * so signal that we still have to do it. */ *bSumEkinhOld = TRUE; } else { gmx::ArrayRef<real> signalBuffer = signalCoordinator->getCommunicationBuffer(); if (PAR(cr)) { wallcycle_start(wcycle, ewcMoveE); global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot, ir, ekind, constr, bStopCM ? vcm : NULL, signalBuffer.size(), signalBuffer.data(), totalNumberOfBondedInteractions, *bSumEkinhOld, flags); wallcycle_stop(wcycle, ewcMoveE); } signalCoordinator->finalizeSignals(); *bSumEkinhOld = FALSE; } } if (!ekind->bNEMD && debug && bTemp && (vcm->nr > 0)) { correct_ekin(debug, 0, mdatoms->homenr, state->v, vcm->group_p[0], mdatoms->massT, mdatoms->tmass, ekind->ekin); } /* Do center of mass motion removal */ if (bStopCM) { check_cm_grp(fplog, vcm, ir, 1); do_stopcm_grp(0, mdatoms->homenr, mdatoms->cVCM, state->x, state->v, vcm); inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr); } if (bEner) { /* Calculate the amplitude of the cosine velocity profile */ ekind->cosacc.vcos = ekind->cosacc.mvcos/mdatoms->tmass; } if (bTemp) { /* Sum the kinetic energies of the groups & calc temp */ /* compute full step kinetic energies if vv, or if vv-avek and we are computing the pressure with inputrecNptTrotter */ /* three main cases: VV with AveVel (md-vv), vv with AveEkin (md-vv-avek), leap with AveEkin (md). Leap with AveVel is not supported; it's not clear that it will actually work. bEkinAveVel: If TRUE, we simply multiply ekin by ekinscale to get a full step kinetic energy.
If FALSE, we average ekinh_old and ekinh*ekinscale_nhc to get an averaged half step kinetic energy. */ enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, &dvdl_ekin, bEkinAveVel, bScaleEkin); enerd->dvdl_lin[efptMASS] = (double) dvdl_ekin; enerd->term[F_EKIN] = trace(ekind->ekin); } /* ########## Long range energy information ###### */ if (bEner || bPres || bConstrain) { calc_dispcorr(ir, fr, box, state->lambda[efptVDW], corr_pres, corr_vir, &prescorr, &enercorr, &dvdlcorr); } if (bEner) { enerd->term[F_DISPCORR] = enercorr; enerd->term[F_EPOT] += enercorr; enerd->term[F_DVDL_VDW] += dvdlcorr; } /* ########## Now pressure ############## */ if (bPres || bConstrain) { m_add(force_vir, shake_vir, total_vir); /* Calculate pressure and apply LR correction if PPPM is used. * Use the box from last timestep since we already called update(). */ enerd->term[F_PRES] = calc_pres(fr->ePBC, ir->nwall, box, ekind->ekin, total_vir, pres); /* Calculate long range corrections to pressure and energy */ /* This adds to enerd->term[F_PRES] and enerd->term[F_ETOT], and computes enerd->term[F_DISPCORR]. Also modifies the total_vir and pres tensors */ m_add(total_vir, corr_vir, total_vir); m_add(pres, corr_pres, pres); enerd->term[F_PDISPCORR] = prescorr; enerd->term[F_PRES] += prescorr; } }
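/*
 * compute_globals() above distinguishes a full-step kinetic energy (velocity
 * Verlet, bEkinAveVel == TRUE) from the leap-frog estimate that averages the
 * half-step kinetic energies on either side of the step. A scalar sketch of
 * that choice, ignoring the Nose-Hoover scaling factors applied in
 * sum_ekin(); the function and argument names are hypothetical.
 */
static real kinetic_energy_estimate(gmx_bool bEkinAveVel, real ekin_fullstep,
                                    real ekinh_old, real ekinh)
{
    /* Full-step KE directly from v(t), or the average of the half-step
     * estimates at t-dt/2 and t+dt/2 (leap-frog). */
    return bEkinAveVel ? ekin_fullstep : 0.5*(ekinh_old + ekinh);
}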
int mdrunner(gmx_hw_opt_t *hw_opt, FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, int nstglobalcomm, ivec ddxyz, int dd_node_order, real rdd, real rconstr, const char *dddlb_opt, real dlb_scale, const char *ddcsx, const char *ddcsy, const char *ddcsz, const char *nbpu_opt, int nstlist_cmdline, gmx_int64_t nsteps_cmdline, int nstepout, int resetstep, int gmx_unused nmultisim, int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, real pforce, real cpt_period, real max_hours, int imdport, unsigned long Flags) { gmx_bool bForceUseGPU, bTryUseGPU, bRerunMD; t_inputrec *inputrec; t_state *state = NULL; matrix box; gmx_ddbox_t ddbox = {0}; int npme_major, npme_minor; t_nrnb *nrnb; gmx_mtop_t *mtop = NULL; t_mdatoms *mdatoms = NULL; t_forcerec *fr = NULL; t_fcdata *fcd = NULL; real ewaldcoeff_q = 0; real ewaldcoeff_lj = 0; struct gmx_pme_t **pmedata = NULL; gmx_vsite_t *vsite = NULL; gmx_constr_t constr; int nChargePerturbed = -1, nTypePerturbed = 0, status; gmx_wallcycle_t wcycle; gmx_bool bReadEkin; gmx_walltime_accounting_t walltime_accounting = NULL; int rc; gmx_int64_t reset_counters; gmx_edsam_t ed = NULL; int nthreads_pme = 1; int nthreads_pp = 1; gmx_membed_t membed = NULL; gmx_hw_info_t *hwinfo = NULL; /* The master rank decides early on bUseGPU and broadcasts this later */ gmx_bool bUseGPU = FALSE; /* CAUTION: threads may be started later on in this function, so cr doesn't reflect the final parallel state right now */ snew(inputrec, 1); snew(mtop, 1); if (Flags & MD_APPENDFILES) { fplog = NULL; } bRerunMD = (Flags & MD_RERUN); bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0); bTryUseGPU = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU; /* Detect hardware, gather information. This is an operation that is * global for this process (MPI rank). */ hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU); gmx_print_detected_hardware(fplog, cr, hwinfo); if (fplog != NULL) { /* Print references after all software/hardware printing */ please_cite(fplog, "Abraham2015"); please_cite(fplog, "Pall2015"); please_cite(fplog, "Pronk2013"); please_cite(fplog, "Hess2008b"); please_cite(fplog, "Spoel2005a"); please_cite(fplog, "Lindahl2001a"); please_cite(fplog, "Berendsen95a"); } snew(state, 1); if (SIMMASTER(cr)) { /* Read (nearly) all data required for the simulation */ read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, NULL, mtop); if (inputrec->cutoff_scheme == ecutsVERLET) { /* Here the master rank decides if all ranks will use GPUs */ bUseGPU = (hwinfo->gpu_info.n_dev_compatible > 0 || getenv("GMX_EMULATE_GPU") != NULL); /* TODO add GPU kernels for this and replace this check by: * (bUseGPU && (ir->vdwtype == evdwPME && * ir->ljpme_combination_rule == eljpmeLB)) * update the message text and the content of nbnxn_acceleration_supported. 
*/ if (bUseGPU && !nbnxn_gpu_acceleration_supported(fplog, cr, inputrec, bRerunMD)) { /* Fallback message printed by nbnxn_acceleration_supported */ if (bForceUseGPU) { gmx_fatal(FARGS, "GPU acceleration requested, but not supported with the given input settings"); } bUseGPU = FALSE; } prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, mtop, state->box, bUseGPU); } else { if (nstlist_cmdline > 0) { gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme"); } if (hwinfo->gpu_info.n_dev_compatible > 0) { md_print_warn(cr, fplog, "NOTE: GPU(s) found, but the current simulation can not use GPUs\n" " To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"); } if (bForceUseGPU) { gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet"); } #ifdef GMX_TARGET_BGQ md_print_warn(cr, fplog, "NOTE: There is no SIMD implementation of the group scheme kernels on\n" " BlueGene/Q. You will observe better performance from using the\n" " Verlet cut-off scheme.\n"); #endif } if (inputrec->eI == eiSD2) { md_print_warn(cr, fplog, "The stochastic dynamics integrator %s is deprecated, since\n" "it is slower than integrator %s and is slightly less accurate\n" "with constraints. Use the %s integrator.", ei_names[inputrec->eI], ei_names[eiSD1], ei_names[eiSD1]); } } /* Check and update the hardware options for internal consistency */ check_and_update_hw_opt_1(hw_opt, cr); /* Early check for externally set process affinity. */ gmx_check_thread_affinity_set(fplog, cr, hw_opt, hwinfo->nthreads_hw_avail, FALSE); #ifdef GMX_THREAD_MPI if (SIMMASTER(cr)) { if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0) { gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks"); } /* Since the master knows the cut-off scheme, update hw_opt for this. * This is done later for normal MPI and also once more with tMPI * for all tMPI ranks. */ check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme); /* NOW the threads will be started: */ hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo, hw_opt, inputrec, mtop, cr, fplog, bUseGPU); if (hw_opt->nthreads_tmpi > 1) { t_commrec *cr_old = cr; /* now start the threads. */ cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm, oenv, bVerbose, bCompact, nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr, dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz, nbpu_opt, nstlist_cmdline, nsteps_cmdline, nstepout, resetstep, nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce, cpt_period, max_hours, Flags); /* the main thread continues here with a new cr. We don't deallocate the old cr because other threads may still be reading it. */ if (cr == NULL) { gmx_comm("Failed to spawn threads"); } } } #endif /* END OF CAUTION: cr is now reliable */ /* g_membed initialisation * * Because we change the mtop, init_membed is called before the init_parallel * * (in case we ever want to make it run in parallel) */ if (opt2bSet("-membed", nfile, fnm)) { if (MASTER(cr)) { fprintf(stderr, "Initializing membed"); } membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period); } if (PAR(cr)) { /* now broadcast everything to the non-master nodes/threads: */ init_parallel(cr, inputrec, mtop); /* The master rank decided on the use of GPUs, * broadcast this information to all ranks. 
*/ gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr); } if (fplog != NULL) { pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE); fprintf(fplog, "\n"); } /* now make sure the state is initialized and propagated */ set_state_entries(state, inputrec); /* A parallel command line option consistency check that we can only do after any threads have started. */ if (!PAR(cr) && (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0)) { gmx_fatal(FARGS, "The -dd or -npme options request a parallel simulation, " #ifndef GMX_MPI "but %s was compiled without threads or MPI enabled" #else #ifdef GMX_THREAD_MPI "but the number of threads (option -nt) is 1" #else "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec" #endif #endif , output_env_get_program_display_name(oenv) ); } if (bRerunMD && (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI)) { gmx_fatal(FARGS, "The .mdp file specified an energy minimization or normal mode algorithm, and these are not compatible with mdrun -rerun"); } if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr)) { gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank"); } if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))) { if (cr->npmenodes > 0) { gmx_fatal_collective(FARGS, cr, NULL, "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ"); } cr->npmenodes = 0; } if (bUseGPU && cr->npmenodes < 0) { /* With GPUs we don't automatically use PME-only ranks. PME ranks can * improve performance with many threads per GPU, since our OpenMP * scaling is bad, but it's difficult to automate the setup. */ cr->npmenodes = 0; } #ifdef GMX_FAHCORE if (MASTER(cr)) { fcRegisterSteps(inputrec->nsteps, inputrec->init_step); } #endif /* NMR restraints must be initialized before load_checkpoint, * since with time averaging the history is added to t_state. * For proper consistency check we therefore need to extend * t_state here. * So the PME-only nodes (if present) will also initialize * the distance restraints. */ snew(fcd, 1); /* This needs to be called before read_checkpoint to extend the state */ init_disres(fplog, mtop, inputrec, cr, fcd, state, repl_ex_nst > 0); init_orires(fplog, mtop, state->x, inputrec, cr, &(fcd->orires), state); if (DEFORM(*inputrec)) { /* Store the deform reference box before reading the checkpoint */ if (SIMMASTER(cr)) { copy_mat(state->box, box); } if (PAR(cr)) { gmx_bcast(sizeof(box), box, cr); } /* Because we do not have the update struct available yet * in which the reference values should be stored, * we store them temporarily in static variables. * This should be thread safe, since they are only written once * and with identical values. */ tMPI_Thread_mutex_lock(&deform_init_box_mutex); deform_init_init_step_tpx = inputrec->init_step; copy_mat(box, deform_init_box_tpx); tMPI_Thread_mutex_unlock(&deform_init_box_mutex); } if (opt2bSet("-cpi", nfile, fnm)) { /* Check if checkpoint file exists before doing continuation. * This way we can use identical input options for the first and subsequent runs...
*/ if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) ) { load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog, cr, ddxyz, inputrec, state, &bReadEkin, (Flags & MD_APPENDFILES), (Flags & MD_APPENDFILESSET)); if (bReadEkin) { Flags |= MD_READ_EKIN; } } } if (MASTER(cr) && (Flags & MD_APPENDFILES)) { gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr, Flags, &fplog); } /* override nsteps with value from cmdline */ override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr); if (SIMMASTER(cr)) { copy_mat(state->box, box); } if (PAR(cr)) { gmx_bcast(sizeof(box), box, cr); } /* Essential dynamics */ if (opt2bSet("-ei", nfile, fnm)) { /* Open input and output files, allocate space for ED data structure */ ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr); } if (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == eiNM)) { cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr, dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz, mtop, inputrec, box, state->x, &ddbox, &npme_major, &npme_minor); make_dd_communicators(fplog, cr, dd_node_order); /* Set overallocation to avoid frequent reallocation of arrays */ set_over_alloc_dd(TRUE); } else { /* PME, if used, is done on all nodes with 1D decomposition */ cr->npmenodes = 0; cr->duty = (DUTY_PP | DUTY_PME); npme_major = 1; npme_minor = 1; if (inputrec->ePBC == epbcSCREW) { gmx_fatal(FARGS, "pbc=%s is only implemented with domain decomposition", epbc_names[inputrec->ePBC]); } } if (PAR(cr)) { /* After possible communicator splitting in make_dd_communicators. * we can set up the intra/inter node communication. */ gmx_setup_nodecomm(fplog, cr); } /* Initialize per-physical-node MPI process/thread ID and counters. */ gmx_init_intranode_counters(cr); #ifdef GMX_MPI if (MULTISIM(cr)) { md_print_info(cr, fplog, "This is simulation %d out of %d running as a composite GROMACS\n" "multi-simulation job. Setup for this simulation:\n\n", cr->ms->sim, cr->ms->nsim); } md_print_info(cr, fplog, "Using %d MPI %s\n", cr->nnodes, #ifdef GMX_THREAD_MPI cr->nnodes == 1 ? "thread" : "threads" #else cr->nnodes == 1 ? 
"process" : "processes" #endif ); fflush(stderr); #endif /* Check and update hw_opt for the cut-off scheme */ check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme); /* Check and update hw_opt for the number of MPI ranks */ check_and_update_hw_opt_3(hw_opt); gmx_omp_nthreads_init(fplog, cr, hwinfo->nthreads_hw_avail, hw_opt->nthreads_omp, hw_opt->nthreads_omp_pme, (cr->duty & DUTY_PP) == 0, inputrec->cutoff_scheme == ecutsVERLET); #ifndef NDEBUG if (integrator[inputrec->eI].func != do_tpi && inputrec->cutoff_scheme == ecutsVERLET) { gmx_feenableexcept(); } #endif if (bUseGPU) { /* Select GPU id's to use */ gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU, &hw_opt->gpu_opt); } else { /* Ignore (potentially) manually selected GPUs */ hw_opt->gpu_opt.n_dev_use = 0; } /* check consistency across ranks of things like SIMD * support and number of GPUs selected */ gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU); /* Now that we know the setup is consistent, check for efficiency */ check_resource_division_efficiency(hwinfo, hw_opt, Flags & MD_NTOMPSET, cr, fplog); if (DOMAINDECOMP(cr)) { /* When we share GPUs over ranks, we need to know this for the DLB */ dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt); } /* getting number of PP/PME threads PME: env variable should be read only on one node to make sure it is identical everywhere; */ /* TODO nthreads_pp is only used for pinning threads. * This is a temporary solution until we have a hw topology library. */ nthreads_pp = gmx_omp_nthreads_get(emntNonbonded); nthreads_pme = gmx_omp_nthreads_get(emntPME); wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme); if (PAR(cr)) { /* Master synchronizes its value of reset_counters with all nodes * including PME only nodes */ reset_counters = wcycle_get_reset_counters(wcycle); gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr); wcycle_set_reset_counters(wcycle, reset_counters); } snew(nrnb, 1); if (cr->duty & DUTY_PP) { bcast_state(cr, state); /* Initiate forcerecord */ fr = mk_forcerec(); fr->hwinfo = hwinfo; fr->gpu_opt = &hw_opt->gpu_opt; init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box, opt2fn("-table", nfile, fnm), opt2fn("-tabletf", nfile, fnm), opt2fn("-tablep", nfile, fnm), opt2fn("-tableb", nfile, fnm), nbpu_opt, FALSE, pforce); /* version for PCA_NOT_READ_NODE (see md.c) */ /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE, "nofile","nofile","nofile","nofile",FALSE,pforce); */ /* Initialize QM-MM */ if (fr->bQMMM) { init_QMMMrec(cr, mtop, inputrec, fr); } /* Initialize the mdatoms structure. * mdatoms is not filled with atom data, * as this can not be done now with domain decomposition. */ mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO); /* Initialize the virtual site communication */ vsite = init_vsite(mtop, cr, FALSE); calc_shifts(box, fr->shift_vec); /* With periodic molecules the charge groups should be whole at start up * and the virtual sites should not be far from their proper positions. */ if (!inputrec->bContinuation && MASTER(cr) && !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols)) { /* Make molecules whole at start of run */ if (fr->ePBC != epbcNONE) { do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x); } if (vsite) { /* Correct initial vsite positions are required * for the initial distribution in the domain decomposition * and for the initial shell prediction. 
*/ construct_vsites_mtop(vsite, mtop, state->x); } } if (EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) { ewaldcoeff_q = fr->ewaldcoeff_q; ewaldcoeff_lj = fr->ewaldcoeff_lj; pmedata = &fr->pmedata; } else { pmedata = NULL; } } else { /* This is a PME only node */ /* We don't need the state */ done_state(state); ewaldcoeff_q = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol); ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj); snew(pmedata, 1); } if (hw_opt->thread_affinity != threadaffOFF) { /* Before setting affinity, check whether the affinity has changed * - which indicates that probably the OpenMP library has changed it * since we first checked). */ gmx_check_thread_affinity_set(fplog, cr, hw_opt, hwinfo->nthreads_hw_avail, TRUE); /* Set the CPU affinity */ gmx_set_thread_affinity(fplog, cr, hw_opt, hwinfo); } /* Initiate PME if necessary, * either on all nodes or on dedicated PME nodes only. */ if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) { if (mdatoms) { nChargePerturbed = mdatoms->nChargePerturbed; if (EVDW_PME(inputrec->vdwtype)) { nTypePerturbed = mdatoms->nTypePerturbed; } } if (cr->npmenodes > 0) { /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/ gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr); gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr); } if (cr->duty & DUTY_PME) { status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec, mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed, (Flags & MD_REPRODUCIBLE), nthreads_pme); if (status != 0) { gmx_fatal(FARGS, "Error %d initializing PME", status); } } } if (integrator[inputrec->eI].func == do_md) { /* Turn on signal handling on all nodes */ /* * (A user signal from the PME nodes (if any) * is communicated to the PP nodes. */ signal_handler_install(); } if (cr->duty & DUTY_PP) { /* Assumes uniform use of the number of OpenMP threads */ walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault)); if (inputrec->bPull) { /* Initialize pull code */ inputrec->pull_work = init_pull(fplog, inputrec->pull, inputrec, nfile, fnm, mtop, cr, oenv, inputrec->fepvals->init_lambda, EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags); } if (inputrec->bRot) { /* Initialize enforced rotation code */ init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv, bVerbose, Flags); } if (inputrec->eSwapCoords != eswapNO) { /* Initialize ion swapping code */ init_swapcoords(fplog, bVerbose, inputrec, opt2fn_master("-swap", nfile, fnm, cr), mtop, state->x, state->box, &state->swapstate, cr, oenv, Flags); } constr = init_constraints(fplog, mtop, inputrec, ed, state, cr); if (DOMAINDECOMP(cr)) { GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP"); dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec, Flags & MD_DDBONDCHECK, fr->cginfo_mb); set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, &ddbox); setup_dd_grid(fplog, cr->dd); } /* Now do whatever the user wants us to do (how flexible...) 
*/ integrator[inputrec->eI].func(fplog, cr, nfile, fnm, oenv, bVerbose, bCompact, nstglobalcomm, vsite, constr, nstepout, inputrec, mtop, fcd, state, mdatoms, nrnb, wcycle, ed, fr, repl_ex_nst, repl_ex_nex, repl_ex_seed, membed, cpt_period, max_hours, imdport, Flags, walltime_accounting); if (inputrec->bPull) { finish_pull(inputrec->pull_work); } if (inputrec->bRot) { finish_rot(inputrec->rot); } } else { GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP"); /* do PME only */ walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME)); gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec); } wallcycle_stop(wcycle, ewcRUN); /* Finish up, write some stuff * if rerunMD, don't write last frame again */ finish_run(fplog, cr, inputrec, nrnb, wcycle, walltime_accounting, fr ? fr->nbv : NULL, EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr)); /* Free GPU memory and context */ free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL); if (opt2bSet("-membed", nfile, fnm)) { sfree(membed); } gmx_hardware_info_free(hwinfo); /* Does what it says */ print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime()); walltime_accounting_destroy(walltime_accounting); /* PLUMED */ if(plumedswitch){ plumed_finalize(plumedmain); } /* END PLUMED */ /* Close logfile already here if we were appending to it */ if (MASTER(cr) && (Flags & MD_APPENDFILES)) { gmx_log_close(fplog); } rc = (int)gmx_get_stop_condition(); done_ed(&ed); #ifdef GMX_THREAD_MPI /* we need to join all threads. The sub-threads join when they exit this function, but the master thread needs to be told to wait for that. */ if (PAR(cr) && MASTER(cr)) { tMPI_Finalize(); } #endif return rc; }
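/*
 * mdrunner() above ends by dispatching on cr->duty: ranks holding DUTY_PP
 * run the integrator, while dedicated PME ranks enter gmx_pmeonly() and
 * serve mesh work until the PP ranks signal completion. A stripped-down
 * sketch of that dispatch; the SKETCH_DUTY_* bits and both callbacks are
 * hypothetical stand-ins, not the GROMACS API.
 */
enum {
    SKETCH_DUTY_PP  = (1<<0),
    SKETCH_DUTY_PME = (1<<1)
};

static void run_by_duty(int duty,
                        void (*run_integrator)(void),
                        void (*run_pme_only)(void))
{
    if (duty & SKETCH_DUTY_PP)
    {
        /* PP (and combined PP+PME) ranks: run the MD loop; mesh work is
         * done locally or shipped to the dedicated PME ranks. */
        run_integrator();
    }
    else
    {
        /* Dedicated PME ranks: loop on mesh requests from the PP ranks. */
        run_pme_only();
    }
}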