std::unique_ptr<BoxDeformation>
prepareBoxDeformation(const matrix     &initialBox,
                      t_commrec        *cr,
                      const t_inputrec &inputrec)
{
    if (!inputrecDeform(&inputrec))
    {
        return nullptr;
    }
    if (!EI_DYNAMICS(inputrec.eI))
    {
        GMX_THROW(NotImplementedError("Box deformation is only supported with dynamical integrators"));
    }

    matrix box;
    // Only the rank that read the tpr has the global state, and thus
    // the initial box, so we pass that around.
    if (SIMMASTER(cr))
    {
        copy_mat(initialBox, box);
    }
    if (PAR(cr))
    {
        gmx_bcast(sizeof(box), box, cr);
    }

    return compat::make_unique<BoxDeformation>(inputrec.delta_t,
                                               inputrec.init_step,
                                               inputrec.deform,
                                               box);
}
void md_print_warn(const t_commrec *cr, FILE *fplog, const char *fmt, ...)
{
    va_list ap;

    if (cr == NULL || SIMMASTER(cr))
    {
        va_start(ap, fmt);

        fprintf(stderr, "\n");
        vfprintf(stderr, fmt, ap);
        fprintf(stderr, "\n");

        va_end(ap);
    }
    if (fplog != NULL)
    {
        va_start(ap, fmt);

        fprintf(fplog, "\n");
        vfprintf(fplog, fmt, ap);
        fprintf(fplog, "\n");

        va_end(ap);
    }
}
static bool invalidWithinSimulation(const t_commrec *cr, bool invalidLocally)
{
#ifdef GMX_MPI
    int value = invalidLocally ? 1 : 0;
    int globalValue;
    /* Logical OR: the state is invalid within the simulation if it is
     * invalid on any rank (MPI_LAND would require it to be invalid on
     * all ranks). MPI_Reduce only delivers the result to the master
     * rank; the other ranks fall back to their local answer. */
    MPI_Reduce(&value, &globalValue, 1, MPI_INT, MPI_LOR,
               MASTERRANK(cr), cr->mpi_comm_mysim);
    return SIMMASTER(cr) ? (globalValue != 0) : invalidLocally;
#else
    return invalidLocally;
#endif
}
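/* Illustrative standalone sketch (not GROMACS code) of the MPI_LOR
 * reduction pattern used by invalidWithinSimulation() above: rank 0
 * learns whether ANY rank saw a local problem. The rank chosen to be
 * "invalid" is an arbitrary example value. Compile with mpicc. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int invalidLocally  = (rank == 1); /* pretend rank 1 detected a problem */
    int invalidAnywhere = 0;
    MPI_Reduce(&invalidLocally, &invalidAnywhere, 1, MPI_INT, MPI_LOR,
               0, MPI_COMM_WORLD);

    if (rank == 0)
    {
        printf("invalid somewhere in the simulation: %s\n",
               invalidAnywhere ? "yes" : "no");
    }

    MPI_Finalize();
    return 0;
}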
gmx_bool gmx_fexist_master(const char *fname, t_commrec *cr)
{
    gmx_bool bExist;

    if (SIMMASTER(cr))
    {
        bExist = gmx_fexist(fname);
    }
    if (PAR(cr))
    {
        gmx_bcast(sizeof(bExist), &bExist, cr);
    }
    return bExist;
}
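/* Illustrative standalone sketch of the master-checks-then-broadcasts
 * idiom used by gmx_fexist_master(): only rank 0 touches the
 * filesystem, and every rank receives the answer. Plain MPI_Bcast
 * stands in for GROMACS's gmx_bcast() wrapper; the file name is a
 * hypothetical example. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int exists = 0;
    if (rank == 0)
    {
        FILE *fp = fopen("state.cpt", "r"); /* hypothetical file name */
        exists = (fp != NULL);
        if (fp)
        {
            fclose(fp);
        }
    }
    MPI_Bcast(&exists, 1, MPI_INT, 0, MPI_COMM_WORLD);

    printf("rank %d: file exists = %d\n", rank, exists);

    MPI_Finalize();
    return 0;
}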
void gmx_check_hw_runconf_consistency(FILE                *fplog,
                                      const gmx_hw_info_t *hwinfo,
                                      const t_commrec     *cr,
                                      const gmx_hw_opt_t  *hw_opt,
                                      gmx_bool             bUseGPU)
{
    int      npppn;
    char     th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
    gmx_bool btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;

    assert(hwinfo);
    assert(cr);

    /* Below we only do consistency checks for PP and GPUs,
     * this is irrelevant for PME only nodes, so in that case we return
     * here.
     */
    if (!(cr->duty & DUTY_PP))
    {
        return;
    }

#if defined(GMX_THREAD_MPI)
    bMPI          = FALSE;
    btMPI         = TRUE;
    bNthreadsAuto = (hw_opt->nthreads_tmpi < 1);
#elif defined(GMX_LIB_MPI)
    bMPI          = TRUE;
    btMPI         = FALSE;
    bNthreadsAuto = FALSE;
#else
    bMPI          = FALSE;
    btMPI         = FALSE;
    bNthreadsAuto = FALSE;
#endif

    /* GPU emulation detection is done later, but we need it here as well
     * -- uncool, but there's no elegant workaround */
    bEmulateGPU       = (getenv("GMX_EMULATE_GPU") != NULL);
    bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);

    /* check the SIMD level mdrun is compiled with against hardware
       capabilities */
    /* TODO: Here we assume homogeneous hardware which is not necessarily
             the case! Might not hurt to add an extra check over MPI. */
    gmx_cpuid_simd_check(hwinfo->cpuid_info, fplog, SIMMASTER(cr));

    check_use_of_rdtscp_on_this_cpu(fplog, cr, hwinfo);

    /* NOTE: this print is only for and on one physical node */
    print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);

    if (hwinfo->gpu_info.ncuda_dev_compatible > 0)
    {
        std::string gpuUsageReport;
        try
        {
            gpuUsageReport = makeGpuUsageReport(&hwinfo->gpu_info,
                                                &hw_opt->gpu_opt,
                                                cr->nrank_pp_intranode);
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;

        /* NOTE: this print is only for and on one physical node */
        md_print_info(cr, fplog, gpuUsageReport.c_str());
    }
/* Check the process affinity mask; if it is found to be non-zero,
 * honor it and disable mdrun's internal affinity setting.
 * Note that this will only work on Linux as we use a GNU feature.
 */
void gmx_check_thread_affinity_set(FILE            *fplog,
                                   const t_commrec *cr,
                                   gmx_hw_opt_t    *hw_opt,
                                   int  gmx_unused  nthreads_hw_avail,
                                   gmx_bool         bAfterOpenmpInit)
{
#ifdef HAVE_SCHED_AFFINITY
    cpu_set_t mask_current;
    int       i, ret, cpu_count, cpu_set;
    gmx_bool  bAllSet;
#endif
#ifdef GMX_LIB_MPI
    gmx_bool  bAllSet_All;
#endif

    assert(hw_opt);
    if (!bAfterOpenmpInit)
    {
        /* Check for externally set OpenMP affinity and turn off internal
         * pinning if any is found. We need to do this check early to tell
         * thread-MPI whether it should do pinning when spawning threads.
         * TODO: the above no longer holds, we should move these checks later
         */
        if (hw_opt->thread_affinity != threadaffOFF)
        {
            char *message;
            if (!gmx_omp_check_thread_affinity(&message))
            {
                /* TODO: with -pin auto we should only warn when using all cores */
                md_print_warn(cr, fplog, "%s", message);
                sfree(message);
                hw_opt->thread_affinity = threadaffOFF;
            }
        }

        /* With thread-MPI this is needed as pinning might get turned off,
         * which needs to be known before starting thread-MPI.
         * With thread-MPI hw_opt is processed here on the master rank
         * and passed to the other ranks later, so we only do this on master.
         */
        if (!SIMMASTER(cr))
        {
            return;
        }
#ifndef GMX_THREAD_MPI
        return;
#endif
    }

#ifdef HAVE_SCHED_AFFINITY
    if (hw_opt->thread_affinity == threadaffOFF)
    {
        /* internal affinity setting is off, don't bother checking process affinity */
        return;
    }

    CPU_ZERO(&mask_current);
    if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
    {
        /* failed to query affinity mask, will just return */
        if (debug)
        {
            fprintf(debug, "Failed to query affinity mask (error %d)", ret);
        }
        return;
    }

    /* Before proceeding with the actual check, make sure that the number of
     * detected CPUs is >= the CPUs in the current set.
     * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
#ifdef CPU_COUNT
    if (nthreads_hw_avail < CPU_COUNT(&mask_current))
    {
        if (debug)
        {
            fprintf(debug, "%d hardware threads detected, but %d was returned by CPU_COUNT",
                    nthreads_hw_avail, CPU_COUNT(&mask_current));
        }
        return;
    }
#endif /* CPU_COUNT */

    bAllSet = TRUE;
    for (i = 0; (i < nthreads_hw_avail && i < CPU_SETSIZE); i++)
    {
        bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
    }

#ifdef GMX_LIB_MPI
    /* Here MPI_LAND is correct: the mask counts as default only if it is
     * fully set on every rank. */
    MPI_Allreduce(&bAllSet, &bAllSet_All, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
    bAllSet = bAllSet_All;
#endif

    if (!bAllSet)
    {
        if (hw_opt->thread_affinity == threadaffAUTO)
        {
            if (!bAfterOpenmpInit)
            {
                md_print_warn(cr, fplog,
                              "Non-default thread affinity set, disabling internal thread affinity");
            }
            else
            {
                md_print_warn(cr, fplog,
                              "Non-default thread affinity set probably by the OpenMP library,\n"
                              "disabling internal thread affinity");
            }
            hw_opt->thread_affinity = threadaffOFF;
        }
        else
        {
            /* Only warn once, at the last check (bAfterOpenmpInit==TRUE) */
            if (bAfterOpenmpInit)
            {
                md_print_warn(cr, fplog, "Overriding thread affinity set outside %s\n",
                              ShortProgram());
            }
        }

        if (debug)
        {
            fprintf(debug, "Non-default affinity mask found\n");
        }
    }
    else
    {
        if (debug)
        {
            fprintf(debug, "Default affinity mask found\n");
        }
    }
#endif /* HAVE_SCHED_AFFINITY */
}
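/* Illustrative standalone sketch (Linux/glibc only, not GROMACS code)
 * of the affinity query performed above: read this process's affinity
 * mask with sched_getaffinity() and count the enabled CPUs by hand,
 * as the fallback loop does when CPU_COUNT is unavailable. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    if (sched_getaffinity(0, sizeof(cpu_set_t), &mask) != 0)
    {
        perror("sched_getaffinity");
        return 1;
    }

    int count = 0;
    for (int i = 0; i < CPU_SETSIZE; i++)
    {
        if (CPU_ISSET(i, &mask))
        {
            count++;
        }
    }
    /* On glibc >= 2.6, CPU_COUNT(&mask) yields the same number. */
    printf("process may run on %d CPUs\n", count);
    return 0;
}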
/* Checks we can do when we don't (yet) know the cut-off scheme */
void check_and_update_hw_opt_1(gmx_hw_opt_t    *hw_opt,
                               const t_commrec *cr)
{
    /* Currently hw_opt only contains default settings or settings supplied
     * by the user on the command line.
     * Check for OpenMP settings stored in environment variables, which can
     * potentially be different on different MPI ranks.
     */
    gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp, SIMMASTER(cr));

    /* Check restrictions on the user supplied options before modifying them.
     * TODO: Put the user values in a const struct and preserve them.
     */
#ifndef GMX_THREAD_MPI
    if (hw_opt->nthreads_tot > 0)
    {
        gmx_fatal(FARGS, "Setting the total number of threads is only supported with thread-MPI and GROMACS was compiled without thread-MPI");
    }
    if (hw_opt->nthreads_tmpi > 0)
    {
        gmx_fatal(FARGS, "Setting the number of thread-MPI ranks is only supported with thread-MPI and GROMACS was compiled without thread-MPI");
    }
#endif

    if (bHasOmpSupport)
    {
        /* Check restrictions on PME thread related options set by the user */

        if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
        {
            gmx_fatal(FARGS, "You need to specify -ntomp in addition to -ntomp_pme");
        }

        if (hw_opt->nthreads_omp_pme >= 1 &&
            hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp &&
            cr->npmenodes <= 0)
        {
            /* This can result in a fatal error on many MPI ranks,
             * but since the thread count can differ per rank,
             * we can't easily avoid this.
             */
            gmx_fatal(FARGS, "You need to explicitly specify the number of PME ranks (-npme) when using different number of OpenMP threads for PP and PME ranks");
        }
    }
    else
    {
        /* GROMACS was configured without OpenMP support */

        if (hw_opt->nthreads_omp > 1 || hw_opt->nthreads_omp_pme > 1)
        {
            gmx_fatal(FARGS, "More than 1 OpenMP thread requested, but GROMACS was compiled without OpenMP support");
        }
        hw_opt->nthreads_omp     = 1;
        hw_opt->nthreads_omp_pme = 1;
    }

    if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
    {
        /* We have the same number of OpenMP threads for PP and PME ranks,
         * thus we can perform several consistency checks.
         */
        if (hw_opt->nthreads_tmpi > 0 &&
            hw_opt->nthreads_omp > 0 &&
            hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
        {
            gmx_fatal(FARGS, "The total number of threads requested (%d) does not match the thread-MPI ranks (%d) times the OpenMP threads (%d) requested",
                      hw_opt->nthreads_tot, hw_opt->nthreads_tmpi, hw_opt->nthreads_omp);
        }

        if (hw_opt->nthreads_tmpi > 0 &&
            hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
        {
            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of thread-MPI ranks requested (%d)",
                      hw_opt->nthreads_tot, hw_opt->nthreads_tmpi);
        }

        if (hw_opt->nthreads_omp > 0 &&
            hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)
        {
            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",
                      hw_opt->nthreads_tot, hw_opt->nthreads_omp);
        }
    }

    if (hw_opt->nthreads_tot == 1)
    {
        hw_opt->nthreads_tmpi = 1;

        if (hw_opt->nthreads_omp > 1)
        {
            gmx_fatal(FARGS, "You requested %d OpenMP threads with %d total threads",
                      hw_opt->nthreads_omp, hw_opt->nthreads_tot);
        }
        hw_opt->nthreads_omp     = 1;
        hw_opt->nthreads_omp_pme = 1;
    }

    /* Parse GPU IDs, if provided.
     * We check consistency with the tMPI thread count later.
     */
    gmx_parse_gpu_ids(&hw_opt->gpu_opt);

#ifdef GMX_THREAD_MPI
    if (hw_opt->gpu_opt.n_dev_use > 0 && hw_opt->nthreads_tmpi == 0)
    {
        /* Set the number of MPI threads equal to the number of GPUs */
        hw_opt->nthreads_tmpi = hw_opt->gpu_opt.n_dev_use;

        if (hw_opt->nthreads_tot > 0 &&
            hw_opt->nthreads_tmpi > hw_opt->nthreads_tot)
        {
            /* We have more GPUs than total threads requested.
             * We choose to (later) generate a mismatch error,
             * instead of launching more threads than requested.
             */
            hw_opt->nthreads_tmpi = hw_opt->nthreads_tot;
        }
    }
#endif

    if (debug)
    {
        print_hw_opt(debug, hw_opt);
    }

    /* Asserting this simplifies the hardware resource division later
     * on. */
    GMX_RELEASE_ASSERT(!(hw_opt->nthreads_omp_pme >= 1 && hw_opt->nthreads_omp <= 0),
                       "PME thread count should only be set when the normal thread count is also set");
}
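/* Illustrative standalone sketch of the central consistency rule
 * enforced above: when both rank and thread counts are given, the
 * total must equal nthreads_tmpi * nthreads_omp. The values mimic
 * hypothetical -nt/-ntmpi/-ntomp command-line settings. */
#include <stdio.h>

int main(void)
{
    int nthreads_tot  = 16; /* as if given with -nt */
    int nthreads_tmpi = 4;  /* as if given with -ntmpi */
    int nthreads_omp  = 4;  /* as if given with -ntomp */

    if (nthreads_tmpi > 0 && nthreads_omp > 0 &&
        nthreads_tot != nthreads_tmpi * nthreads_omp)
    {
        fprintf(stderr,
                "Total threads (%d) != tMPI ranks (%d) x OpenMP threads (%d)\n",
                nthreads_tot, nthreads_tmpi, nthreads_omp);
        return 1;
    }
    printf("Consistent: %d = %d x %d\n",
           nthreads_tot, nthreads_tmpi, nthreads_omp);
    return 0;
}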
const char *opt2fn_master(const char *opt, int nfile, const t_filenm fnm[],
                          t_commrec *cr)
{
    return SIMMASTER(cr) ? opt2fn(opt, nfile, fnm) : NULL;
}
/*! \brief Support handling restarts
 *
 * \todo Clean this up (next patch)
 *
 * Read just the simulation 'generation' and with bTryToAppendFiles check files.
 * This is needed at the beginning of mdrun,
 * to be able to rename the logfile correctly.
 * When file appending is requested, checks which output files are present,
 * and returns TRUE/FALSE in bDoAppendFiles if all or none are present.
 * If only some output files are present, give a fatal error.
 * When bDoAppendFiles is TRUE upon return, bAddPart will tell whether the
 * simulation part needs to be added to the output file name.
 *
 * This routine cannot print tons of data, since it is called before
 * the log file is opened. */
static void read_checkpoint_data(const char *filename, int *simulation_part,
                                 t_commrec *cr,
                                 gmx_bool bTryToAppendFiles,
                                 int nfile, const t_filenm fnm[],
                                 const char *part_suffix,
                                 gmx_bool *bAddPart,
                                 gmx_bool *bDoAppendFiles)
{
    t_fileio            *fp;
    int                  nfiles;
    gmx_file_position_t *outputfiles;
    int                  nexist, f;
    char                *fn, suf_up[STRLEN];

    *bDoAppendFiles = FALSE;

    if (SIMMASTER(cr))
    {
        if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
        {
            *simulation_part = 0;
        }
        else
        {
            read_checkpoint_simulation_part_and_filenames(fp,
                                                          simulation_part,
                                                          &nfiles,
                                                          &outputfiles);
            if (bTryToAppendFiles)
            {
                nexist = 0;
                for (f = 0; f < nfiles; f++)
                {
                    if (exist_output_file(outputfiles[f].filename, nfile, fnm))
                    {
                        nexist++;
                    }
                }
                if (nexist == nfiles)
                {
                    *bDoAppendFiles = bTryToAppendFiles;
                }
                else if (nexist > 0)
                {
                    fprintf(stderr,
                            "Output file appending has been requested,\n"
                            "but some output files listed in the checkpoint file %s\n"
                            "are not present or are named differently by the current program:\n",
                            filename);
                    fprintf(stderr, "output files present:");
                    for (f = 0; f < nfiles; f++)
                    {
                        if (exist_output_file(outputfiles[f].filename,
                                              nfile, fnm))
                        {
                            fprintf(stderr, " %s", outputfiles[f].filename);
                        }
                    }
                    fprintf(stderr, "\n");
                    fprintf(stderr, "output files not present or named differently:");
                    for (f = 0; f < nfiles; f++)
                    {
                        if (!exist_output_file(outputfiles[f].filename,
                                               nfile, fnm))
                        {
                            fprintf(stderr, " %s", outputfiles[f].filename);
                        }
                    }
                    fprintf(stderr, "\n");

                    gmx_fatal(FARGS, "File appending requested, but %d of the %d output files are not present or are named differently", nfiles-nexist, nfiles);
                }
            }

            if (*bDoAppendFiles)
            {
                if (nfiles == 0)
                {
                    gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
                }
                fn = outputfiles[0].filename;
                /* The first file listed must be the log file, so that it can
                 * be renamed/appended correctly; it is a fatal error when the
                 * name does not end in the log-file extension (assuming
                 * ftp2ext() returns the extension without its leading dot). */
                if (strlen(fn) < 4 ||
                    gmx_strcasecmp(fn+strlen(fn)-3, ftp2ext(efLOG)) != 0)
                {
                    gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
                }
                /* Set bAddPart to whether the suffix string '.part' is present
                 * in the log file name.
                 */
                strcpy(suf_up, part_suffix);
                upstring(suf_up);
                *bAddPart = (strstr(fn, part_suffix) != NULL ||
                             strstr(fn, suf_up) != NULL);
            }

            sfree(outputfiles);
        }
    }
    if (PAR(cr))
    {
        gmx_bcast(sizeof(*simulation_part), simulation_part, cr);

        if (*simulation_part > 0 && bTryToAppendFiles)
        {
            gmx_bcast(sizeof(*bDoAppendFiles), bDoAppendFiles, cr);
            gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
        }
    }
}
void finish_run(FILE *fplog, t_commrec *cr, char *confout,
                t_inputrec *inputrec,
                t_nrnb nrnb[], gmx_wallcycle_t wcycle,
                double nodetime, double realtime, int nsteps_done,
                bool bWriteStat)
{
    int        i, j;
    t_nrnb    *nrnb_all = NULL, ntot;
    real       delta_t;
    double     nbfs, mflop;
    double     cycles[ewcNR];
#ifdef GMX_MPI
    int        sender;
    double     nrnb_buf[4];
    MPI_Status status;
#endif

    wallcycle_sum(cr, wcycle, cycles);

    if (cr->nnodes > 1)
    {
        if (SIMMASTER(cr))
        {
            snew(nrnb_all, cr->nnodes);
        }
#ifdef GMX_MPI
        MPI_Gather(nrnb, sizeof(t_nrnb), MPI_BYTE,
                   nrnb_all, sizeof(t_nrnb), MPI_BYTE,
                   0, cr->mpi_comm_mysim);
#endif
    }
    else
    {
        nrnb_all = nrnb;
    }

    if (SIMMASTER(cr))
    {
        for (i = 0; (i < eNRNB); i++)
        {
            ntot.n[i] = 0;
        }
        for (i = 0; (i < cr->nnodes); i++)
        {
            for (j = 0; (j < eNRNB); j++)
            {
                ntot.n[j] += nrnb_all[i].n[j];
            }
        }

        print_flop(fplog, &ntot, &nbfs, &mflop);
    }

    if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
    {
        print_dd_statistics(cr, inputrec, fplog);
    }

    if (SIMMASTER(cr))
    {
        if (PARTDECOMP(cr))
        {
            pr_load(fplog, cr, nrnb_all);
        }
        /* Free the gathered counters only after pr_load has used them,
         * and only when they were allocated here; with a single node
         * nrnb_all aliases the caller's nrnb array. */
        if (cr->nnodes > 1)
        {
            sfree(nrnb_all);
        }

        wallcycle_print(fplog, cr->nnodes, cr->npmenodes, realtime, wcycle, cycles);

        if (EI_DYNAMICS(inputrec->eI))
        {
            delta_t = inputrec->delta_t;
        }
        else
        {
            delta_t = 0;
        }

        if (fplog)
        {
            print_perf(fplog, nodetime, realtime, cr->nnodes-cr->npmenodes,
                       nsteps_done, delta_t, nbfs, mflop);
        }
        if (bWriteStat)
        {
            print_perf(stderr, nodetime, realtime, cr->nnodes-cr->npmenodes,
                       nsteps_done, delta_t, nbfs, mflop);
        }
    }
}
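/* Illustrative standalone sketch of the gather-and-sum pattern that
 * finish_run() applies to the flop counters: every rank contributes a
 * fixed-size counter block, rank 0 gathers and sums them. Plain
 * doubles stand in for t_nrnb (which finish_run() sends as raw
 * MPI_BYTE); NCOUNTERS and the counter values are made-up examples. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define NCOUNTERS 4

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank, nranks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    double local[NCOUNTERS];
    for (int i = 0; i < NCOUNTERS; i++)
    {
        local[i] = rank + 0.5*i; /* stand-in for per-rank flop counters */
    }

    /* Room for one counter block per rank on the master only. */
    double *all = NULL;
    if (rank == 0)
    {
        all = malloc(nranks * NCOUNTERS * sizeof(double));
    }
    MPI_Gather(local, NCOUNTERS, MPI_DOUBLE,
               all, NCOUNTERS, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    if (rank == 0)
    {
        double total[NCOUNTERS] = {0};
        for (int r = 0; r < nranks; r++)
        {
            for (int i = 0; i < NCOUNTERS; i++)
            {
                total[i] += all[r*NCOUNTERS + i];
            }
        }
        printf("summed counter 0 over %d ranks: %g\n", nranks, total[0]);
        free(all);
    }

    MPI_Finalize();
    return 0;
}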
/*! \brief Helper function for parsing various input about the number
    of OpenMP threads to use in various modules and deciding what to
    do about it. */
static void manage_number_of_openmp_threads(const gmx::MDLogger &mdlog,
                                            const t_commrec     *cr,
                                            bool                 bOMP,
                                            int                  nthreads_hw_avail,
                                            int                  omp_nthreads_req,
                                            int                  omp_nthreads_pme_req,
                                            gmx_bool gmx_unused  bThisNodePMEOnly,
                                            gmx_bool             bFullOmpSupport,
                                            int                  numRanksOnThisNode,
                                            gmx_bool             bSepPME)
{
    int   nth;
    char *env;

#if GMX_THREAD_MPI
    /* modth is shared among tMPI threads, so for thread safety, the
     * detection is done on the master only. It is not thread-safe
     * with multiple simulations, but that's anyway not supported by
     * tMPI. */
    if (!SIMMASTER(cr))
    {
        return;
    }
#else
    GMX_UNUSED_VALUE(cr);
#endif

    if (modth.initialized)
    {
        /* Just return if the initialization has already been
           done. This could only happen if gmx_omp_nthreads_init() has
           already been called. */
        return;
    }

    /* With full OpenMP support (verlet scheme) set the number of threads
     * per process / default:
     * - 1 if not compiled with OpenMP or
     * - OMP_NUM_THREADS if the env. var is set, or
     * - omp_nthreads_req = #of threads requested by the user on the mdrun
     *   command line, otherwise
     * - take the max number of available threads and distribute them
     *   on the processes/tMPI threads.
     * ~ The GMX_*_NUM_THREADS env var overrides the number of threads of
     *   the respective module and it has to be used in conjunction with
     *   OMP_NUM_THREADS.
     *
     * With the group scheme OpenMP multithreading is only supported in PME,
     * for all other modules nthreads is set to 1.
     * The number of PME threads is equal to:
     * - 1 if not compiled with OpenMP or
     * - GMX_PME_NUM_THREADS if defined, otherwise
     * - OMP_NUM_THREADS if defined, otherwise
     * - 1
     */
    nth = 1;
    if ((env = getenv("OMP_NUM_THREADS")) != nullptr)
    {
        if (!bOMP && (std::strncmp(env, "1", 1) != 0))
        {
            gmx_warning("OMP_NUM_THREADS is set, but %s was compiled without OpenMP support!",
                        gmx::getProgramContext().displayName());
        }
        else
        {
            nth = gmx_omp_get_max_threads();
        }
    }
    else if (omp_nthreads_req > 0)
    {
        nth = omp_nthreads_req;
    }
    else if (bFullOmpSupport && bOMP)
    {
        /* max available threads per node */
        nth = nthreads_hw_avail;

        /* divide the threads among the MPI ranks */
        if (nth >= numRanksOnThisNode)
        {
            nth /= numRanksOnThisNode;
        }
        else
        {
            nth = 1;
        }
    }

    /* now we have the global values, set them:
     * - 1 if not compiled with OpenMP and for the group scheme
     * - nth for the verlet scheme when compiled with OpenMP
     */
    if (bFullOmpSupport && bOMP)
    {
        modth.gnth = nth;
    }
    else
    {
        modth.gnth = 1;
    }

    if (bSepPME)
    {
        if (omp_nthreads_pme_req > 0)
        {
            modth.gnth_pme = omp_nthreads_pme_req;
        }
        else
        {
            modth.gnth_pme = nth;
        }
    }
    else
    {
        modth.gnth_pme = 0;
    }

    /* now set the per-module values */
    modth.nth[emntDefault] = modth.gnth;
    pick_module_nthreads(mdlog, emntDomdec, bFullOmpSupport, bSepPME);
    pick_module_nthreads(mdlog, emntPairsearch, bFullOmpSupport, bSepPME);
    pick_module_nthreads(mdlog, emntNonbonded, bFullOmpSupport, bSepPME);
    pick_module_nthreads(mdlog, emntBonded, bFullOmpSupport, bSepPME);
    pick_module_nthreads(mdlog, emntPME, bFullOmpSupport, bSepPME);
    pick_module_nthreads(mdlog, emntUpdate, bFullOmpSupport, bSepPME);
    pick_module_nthreads(mdlog, emntVSITE, bFullOmpSupport, bSepPME);
    pick_module_nthreads(mdlog, emntLINCS, bFullOmpSupport, bSepPME);
    pick_module_nthreads(mdlog, emntSETTLE, bFullOmpSupport, bSepPME);

    /* set the number of threads globally */
    if (bOMP)
    {
#if !GMX_THREAD_MPI
        if (bThisNodePMEOnly)
        {
            gmx_omp_set_num_threads(modth.gnth_pme);
        }
        else
#endif      /* GMX_THREAD_MPI */
        {
            if (bFullOmpSupport)
            {
                gmx_omp_set_num_threads(nth);
            }
            else
            {
                gmx_omp_set_num_threads(1);
            }
        }
    }

    modth.initialized = TRUE;
}
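/* Illustrative standalone sketch of the default thread-count decision
 * documented in the comment block above: distribute the node's
 * hardware threads over the ranks on that node, with a minimum of 1.
 * default_nth_per_rank is a hypothetical helper name; the inputs are
 * example values. */
#include <stdio.h>

static int default_nth_per_rank(int nthreads_hw_avail, int numRanksOnThisNode)
{
    int nth = nthreads_hw_avail;
    if (nth >= numRanksOnThisNode)
    {
        nth /= numRanksOnThisNode; /* e.g. 16 hw threads, 4 ranks -> 4 each */
    }
    else
    {
        nth = 1;                   /* more ranks than hardware threads */
    }
    return nth;
}

int main(void)
{
    printf("%d\n", default_nth_per_rank(16, 4)); /* prints 4 */
    printf("%d\n", default_nth_per_rank(6, 8));  /* prints 1 */
    return 0;
}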
void gmx_omp_nthreads_init(FILE *fplog, t_commrec *cr,
                           int nthreads_hw_avail,
                           int omp_nthreads_req,
                           int omp_nthreads_pme_req,
                           gmx_bool gmx_unused bThisNodePMEOnly,
                           gmx_bool bFullOmpSupport)
{
    int      nth, nth_pmeonly, gmx_maxth, nppn;
    char    *env;
    gmx_bool bSepPME, bOMP;

#ifdef GMX_OPENMP
    bOMP = TRUE;
#else
    bOMP = FALSE;
#endif /* GMX_OPENMP */

    /* number of MPI processes/threads per physical node */
    nppn = cr->nrank_intranode;

    bSepPME = ( (cr->duty & DUTY_PP) && !(cr->duty & DUTY_PME)) ||
        (!(cr->duty & DUTY_PP) &&  (cr->duty & DUTY_PME));

#ifdef GMX_THREAD_MPI
    /* modth is shared among tMPI threads, so for thread safety the
     * detection is done on the master only. It is not thread-safe with
     * multiple simulations, but that's anyway not supported by tMPI. */
    if (SIMMASTER(cr))
#endif
    {
        /* just return if the initialization has already been done */
        if (modth.initialized)
        {
            return;
        }

        /* With full OpenMP support (verlet scheme) set the number of threads
         * per process / default:
         * - 1 if not compiled with OpenMP or
         * - OMP_NUM_THREADS if the env. var is set, or
         * - omp_nthreads_req = #of threads requested by the user on the mdrun
         *   command line, otherwise
         * - take the max number of available threads and distribute them
         *   on the processes/tMPI threads.
         * ~ The GMX_*_NUM_THREADS env var overrides the number of threads of
         *   the respective module and it has to be used in conjunction with
         *   OMP_NUM_THREADS.
         *
         * With the group scheme OpenMP multithreading is only supported in PME,
         * for all other modules nthreads is set to 1.
         * The number of PME threads is equal to:
         * - 1 if not compiled with OpenMP or
         * - GMX_PME_NUM_THREADS if defined, otherwise
         * - OMP_NUM_THREADS if defined, otherwise
         * - 1
         */
        nth = 1;
        if ((env = getenv("OMP_NUM_THREADS")) != NULL)
        {
            if (!bOMP && (strncmp(env, "1", 1) != 0))
            {
                gmx_warning("OMP_NUM_THREADS is set, but %s was compiled without OpenMP support!",
                            ShortProgram());
            }
            else
            {
                nth = gmx_omp_get_max_threads();
            }
        }
        else if (omp_nthreads_req > 0)
        {
            nth = omp_nthreads_req;
        }
        else if (bFullOmpSupport && bOMP)
        {
            /* max available threads per node */
            nth = nthreads_hw_avail;

            /* divide the threads among the MPI processes/tMPI threads */
            if (nth >= nppn)
            {
                nth /= nppn;
            }
            else
            {
                nth = 1;
            }
        }

        /* now we have the global values, set them:
         * - 1 if not compiled with OpenMP and for the group scheme
         * - nth for the verlet scheme when compiled with OpenMP
         */
        if (bFullOmpSupport && bOMP)
        {
            modth.gnth = nth;
        }
        else
        {
            modth.gnth = 1;
        }

        if (bSepPME)
        {
            if (omp_nthreads_pme_req > 0)
            {
                modth.gnth_pme = omp_nthreads_pme_req;
            }
            else
            {
                modth.gnth_pme = nth;
            }
        }
        else
        {
            modth.gnth_pme = 0;
        }

        /* now set the per-module values */
        modth.nth[emntDefault] = modth.gnth;
        pick_module_nthreads(fplog, emntDomdec, SIMMASTER(cr), bFullOmpSupport, bSepPME);
        pick_module_nthreads(fplog, emntPairsearch, SIMMASTER(cr), bFullOmpSupport, bSepPME);
        pick_module_nthreads(fplog, emntNonbonded, SIMMASTER(cr), bFullOmpSupport, bSepPME);
        pick_module_nthreads(fplog, emntBonded, SIMMASTER(cr), bFullOmpSupport, bSepPME);
        pick_module_nthreads(fplog, emntPME, SIMMASTER(cr), bFullOmpSupport, bSepPME);
        pick_module_nthreads(fplog, emntUpdate, SIMMASTER(cr), bFullOmpSupport, bSepPME);
        pick_module_nthreads(fplog, emntVSITE, SIMMASTER(cr), bFullOmpSupport, bSepPME);
        pick_module_nthreads(fplog, emntLINCS, SIMMASTER(cr), bFullOmpSupport, bSepPME);
        pick_module_nthreads(fplog, emntSETTLE, SIMMASTER(cr), bFullOmpSupport, bSepPME);

        /* set the number of threads globally */
        if (bOMP)
        {
#ifndef GMX_THREAD_MPI
            if (bThisNodePMEOnly)
            {
                gmx_omp_set_num_threads(modth.gnth_pme);
            }
            else
#endif      /* GMX_THREAD_MPI */
            {
                if (bFullOmpSupport)
                {
                    gmx_omp_set_num_threads(nth);
                }
                else
                {
                    gmx_omp_set_num_threads(1);
                }
            }
        }

        modth.initialized = TRUE;
    }
#ifdef GMX_THREAD_MPI
    /* Non-master threads have to wait for the detection to be done. */
    if (PAR(cr))
    {
        MPI_Barrier(cr->mpi_comm_mysim);
    }
#endif

    /* inform the user about the settings */
    if (bOMP)
    {
#ifdef GMX_THREAD_MPI
        const char *mpi_str = "per tMPI thread";
#else
        const char *mpi_str = "per MPI process";
#endif

        /* for group scheme we print PME threads info only */
        if (bFullOmpSupport)
        {
            md_print_info(cr, fplog, "Using %d OpenMP thread%s %s\n",
                          modth.gnth, modth.gnth > 1 ? "s" : "",
                          cr->nnodes > 1 ? mpi_str : "");
        }
        if (bSepPME && modth.gnth_pme != modth.gnth)
        {
            md_print_info(cr, fplog, "Using %d OpenMP thread%s %s for PME\n",
                          modth.gnth_pme, modth.gnth_pme > 1 ? "s" : "",
                          cr->nnodes > 1 ? mpi_str : "");
        }
    }

    /* detect and warn about oversubscription
     * TODO: enable this for separate PME nodes as well! */
    if (!bSepPME && cr->rank_pp_intranode == 0)
    {
        char sbuf[STRLEN], sbuf1[STRLEN], sbuf2[STRLEN];

        if (modth.gnth*nppn > nthreads_hw_avail)
        {
            sprintf(sbuf, "threads");
            sbuf1[0] = '\0';
            sprintf(sbuf2, "O");
#ifdef GMX_MPI
            if (modth.gnth == 1)
            {
#ifdef GMX_THREAD_MPI
                sprintf(sbuf, "thread-MPI threads");
#else
                sprintf(sbuf, "MPI processes");
                sprintf(sbuf1, " per node");
                sprintf(sbuf2, "On node %d: o", cr->sim_nodeid);
#endif
            }
#endif
            md_print_warn(cr, fplog,
                          "WARNING: %sversubscribing the available %d logical CPU cores%s with %d %s.\n"
                          "         This will cause considerable performance loss!",
                          sbuf2, nthreads_hw_avail, sbuf1, nppn*modth.gnth, sbuf);
        }
    }
}
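/* Illustrative standalone sketch of the oversubscription check at the
 * end of gmx_omp_nthreads_init(): warn when the threads launched on a
 * node exceed its hardware thread count. All values are made-up
 * examples. */
#include <stdio.h>

int main(void)
{
    int nthreads_hw_avail = 8;  /* logical CPUs on the node */
    int nppn              = 4;  /* MPI ranks/tMPI threads on this node */
    int gnth              = 3;  /* OpenMP threads per rank */

    if (gnth*nppn > nthreads_hw_avail)
    {
        fprintf(stderr,
                "WARNING: Oversubscribing the available %d logical CPU cores with %d threads.\n"
                "         This will cause considerable performance loss!\n",
                nthreads_hw_avail, nppn*gnth);
    }
    return 0;
}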
int mdrunner(gmx_hw_opt_t *hw_opt,
             FILE *fplog, t_commrec *cr, int nfile,
             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
             gmx_bool bCompact, int nstglobalcomm,
             ivec ddxyz, int dd_node_order, real rdd, real rconstr,
             const char *dddlb_opt, real dlb_scale,
             const char *ddcsx, const char *ddcsy, const char *ddcsz,
             const char *nbpu_opt, int nstlist_cmdline,
             gmx_int64_t nsteps_cmdline, int nstepout, int resetstep,
             int gmx_unused nmultisim, int repl_ex_nst, int repl_ex_nex,
             int repl_ex_seed, real pforce, real cpt_period, real max_hours,
             int imdport, unsigned long Flags)
{
    gmx_bool                  bForceUseGPU, bTryUseGPU, bRerunMD;
    t_inputrec               *inputrec;
    t_state                  *state = NULL;
    matrix                    box;
    gmx_ddbox_t               ddbox = {0};
    int                       npme_major, npme_minor;
    t_nrnb                   *nrnb;
    gmx_mtop_t               *mtop          = NULL;
    t_mdatoms                *mdatoms       = NULL;
    t_forcerec               *fr            = NULL;
    t_fcdata                 *fcd           = NULL;
    real                      ewaldcoeff_q  = 0;
    real                      ewaldcoeff_lj = 0;
    struct gmx_pme_t        **pmedata       = NULL;
    gmx_vsite_t              *vsite         = NULL;
    gmx_constr_t              constr;
    int                       nChargePerturbed = -1, nTypePerturbed = 0, status;
    gmx_wallcycle_t           wcycle;
    gmx_bool                  bReadEkin;
    gmx_walltime_accounting_t walltime_accounting = NULL;
    int                       rc;
    gmx_int64_t               reset_counters;
    gmx_edsam_t               ed           = NULL;
    int                       nthreads_pme = 1;
    int                       nthreads_pp  = 1;
    gmx_membed_t              membed       = NULL;
    gmx_hw_info_t            *hwinfo       = NULL;
    /* The master rank decides early on bUseGPU and broadcasts this later */
    gmx_bool                  bUseGPU      = FALSE;

    /* CAUTION: threads may be started later on in this function, so
       cr doesn't reflect the final parallel state right now */
    snew(inputrec, 1);
    snew(mtop, 1);

    if (Flags & MD_APPENDFILES)
    {
        fplog = NULL;
    }

    bRerunMD     = (Flags & MD_RERUN);
    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;

    /* Detect hardware, gather information. This is an operation that is
     * global for this process (MPI rank). */
    hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU);

    gmx_print_detected_hardware(fplog, cr, hwinfo);

    if (fplog != NULL)
    {
        /* Print references after all software/hardware printing */
        please_cite(fplog, "Abraham2015");
        please_cite(fplog, "Pall2015");
        please_cite(fplog, "Pronk2013");
        please_cite(fplog, "Hess2008b");
        please_cite(fplog, "Spoel2005a");
        please_cite(fplog, "Lindahl2001a");
        please_cite(fplog, "Berendsen95a");
    }

    snew(state, 1);
    if (SIMMASTER(cr))
    {
        /* Read (nearly) all data required for the simulation */
        read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, NULL, mtop);

        if (inputrec->cutoff_scheme == ecutsVERLET)
        {
            /* Here the master rank decides if all ranks will use GPUs */
            bUseGPU = (hwinfo->gpu_info.n_dev_compatible > 0 ||
                       getenv("GMX_EMULATE_GPU") != NULL);

            /* TODO add GPU kernels for this and replace this check by:
             * (bUseGPU && (ir->vdwtype == evdwPME &&
             *               ir->ljpme_combination_rule == eljpmeLB))
             * update the message text and the content of nbnxn_acceleration_supported.
             */
            if (bUseGPU &&
                !nbnxn_gpu_acceleration_supported(fplog, cr, inputrec, bRerunMD))
            {
                /* Fallback message printed by nbnxn_acceleration_supported */
                if (bForceUseGPU)
                {
                    gmx_fatal(FARGS, "GPU acceleration requested, but not supported with the given input settings");
                }
                bUseGPU = FALSE;
            }

            prepare_verlet_scheme(fplog, cr,
                                  inputrec, nstlist_cmdline, mtop, state->box,
                                  bUseGPU);
        }
        else
        {
            if (nstlist_cmdline > 0)
            {
                gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme");
            }

            if (hwinfo->gpu_info.n_dev_compatible > 0)
            {
                md_print_warn(cr, fplog,
                              "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
                              "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n");
            }

            if (bForceUseGPU)
            {
                gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet");
            }

#ifdef GMX_TARGET_BGQ
            md_print_warn(cr, fplog,
                          "NOTE: There is no SIMD implementation of the group scheme kernels on\n"
                          "      BlueGene/Q. You will observe better performance from using the\n"
                          "      Verlet cut-off scheme.\n");
#endif
        }

        if (inputrec->eI == eiSD2)
        {
            md_print_warn(cr, fplog, "The stochastic dynamics integrator %s is deprecated, since\n"
                          "it is slower than integrator %s and is slightly less accurate\n"
                          "with constraints. Use the %s integrator.",
                          ei_names[inputrec->eI], ei_names[eiSD1], ei_names[eiSD1]);
        }
    }

    /* Check and update the hardware options for internal consistency */
    check_and_update_hw_opt_1(hw_opt, cr);

    /* Early check for externally set process affinity. */
    gmx_check_thread_affinity_set(fplog, cr,
                                  hw_opt, hwinfo->nthreads_hw_avail, FALSE);

#ifdef GMX_THREAD_MPI
    if (SIMMASTER(cr))
    {
        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
        {
            gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks");
        }

        /* Since the master knows the cut-off scheme, update hw_opt for this.
         * This is done later for normal MPI and also once more with tMPI
         * for all tMPI ranks.
         */
        check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);

        /* NOW the threads will be started: */
        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
                                                 hw_opt,
                                                 inputrec, mtop,
                                                 cr, fplog, bUseGPU);

        if (hw_opt->nthreads_tmpi > 1)
        {
            t_commrec *cr_old = cr;
            /* now start the threads. */
            cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
                                        oenv, bVerbose, bCompact, nstglobalcomm,
                                        ddxyz, dd_node_order, rdd, rconstr,
                                        dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
                                        nbpu_opt, nstlist_cmdline,
                                        nsteps_cmdline, nstepout, resetstep, nmultisim,
                                        repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
                                        cpt_period, max_hours, Flags);
            /* the main thread continues here with a new cr. We don't deallocate
               the old cr because other threads may still be reading it. */
            if (cr == NULL)
            {
                gmx_comm("Failed to spawn threads");
            }
        }
    }
#endif
    /* END OF CAUTION: cr is now reliable */

    /* g_membed initialisation *
     * Because we change the mtop, init_membed is called before the init_parallel *
     * (in case we ever want to make it run in parallel) */
    if (opt2bSet("-membed", nfile, fnm))
    {
        if (MASTER(cr))
        {
            fprintf(stderr, "Initializing membed");
        }
        membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period);
    }

    if (PAR(cr))
    {
        /* now broadcast everything to the non-master nodes/threads: */
        init_parallel(cr, inputrec, mtop);

        /* The master rank decided on the use of GPUs,
         * broadcast this information to all ranks.
         */
        gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr);
    }

    if (fplog != NULL)
    {
        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
        fprintf(fplog, "\n");
    }

    /* now make sure the state is initialized and propagated */
    set_state_entries(state, inputrec);

    /* A parallel command line option consistency check that we can
       only do after any threads have started. */
    if (!PAR(cr) &&
        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
    {
        gmx_fatal(FARGS,
                  "The -dd or -npme option requests a parallel simulation, "
#ifndef GMX_MPI
                  "but %s was compiled without threads or MPI enabled"
#else
#ifdef GMX_THREAD_MPI
                  "but the number of threads (option -nt) is 1"
#else
                  "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec"
#endif
#endif
                  , output_env_get_program_display_name(oenv)
                  );
    }

    if (bRerunMD &&
        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
    {
        gmx_fatal(FARGS, "The .mdp file specified an energy minimization or normal mode algorithm, and these are not compatible with mdrun -rerun");
    }

    if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr))
    {
        gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank");
    }

    if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
    {
        if (cr->npmenodes > 0)
        {
            gmx_fatal_collective(FARGS, cr, NULL,
                                 "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
        }

        cr->npmenodes = 0;
    }

    if (bUseGPU && cr->npmenodes < 0)
    {
        /* With GPUs we don't automatically use PME-only ranks. PME ranks can
         * improve performance with many threads per GPU, since our OpenMP
         * scaling is bad, but it's difficult to automate the setup.
         */
        cr->npmenodes = 0;
    }

#ifdef GMX_FAHCORE
    if (MASTER(cr))
    {
        fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
    }
#endif

    /* NMR restraints must be initialized before load_checkpoint,
     * since with time averaging the history is added to t_state.
     * For proper consistency check we therefore need to extend
     * t_state here.
     * So the PME-only nodes (if present) will also initialize
     * the distance restraints.
     */
    snew(fcd, 1);

    /* This needs to be called before read_checkpoint to extend the state */
    init_disres(fplog, mtop, inputrec, cr, fcd, state, repl_ex_nst > 0);

    init_orires(fplog, mtop, state->x, inputrec, cr, &(fcd->orires),
                state);

    if (DEFORM(*inputrec))
    {
        /* Store the deform reference box before reading the checkpoint */
        if (SIMMASTER(cr))
        {
            copy_mat(state->box, box);
        }
        if (PAR(cr))
        {
            gmx_bcast(sizeof(box), box, cr);
        }
        /* Because we do not have the update struct available yet
         * in which the reference values should be stored,
         * we store them temporarily in static variables.
         * This should be thread safe, since they are only written once
         * and with identical values.
         */
        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
        deform_init_init_step_tpx = inputrec->init_step;
        copy_mat(box, deform_init_box_tpx);
        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
    }

    if (opt2bSet("-cpi", nfile, fnm))
    {
        /* Check if checkpoint file exists before doing continuation.
         * This way we can use identical input options for the first and subsequent runs...
         */
        if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) )
        {
            load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog,
                            cr, ddxyz,
                            inputrec, state, &bReadEkin,
                            (Flags & MD_APPENDFILES),
                            (Flags & MD_APPENDFILESSET));

            if (bReadEkin)
            {
                Flags |= MD_READ_EKIN;
            }
        }
    }

    if (MASTER(cr) && (Flags & MD_APPENDFILES))
    {
        gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr,
                     Flags, &fplog);
    }

    /* override nsteps with value from cmdline */
    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);

    if (SIMMASTER(cr))
    {
        copy_mat(state->box, box);
    }

    if (PAR(cr))
    {
        gmx_bcast(sizeof(box), box, cr);
    }

    /* Essential dynamics */
    if (opt2bSet("-ei", nfile, fnm))
    {
        /* Open input and output files, allocate space for ED data structure */
        ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr);
    }

    if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
                     inputrec->eI == eiNM))
    {
        cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr,
                                           dddlb_opt, dlb_scale,
                                           ddcsx, ddcsy, ddcsz,
                                           mtop, inputrec,
                                           box, state->x,
                                           &ddbox, &npme_major, &npme_minor);

        make_dd_communicators(fplog, cr, dd_node_order);

        /* Set overallocation to avoid frequent reallocation of arrays */
        set_over_alloc_dd(TRUE);
    }
    else
    {
        /* PME, if used, is done on all nodes with 1D decomposition */
        cr->npmenodes = 0;
        cr->duty      = (DUTY_PP | DUTY_PME);
        npme_major    = 1;
        npme_minor    = 1;

        if (inputrec->ePBC == epbcSCREW)
        {
            gmx_fatal(FARGS,
                      "pbc=%s is only implemented with domain decomposition",
                      epbc_names[inputrec->ePBC]);
        }
    }

    if (PAR(cr))
    {
        /* After possible communicator splitting in make_dd_communicators.
         * we can set up the intra/inter node communication.
         */
        gmx_setup_nodecomm(fplog, cr);
    }

    /* Initialize per-physical-node MPI process/thread ID and counters. */
    gmx_init_intranode_counters(cr);
#ifdef GMX_MPI
    if (MULTISIM(cr))
    {
        md_print_info(cr, fplog,
                      "This is simulation %d out of %d running as a composite GROMACS\n"
                      "multi-simulation job. Setup for this simulation:\n\n",
                      cr->ms->sim, cr->ms->nsim);
    }
    md_print_info(cr, fplog, "Using %d MPI %s\n",
                  cr->nnodes,
#ifdef GMX_THREAD_MPI
                  cr->nnodes == 1 ? "thread" : "threads"
#else
                  cr->nnodes == 1 ? "process" : "processes"
#endif
                  );
    fflush(stderr);
#endif

    /* Check and update hw_opt for the cut-off scheme */
    check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);

    /* Check and update hw_opt for the number of MPI ranks */
    check_and_update_hw_opt_3(hw_opt);

    gmx_omp_nthreads_init(fplog, cr,
                          hwinfo->nthreads_hw_avail,
                          hw_opt->nthreads_omp,
                          hw_opt->nthreads_omp_pme,
                          (cr->duty & DUTY_PP) == 0,
                          inputrec->cutoff_scheme == ecutsVERLET);

#ifndef NDEBUG
    if (integrator[inputrec->eI].func != do_tpi &&
        inputrec->cutoff_scheme == ecutsVERLET)
    {
        gmx_feenableexcept();
    }
#endif

    if (bUseGPU)
    {
        /* Select GPU id's to use */
        gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU,
                           &hw_opt->gpu_opt);
    }
    else
    {
        /* Ignore (potentially) manually selected GPUs */
        hw_opt->gpu_opt.n_dev_use = 0;
    }

    /* check consistency across ranks of things like SIMD
     * support and number of GPUs selected */
    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU);

    /* Now that we know the setup is consistent, check for efficiency */
    check_resource_division_efficiency(hwinfo, hw_opt, Flags & MD_NTOMPSET,
                                       cr, fplog);

    if (DOMAINDECOMP(cr))
    {
        /* When we share GPUs over ranks, we need to know this for the DLB */
        dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt);
    }

    /* getting number of PP/PME threads
       PME: env variable should be read only on one node to make sure it is
       identical everywhere;
     */
    /* TODO nthreads_pp is only used for pinning threads.
     * This is a temporary solution until we have a hw topology library.
     */
    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
    nthreads_pme = gmx_omp_nthreads_get(emntPME);

    wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme);

    if (PAR(cr))
    {
        /* Master synchronizes its value of reset_counters with all nodes
         * including PME only nodes */
        reset_counters = wcycle_get_reset_counters(wcycle);
        gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
        wcycle_set_reset_counters(wcycle, reset_counters);
    }

    snew(nrnb, 1);
    if (cr->duty & DUTY_PP)
    {
        bcast_state(cr, state);

        /* Initiate forcerecord */
        fr          = mk_forcerec();
        fr->hwinfo  = hwinfo;
        fr->gpu_opt = &hw_opt->gpu_opt;
        init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box,
                      opt2fn("-table", nfile, fnm),
                      opt2fn("-tabletf", nfile, fnm),
                      opt2fn("-tablep", nfile, fnm),
                      opt2fn("-tableb", nfile, fnm),
                      nbpu_opt,
                      FALSE,
                      pforce);

        /* version for PCA_NOT_READ_NODE (see md.c) */
        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
           "nofile","nofile","nofile","nofile",FALSE,pforce);
         */

        /* Initialize QM-MM */
        if (fr->bQMMM)
        {
            init_QMMMrec(cr, mtop, inputrec, fr);
        }

        /* Initialize the mdatoms structure.
         * mdatoms is not filled with atom data,
         * as this can not be done now with domain decomposition.
         */
        mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO);

        /* Initialize the virtual site communication */
        vsite = init_vsite(mtop, cr, FALSE);

        calc_shifts(box, fr->shift_vec);

        /* With periodic molecules the charge groups should be whole at start up
         * and the virtual sites should not be far from their proper positions.
         */
        if (!inputrec->bContinuation && MASTER(cr) &&
            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
        {
            /* Make molecules whole at start of run */
            if (fr->ePBC != epbcNONE)
            {
                do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x);
            }
            if (vsite)
            {
                /* Correct initial vsite positions are required
                 * for the initial distribution in the domain decomposition
                 * and for the initial shell prediction.
                 */
                construct_vsites_mtop(vsite, mtop, state->x);
            }
        }

        if (EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))
        {
            ewaldcoeff_q  = fr->ewaldcoeff_q;
            ewaldcoeff_lj = fr->ewaldcoeff_lj;
            pmedata       = &fr->pmedata;
        }
        else
        {
            pmedata = NULL;
        }
    }
    else
    {
        /* This is a PME only node */

        /* We don't need the state */
        done_state(state);

        ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
        ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
        snew(pmedata, 1);
    }

    if (hw_opt->thread_affinity != threadaffOFF)
    {
        /* Before setting affinity, check whether the affinity has changed
         * - which indicates that probably the OpenMP library has changed it
         * since we first checked).
         */
        gmx_check_thread_affinity_set(fplog, cr,
                                      hw_opt, hwinfo->nthreads_hw_avail, TRUE);

        /* Set the CPU affinity */
        gmx_set_thread_affinity(fplog, cr, hw_opt, hwinfo);
    }

    /* Initiate PME if necessary,
     * either on all nodes or on dedicated PME nodes only. */
    if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
    {
        if (mdatoms)
        {
            nChargePerturbed = mdatoms->nChargePerturbed;
            if (EVDW_PME(inputrec->vdwtype))
            {
                nTypePerturbed = mdatoms->nTypePerturbed;
            }
        }
        if (cr->npmenodes > 0)
        {
            /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
        }

        if (cr->duty & DUTY_PME)
        {
            status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec,
                                  mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed,
                                  (Flags & MD_REPRODUCIBLE), nthreads_pme);
            if (status != 0)
            {
                gmx_fatal(FARGS, "Error %d initializing PME", status);
            }
        }
    }

    if (integrator[inputrec->eI].func == do_md)
    {
        /* Turn on signal handling on all nodes */
        /*
         * A user signal from the PME nodes (if any)
         * is communicated to the PP nodes.
         */
        signal_handler_install();
    }

    if (cr->duty & DUTY_PP)
    {
        /* Assumes uniform use of the number of OpenMP threads */
        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));

        if (inputrec->bPull)
        {
            /* Initialize pull code */
            inputrec->pull_work =
                init_pull(fplog, inputrec->pull, inputrec, nfile, fnm,
                          mtop, cr, oenv, inputrec->fepvals->init_lambda,
                          EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags);
        }

        if (inputrec->bRot)
        {
            /* Initialize enforced rotation code */
            init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv,
                     bVerbose, Flags);
        }

        if (inputrec->eSwapCoords != eswapNO)
        {
            /* Initialize ion swapping code */
            init_swapcoords(fplog, bVerbose, inputrec, opt2fn_master("-swap", nfile, fnm, cr),
                            mtop, state->x, state->box, &state->swapstate, cr, oenv, Flags);
        }

        constr = init_constraints(fplog, mtop, inputrec, ed, state, cr);

        if (DOMAINDECOMP(cr))
        {
            GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
            dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,
                            Flags & MD_DDBONDCHECK, fr->cginfo_mb);

            set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, &ddbox);

            setup_dd_grid(fplog, cr->dd);
        }

        /* Now do whatever the user wants us to do (how flexible...) */
        integrator[inputrec->eI].func(fplog, cr, nfile, fnm,
                                      oenv, bVerbose, bCompact,
                                      nstglobalcomm,
                                      vsite, constr,
                                      nstepout, inputrec, mtop,
                                      fcd, state,
                                      mdatoms, nrnb, wcycle, ed, fr,
                                      repl_ex_nst, repl_ex_nex, repl_ex_seed,
                                      membed,
                                      cpt_period, max_hours,
                                      imdport,
                                      Flags,
                                      walltime_accounting);

        if (inputrec->bPull)
        {
            finish_pull(inputrec->pull_work);
        }

        if (inputrec->bRot)
        {
            finish_rot(inputrec->rot);
        }
    }
    else
    {
        GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
        /* do PME only */
        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
        gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting,
                    ewaldcoeff_q, ewaldcoeff_lj, inputrec);
    }

    wallcycle_stop(wcycle, ewcRUN);

    /* Finish up, write some stuff
     * if rerunMD, don't write last frame again
     */
    finish_run(fplog, cr,
               inputrec, nrnb, wcycle, walltime_accounting,
               fr ? fr->nbv : NULL,
               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));

    /* Free GPU memory and context */
    free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL);

    if (opt2bSet("-membed", nfile, fnm))
    {
        sfree(membed);
    }

    gmx_hardware_info_free(hwinfo);

    /* Does what it says */
    print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());

    walltime_accounting_destroy(walltime_accounting);

    /* PLUMED */
    if (plumedswitch)
    {
        plumed_finalize(plumedmain);
    }
    /* END PLUMED */

    /* Close logfile already here if we were appending to it */
    if (MASTER(cr) && (Flags & MD_APPENDFILES))
    {
        gmx_log_close(fplog);
    }

    rc = (int)gmx_get_stop_condition();

    done_ed(&ed);

#ifdef GMX_THREAD_MPI
    /* we need to join all threads. The sub-threads join when they
       exit this function, but the master thread needs to be told to
       wait for that. */
    if (PAR(cr) && MASTER(cr))
    {
        tMPI_Finalize();
    }
#endif

    return rc;
}