Exemple #1
0
int mdrunner(gmx_hw_opt_t *hw_opt,
             FILE *fplog, t_commrec *cr, int nfile,
             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
             gmx_bool bCompact, int nstglobalcomm,
             ivec ddxyz, int dd_node_order, real rdd, real rconstr,
             const char *dddlb_opt, real dlb_scale,
             const char *ddcsx, const char *ddcsy, const char *ddcsz,
             const char *nbpu_opt, int nstlist_cmdline,
             gmx_int64_t nsteps_cmdline, int nstepout, int resetstep,
             int gmx_unused nmultisim, int repl_ex_nst, int repl_ex_nex,
             int repl_ex_seed, real pforce, real cpt_period, real max_hours,
             int imdport, unsigned long Flags)
{
    gmx_bool                  bForceUseGPU, bTryUseGPU, bRerunMD;
    t_inputrec               *inputrec;
    t_state                  *state = NULL;
    matrix                    box;
    gmx_ddbox_t               ddbox = {0};
    int                       npme_major, npme_minor;
    t_nrnb                   *nrnb;
    gmx_mtop_t               *mtop          = NULL;
    t_mdatoms                *mdatoms       = NULL;
    t_forcerec               *fr            = NULL;
    t_fcdata                 *fcd           = NULL;
    real                      ewaldcoeff_q  = 0;
    real                      ewaldcoeff_lj = 0;
    struct gmx_pme_t        **pmedata       = NULL;
    gmx_vsite_t              *vsite         = NULL;
    gmx_constr_t              constr;
    int                       nChargePerturbed = -1, nTypePerturbed = 0, status;
    gmx_wallcycle_t           wcycle;
    gmx_bool                  bReadEkin;
    gmx_walltime_accounting_t walltime_accounting = NULL;
    int                       rc;
    gmx_int64_t               reset_counters;
    gmx_edsam_t               ed           = NULL;
    int                       nthreads_pme = 1;
    int                       nthreads_pp  = 1;
    gmx_membed_t              membed       = NULL;
    gmx_hw_info_t            *hwinfo       = NULL;
    /* The master rank decides early on bUseGPU and broadcasts this later */
    gmx_bool                  bUseGPU      = FALSE;

    /* CAUTION: threads may be started later on in this function, so
       cr doesn't reflect the final parallel state right now */
    snew(inputrec, 1);
    snew(mtop, 1);

    if (Flags & MD_APPENDFILES)
    {
        fplog = NULL;
    }

    bRerunMD     = (Flags & MD_RERUN);
    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;

    /* Detect hardware, gather information. This is an operation that is
     * global for this process (MPI rank). */
    hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU);

    gmx_print_detected_hardware(fplog, cr, hwinfo);

    if (fplog != NULL)
    {
        /* Print references after all software/hardware printing */
        please_cite(fplog, "Abraham2015");
        please_cite(fplog, "Pall2015");
        please_cite(fplog, "Pronk2013");
        please_cite(fplog, "Hess2008b");
        please_cite(fplog, "Spoel2005a");
        please_cite(fplog, "Lindahl2001a");
        please_cite(fplog, "Berendsen95a");
    }

    snew(state, 1);
    if (SIMMASTER(cr))
    {
        /* Read (nearly) all data required for the simulation */
        read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, NULL, mtop);

        if (inputrec->cutoff_scheme == ecutsVERLET)
        {
            /* Here the master rank decides if all ranks will use GPUs */
            bUseGPU = (hwinfo->gpu_info.n_dev_compatible > 0 ||
                       getenv("GMX_EMULATE_GPU") != NULL);

            /* TODO add GPU kernels for this and replace this check by:
             * (bUseGPU && (ir->vdwtype == evdwPME &&
             *               ir->ljpme_combination_rule == eljpmeLB))
             * update the message text and the content of nbnxn_acceleration_supported.
             */
            if (bUseGPU &&
                !nbnxn_gpu_acceleration_supported(fplog, cr, inputrec, bRerunMD))
            {
                /* Fallback message printed by nbnxn_acceleration_supported */
                if (bForceUseGPU)
                {
                    gmx_fatal(FARGS, "GPU acceleration requested, but not supported with the given input settings");
                }
                bUseGPU = FALSE;
            }

            prepare_verlet_scheme(fplog, cr,
                                  inputrec, nstlist_cmdline, mtop, state->box,
                                  bUseGPU);
        }
        else
        {
            if (nstlist_cmdline > 0)
            {
                gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme");
            }

            if (hwinfo->gpu_info.n_dev_compatible > 0)
            {
                md_print_warn(cr, fplog,
                              "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
                              "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n");
            }

            if (bForceUseGPU)
            {
                gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet");
            }

#ifdef GMX_TARGET_BGQ
            md_print_warn(cr, fplog,
                          "NOTE: There is no SIMD implementation of the group scheme kernels on\n"
                          "      BlueGene/Q. You will observe better performance from using the\n"
                          "      Verlet cut-off scheme.\n");
#endif
        }

        if (inputrec->eI == eiSD2)
        {
            md_print_warn(cr, fplog, "The stochastic dynamics integrator %s is deprecated, since\n"
                          "it is slower than integrator %s and is slightly less accurate\n"
                          "with constraints. Use the %s integrator.",
                          ei_names[inputrec->eI], ei_names[eiSD1], ei_names[eiSD1]);
        }
    }

    /* Check and update the hardware options for internal consistency */
    check_and_update_hw_opt_1(hw_opt, cr);

    /* Early check for externally set process affinity. */
    gmx_check_thread_affinity_set(fplog, cr,
                                  hw_opt, hwinfo->nthreads_hw_avail, FALSE);

#ifdef GMX_THREAD_MPI
    if (SIMMASTER(cr))
    {
        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
        {
            gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks");
        }

        /* Since the master knows the cut-off scheme, update hw_opt for this.
         * This is done later for normal MPI and also once more with tMPI
         * for all tMPI ranks.
         */
        check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);

        /* NOW the threads will be started: */
        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
                                                 hw_opt,
                                                 inputrec, mtop,
                                                 cr, fplog, bUseGPU);

        if (hw_opt->nthreads_tmpi > 1)
        {
            t_commrec *cr_old       = cr;
            /* now start the threads. */
            cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
                                        oenv, bVerbose, bCompact, nstglobalcomm,
                                        ddxyz, dd_node_order, rdd, rconstr,
                                        dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
                                        nbpu_opt, nstlist_cmdline,
                                        nsteps_cmdline, nstepout, resetstep, nmultisim,
                                        repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
                                        cpt_period, max_hours,
                                        Flags);
            /* the main thread continues here with a new cr. We don't deallocate
               the old cr because other threads may still be reading it. */
            if (cr == NULL)
            {
                gmx_comm("Failed to spawn threads");
            }
        }
    }
#endif
    /* END OF CAUTION: cr is now reliable */

    /* g_membed initialisation *
     * Because we change the mtop, init_membed is called before the init_parallel *
     * (in case we ever want to make it run in parallel) */
    if (opt2bSet("-membed", nfile, fnm))
    {
        if (MASTER(cr))
        {
            fprintf(stderr, "Initializing membed");
        }
        membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period);
    }

    if (PAR(cr))
    {
        /* now broadcast everything to the non-master nodes/threads: */
        init_parallel(cr, inputrec, mtop);

        /* The master rank decided on the use of GPUs,
         * broadcast this information to all ranks.
         */
        gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr);
    }

    if (fplog != NULL)
    {
        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
        fprintf(fplog, "\n");
    }

    /* now make sure the state is initialized and propagated */
    set_state_entries(state, inputrec);

    /* A parallel command line option consistency check that we can
       only do after any threads have started. */
    if (!PAR(cr) &&
        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
    {
        gmx_fatal(FARGS,
                  "The -dd or -npme option request a parallel simulation, "
#ifndef GMX_MPI
                  "but %s was compiled without threads or MPI enabled"
#else
#ifdef GMX_THREAD_MPI
                  "but the number of threads (option -nt) is 1"
#else
                  "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec"
#endif
#endif
                  , output_env_get_program_display_name(oenv)
                  );
    }

    if (bRerunMD &&
        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
    {
        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
    }

    if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr))
    {
        gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank");
    }

    if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
    {
        if (cr->npmenodes > 0)
        {
            gmx_fatal_collective(FARGS, cr, NULL,
                                 "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
        }

        cr->npmenodes = 0;
    }

    if (bUseGPU && cr->npmenodes < 0)
    {
        /* With GPUs we don't automatically use PME-only ranks. PME ranks can
         * improve performance with many threads per GPU, since our OpenMP
         * scaling is bad, but it's difficult to automate the setup.
         */
        cr->npmenodes = 0;
    }

#ifdef GMX_FAHCORE
    if (MASTER(cr))
    {
        fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
    }
#endif

    /* NMR restraints must be initialized before load_checkpoint,
     * since with time averaging the history is added to t_state.
     * For proper consistency check we therefore need to extend
     * t_state here.
     * So the PME-only nodes (if present) will also initialize
     * the distance restraints.
     */
    snew(fcd, 1);

    /* This needs to be called before read_checkpoint to extend the state */
    init_disres(fplog, mtop, inputrec, cr, fcd, state, repl_ex_nst > 0);

    init_orires(fplog, mtop, state->x, inputrec, cr, &(fcd->orires),
                state);

    if (DEFORM(*inputrec))
    {
        /* Store the deform reference box before reading the checkpoint */
        if (SIMMASTER(cr))
        {
            copy_mat(state->box, box);
        }
        if (PAR(cr))
        {
            gmx_bcast(sizeof(box), box, cr);
        }
        /* Because we do not have the update struct available yet
         * in which the reference values should be stored,
         * we store them temporarily in static variables.
         * This should be thread safe, since they are only written once
         * and with identical values.
         */
        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
        deform_init_init_step_tpx = inputrec->init_step;
        copy_mat(box, deform_init_box_tpx);
        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
    }

    if (opt2bSet("-cpi", nfile, fnm))
    {
        /* Check if checkpoint file exists before doing continuation.
         * This way we can use identical input options for the first and subsequent runs...
         */
        if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) )
        {
            load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog,
                            cr, ddxyz,
                            inputrec, state, &bReadEkin,
                            (Flags & MD_APPENDFILES),
                            (Flags & MD_APPENDFILESSET));

            if (bReadEkin)
            {
                Flags |= MD_READ_EKIN;
            }
        }
    }

    if (MASTER(cr) && (Flags & MD_APPENDFILES))
    {
        gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr,
                     Flags, &fplog);
    }

    /* override nsteps with value from cmdline */
    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);

    if (SIMMASTER(cr))
    {
        copy_mat(state->box, box);
    }

    if (PAR(cr))
    {
        gmx_bcast(sizeof(box), box, cr);
    }

    /* Essential dynamics */
    if (opt2bSet("-ei", nfile, fnm))
    {
        /* Open input and output files, allocate space for ED data structure */
        ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr);
    }

    if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
                     inputrec->eI == eiNM))
    {
        cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr,
                                           dddlb_opt, dlb_scale,
                                           ddcsx, ddcsy, ddcsz,
                                           mtop, inputrec,
                                           box, state->x,
                                           &ddbox, &npme_major, &npme_minor);

        make_dd_communicators(fplog, cr, dd_node_order);

        /* Set overallocation to avoid frequent reallocation of arrays */
        set_over_alloc_dd(TRUE);
    }
    else
    {
        /* PME, if used, is done on all nodes with 1D decomposition */
        cr->npmenodes = 0;
        cr->duty      = (DUTY_PP | DUTY_PME);
        npme_major    = 1;
        npme_minor    = 1;

        if (inputrec->ePBC == epbcSCREW)
        {
            gmx_fatal(FARGS,
                      "pbc=%s is only implemented with domain decomposition",
                      epbc_names[inputrec->ePBC]);
        }
    }

    if (PAR(cr))
    {
        /* After possible communicator splitting in make_dd_communicators.
         * we can set up the intra/inter node communication.
         */
        gmx_setup_nodecomm(fplog, cr);
    }

    /* Initialize per-physical-node MPI process/thread ID and counters. */
    gmx_init_intranode_counters(cr);
#ifdef GMX_MPI
    if (MULTISIM(cr))
    {
        md_print_info(cr, fplog,
                      "This is simulation %d out of %d running as a composite GROMACS\n"
                      "multi-simulation job. Setup for this simulation:\n\n",
                      cr->ms->sim, cr->ms->nsim);
    }
    md_print_info(cr, fplog, "Using %d MPI %s\n",
                  cr->nnodes,
#ifdef GMX_THREAD_MPI
                  cr->nnodes == 1 ? "thread" : "threads"
#else
                  cr->nnodes == 1 ? "process" : "processes"
#endif
                  );
    fflush(stderr);
#endif

    /* Check and update hw_opt for the cut-off scheme */
    check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);

    /* Check and update hw_opt for the number of MPI ranks */
    check_and_update_hw_opt_3(hw_opt);

    gmx_omp_nthreads_init(fplog, cr,
                          hwinfo->nthreads_hw_avail,
                          hw_opt->nthreads_omp,
                          hw_opt->nthreads_omp_pme,
                          (cr->duty & DUTY_PP) == 0,
                          inputrec->cutoff_scheme == ecutsVERLET);

#ifndef NDEBUG
    if (integrator[inputrec->eI].func != do_tpi &&
        inputrec->cutoff_scheme == ecutsVERLET)
    {
        gmx_feenableexcept();
    }
#endif

    if (bUseGPU)
    {
        /* Select GPU id's to use */
        gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU,
                           &hw_opt->gpu_opt);
    }
    else
    {
        /* Ignore (potentially) manually selected GPUs */
        hw_opt->gpu_opt.n_dev_use = 0;
    }

    /* check consistency across ranks of things like SIMD
     * support and number of GPUs selected */
    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU);

    /* Now that we know the setup is consistent, check for efficiency */
    check_resource_division_efficiency(hwinfo, hw_opt, Flags & MD_NTOMPSET,
                                       cr, fplog);

    if (DOMAINDECOMP(cr))
    {
        /* When we share GPUs over ranks, we need to know this for the DLB */
        dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt);
    }

    /* getting number of PP/PME threads
       PME: env variable should be read only on one node to make sure it is
       identical everywhere;
     */
    /* TODO nthreads_pp is only used for pinning threads.
     * This is a temporary solution until we have a hw topology library.
     */
    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
    nthreads_pme = gmx_omp_nthreads_get(emntPME);

    wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme);

    if (PAR(cr))
    {
        /* Master synchronizes its value of reset_counters with all nodes
         * including PME only nodes */
        reset_counters = wcycle_get_reset_counters(wcycle);
        gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
        wcycle_set_reset_counters(wcycle, reset_counters);
    }

    snew(nrnb, 1);
    if (cr->duty & DUTY_PP)
    {
        bcast_state(cr, state);

        /* Initiate forcerecord */
        fr          = mk_forcerec();
        fr->hwinfo  = hwinfo;
        fr->gpu_opt = &hw_opt->gpu_opt;
        init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box,
                      opt2fn("-table", nfile, fnm),
                      opt2fn("-tabletf", nfile, fnm),
                      opt2fn("-tablep", nfile, fnm),
                      opt2fn("-tableb", nfile, fnm),
                      nbpu_opt,
                      FALSE,
                      pforce);

        /* version for PCA_NOT_READ_NODE (see md.c) */
        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
           "nofile","nofile","nofile","nofile",FALSE,pforce);
         */

        /* Initialize QM-MM */
        if (fr->bQMMM)
        {
            init_QMMMrec(cr, mtop, inputrec, fr);
        }

        /* Initialize the mdatoms structure.
         * mdatoms is not filled with atom data,
         * as this can not be done now with domain decomposition.
         */
        mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO);

        /* Initialize the virtual site communication */
        vsite = init_vsite(mtop, cr, FALSE);

        calc_shifts(box, fr->shift_vec);

        /* With periodic molecules the charge groups should be whole at start up
         * and the virtual sites should not be far from their proper positions.
         */
        if (!inputrec->bContinuation && MASTER(cr) &&
            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
        {
            /* Make molecules whole at start of run */
            if (fr->ePBC != epbcNONE)
            {
                do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x);
            }
            if (vsite)
            {
                /* Correct initial vsite positions are required
                 * for the initial distribution in the domain decomposition
                 * and for the initial shell prediction.
                 */
                construct_vsites_mtop(vsite, mtop, state->x);
            }
        }

        if (EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))
        {
            ewaldcoeff_q  = fr->ewaldcoeff_q;
            ewaldcoeff_lj = fr->ewaldcoeff_lj;
            pmedata       = &fr->pmedata;
        }
        else
        {
            pmedata = NULL;
        }
    }
    else
    {
        /* This is a PME only node */

        /* We don't need the state */
        done_state(state);

        ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
        ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
        snew(pmedata, 1);
    }

    if (hw_opt->thread_affinity != threadaffOFF)
    {
        /* Before setting affinity, check whether the affinity has changed
         * - which indicates that probably the OpenMP library has changed it
         * since we first checked).
         */
        gmx_check_thread_affinity_set(fplog, cr,
                                      hw_opt, hwinfo->nthreads_hw_avail, TRUE);

        /* Set the CPU affinity */
        gmx_set_thread_affinity(fplog, cr, hw_opt, hwinfo);
    }

    /* Initiate PME if necessary,
     * either on all nodes or on dedicated PME nodes only. */
    if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
    {
        if (mdatoms)
        {
            nChargePerturbed = mdatoms->nChargePerturbed;
            if (EVDW_PME(inputrec->vdwtype))
            {
                nTypePerturbed   = mdatoms->nTypePerturbed;
            }
        }
        if (cr->npmenodes > 0)
        {
            /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
        }

        if (cr->duty & DUTY_PME)
        {
            status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec,
                                  mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed,
                                  (Flags & MD_REPRODUCIBLE), nthreads_pme);
            if (status != 0)
            {
                gmx_fatal(FARGS, "Error %d initializing PME", status);
            }
        }
    }


    if (integrator[inputrec->eI].func == do_md)
    {
        /* Turn on signal handling on all nodes */
        /*
         * (A user signal from the PME nodes (if any)
         * is communicated to the PP nodes.
         */
        signal_handler_install();
    }

    if (cr->duty & DUTY_PP)
    {
        /* Assumes uniform use of the number of OpenMP threads */
        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));

        if (inputrec->bPull)
        {
            /* Initialize pull code */
            inputrec->pull_work =
                init_pull(fplog, inputrec->pull, inputrec, nfile, fnm,
                          mtop, cr, oenv, inputrec->fepvals->init_lambda,
                          EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags);
        }

        if (inputrec->bRot)
        {
            /* Initialize enforced rotation code */
            init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv,
                     bVerbose, Flags);
        }

        if (inputrec->eSwapCoords != eswapNO)
        {
            /* Initialize ion swapping code */
            init_swapcoords(fplog, bVerbose, inputrec, opt2fn_master("-swap", nfile, fnm, cr),
                            mtop, state->x, state->box, &state->swapstate, cr, oenv, Flags);
        }

        constr = init_constraints(fplog, mtop, inputrec, ed, state, cr);

        if (DOMAINDECOMP(cr))
        {
            GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
            dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,
                            Flags & MD_DDBONDCHECK, fr->cginfo_mb);

            set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, &ddbox);

            setup_dd_grid(fplog, cr->dd);
        }

        /* Now do whatever the user wants us to do (how flexible...) */
        integrator[inputrec->eI].func(fplog, cr, nfile, fnm,
                                      oenv, bVerbose, bCompact,
                                      nstglobalcomm,
                                      vsite, constr,
                                      nstepout, inputrec, mtop,
                                      fcd, state,
                                      mdatoms, nrnb, wcycle, ed, fr,
                                      repl_ex_nst, repl_ex_nex, repl_ex_seed,
                                      membed,
                                      cpt_period, max_hours,
                                      imdport,
                                      Flags,
                                      walltime_accounting);

        if (inputrec->bPull)
        {
            finish_pull(inputrec->pull_work);
        }

        if (inputrec->bRot)
        {
            finish_rot(inputrec->rot);
        }

    }
    else
    {
        GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
        /* do PME only */
        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
        gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec);
    }

    wallcycle_stop(wcycle, ewcRUN);

    /* Finish up, write some stuff
     * if rerunMD, don't write last frame again
     */
    finish_run(fplog, cr,
               inputrec, nrnb, wcycle, walltime_accounting,
               fr ? fr->nbv : NULL,
               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));


    /* Free GPU memory and context */
    free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL);

    if (opt2bSet("-membed", nfile, fnm))
    {
        sfree(membed);
    }

    gmx_hardware_info_free(hwinfo);

    /* Does what it says */
    print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
    walltime_accounting_destroy(walltime_accounting);

    /* PLUMED */
    if(plumedswitch){
      plumed_finalize(plumedmain);
    }
    /* END PLUMED */

    /* Close logfile already here if we were appending to it */
    if (MASTER(cr) && (Flags & MD_APPENDFILES))
    {
        gmx_log_close(fplog);
    }

    rc = (int)gmx_get_stop_condition();

    done_ed(&ed);

#ifdef GMX_THREAD_MPI
    /* we need to join all threads. The sub-threads join when they
       exit this function, but the master thread needs to be told to
       wait for that. */
    if (PAR(cr) && MASTER(cr))
    {
        tMPI_Finalize();
    }
#endif

    return rc;
}
int main(int argc, char *argv[])
{
	char *msg;
	int seq;
	int argi;
	int alertcolors, alertinterval;
	char *configfn = NULL;
	char *checkfn = NULL;
	int checkpointinterval = 900;
	char acklogfn[PATH_MAX];
	FILE *acklogfd = NULL;
	char notiflogfn[PATH_MAX];
	FILE *notiflogfd = NULL;
	char *tracefn = NULL;
	struct sigaction sa;
	int configchanged;
	time_t lastxmit = 0;

	MEMDEFINE(acklogfn);
	MEMDEFINE(notiflogfn);

	libxymon_init(argv[0]);

	/* Dont save the error buffer */
	save_errbuf = 0;

	/* Load alert config */
	alertcolors = colorset(xgetenv("ALERTCOLORS"), ((1 << COL_GREEN) | (1 << COL_BLUE)));
	alertinterval = 60*atoi(xgetenv("ALERTREPEAT"));

	/* Create our loookup-trees */
	hostnames = xtreeNew(strcasecmp);
	testnames = xtreeNew(strcasecmp);
	locations = xtreeNew(strcasecmp);

	for (argi=1; (argi < argc); argi++) {
		if (argnmatch(argv[argi], "--config=")) {
			configfn = strdup(strchr(argv[argi], '=')+1);
		}
		else if (argnmatch(argv[argi], "--checkpoint-file=")) {
			checkfn = strdup(strchr(argv[argi], '=')+1);
		}
		else if (argnmatch(argv[argi], "--checkpoint-interval=")) {
			char *p = strchr(argv[argi], '=') + 1;
			checkpointinterval = atoi(p);
		}
		else if (argnmatch(argv[argi], "--dump-config")) {
			load_alertconfig(configfn, alertcolors, alertinterval);
			dump_alertconfig(1);
			return 0;
		}
		else if (argnmatch(argv[argi], "--cfid")) {
			include_configid = 1;
		}
		else if (argnmatch(argv[argi], "--test")) {
			char *testhost = NULL, *testservice = NULL, *testpage = NULL, 
			     *testcolor = "red", *testgroups = NULL;
			void *hinfo;
			int testdur = 0;
			FILE *logfd = NULL;
			activealerts_t *awalk = NULL;
			int paramno = 0;

			argi++; if (argi < argc) testhost = argv[argi];
			argi++; if (argi < argc) testservice = argv[argi];
			argi++; 
			while (argi < argc) {
				if (strncasecmp(argv[argi], "--duration=", 11) == 0) {
					testdur = durationvalue(strchr(argv[argi], '=')+1);
				}
				else if (strncasecmp(argv[argi], "--color=", 8) == 0) {
					testcolor = strchr(argv[argi], '=')+1;
				}
				else if (strncasecmp(argv[argi], "--group=", 8) == 0) {
					testgroups = strchr(argv[argi], '=')+1;
				}
				else if (strncasecmp(argv[argi], "--time=", 7) == 0) {
					fakestarttime = (time_t)atoi(strchr(argv[argi], '=')+1);
				}
				else {
					paramno++;
					if (paramno == 1) testdur = atoi(argv[argi]);
					else if (paramno == 2) testcolor = argv[argi];
					else if (paramno == 3) fakestarttime = (time_t) atoi(argv[argi]);
				}

				argi++;
			}

			if ((testhost == NULL) || (testservice == NULL)) {
				printf("Usage: xymond_alert --test HOST SERVICE [options]\n");
				printf("Possible options:\n\t[--duration=MINUTES]\n\t[--color=COLOR]\n\t[--group=GROUPNAME]\n\t[--time=TIMESPEC]\n");

				return 1;
			}

			load_hostnames(xgetenv("HOSTSCFG"), NULL, get_fqdn());
			hinfo = hostinfo(testhost);
			if (hinfo) {
				testpage = strdup(xmh_item(hinfo, XMH_ALLPAGEPATHS));
			}
			else {
				errprintf("Host not found in hosts.cfg - assuming it is on the top page\n");
				testpage = "";
			}

			awalk = (activealerts_t *)calloc(1, sizeof(activealerts_t));
			awalk->hostname = find_name(hostnames, testhost);
			awalk->testname = find_name(testnames, testservice);
			awalk->location = find_name(locations, testpage);
			awalk->ip = strdup("127.0.0.1");
			awalk->color = awalk->maxcolor = parse_color(testcolor);
			awalk->pagemessage = "Test of the alert configuration";
			awalk->eventstart = getcurrenttime(NULL) - testdur*60;
			awalk->groups = (testgroups ? strdup(testgroups) : NULL);
			awalk->state = A_PAGING;
			awalk->cookie = 12345;
			awalk->next = NULL;

			logfd = fopen("/dev/null", "w");
			starttrace(NULL);
			testonly = 1;

			load_alertconfig(configfn, alertcolors, alertinterval);
			load_holidays(0);
			send_alert(awalk, logfd);
			return 0;
		}
		else if (argnmatch(argv[argi], "--trace=")) {
			tracefn = strdup(strchr(argv[argi], '=')+1);
			starttrace(tracefn);
		}
		else if (net_worker_option(argv[argi])) {
			/* Handled in the subroutine */
		}
		else if (standardoption(argv[argi])) {
			if (showhelp) return 0;
		}
		else {
			errprintf("Unknown option '%s'\n", argv[argi]);
		}
	}

	/* Do the network stuff if needed */
	net_worker_run(ST_ALERT, LOC_SINGLESERVER, NULL);

	if (checkfn) {
		load_checkpoint(checkfn);
		nextcheckpoint = gettimer() + checkpointinterval;
		dbgprintf("Next checkpoint at %d, interval %d\n", (int) nextcheckpoint, checkpointinterval);
	}

	setup_signalhandler("xymond_alert");
	/* Need to handle these ourselves, so we can shutdown and save state-info */
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = sig_handler;
	sigaction(SIGPIPE, &sa, NULL);
	sigaction(SIGTERM, &sa, NULL);
	sigaction(SIGINT,  &sa, NULL);
	sigaction(SIGCHLD, &sa, NULL);
	sigaction(SIGUSR1, &sa, NULL);
	sigaction(SIGHUP,  &sa, NULL);

	if (xgetenv("XYMONSERVERLOGS")) {
		sprintf(acklogfn, "%s/acknowledge.log", xgetenv("XYMONSERVERLOGS"));
		acklogfd = fopen(acklogfn, "a");
		sprintf(notiflogfn, "%s/notifications.log", xgetenv("XYMONSERVERLOGS"));
		notiflogfd = fopen(notiflogfn, "a");
	}

	/*
	 * The general idea here is that this loop handles receiving of alert-
	 * and ack-messages from the master daemon, and maintains a list of 
	 * host+test combinations that may have alerts going out.
	 *
	 * This module does not deal with any specific alert-configuration, 
	 * it just picks up the alert messages, maintains the list of 
	 * known tests that are in some sort of critical condition, and
	 * periodically pushes alerts to the do_alert.c module for handling.
	 *
	 * The only modification of alerts that happen here is the handling
	 * of when the next alert is due. It calls into the next_alert() 
	 * routine to learn when an alert should be repeated, and also 
	 * deals with Acknowledgments that stop alerts from going out for
	 * a period of time.
	 */
	while (running) {
		char *eoln, *restofmsg;
		char *metadata[20];
		char *p;
		int metacount;
		char *hostname = NULL, *testname = NULL;
		struct timespec timeout;
		time_t now, nowtimer;
		int anytogo;
		activealerts_t *awalk;
		int childstat;

		nowtimer = gettimer();
		if (checkfn && (nowtimer > nextcheckpoint)) {
			dbgprintf("Saving checkpoint\n");
			nextcheckpoint = nowtimer + checkpointinterval;
			save_checkpoint(checkfn);

			if (acklogfd) acklogfd = freopen(acklogfn, "a", acklogfd);
			if (notiflogfd) notiflogfd = freopen(notiflogfn, "a", notiflogfd);
		}

		timeout.tv_sec = 60; timeout.tv_nsec = 0;
		msg = get_xymond_message(C_PAGE, "xymond_alert", &seq, &timeout);
		if (msg == NULL) {
			running = 0;
			continue;
		}

		/* See what time it is - must happen AFTER the timeout */
		now = getcurrenttime(NULL);

		/* Split the message in the first line (with meta-data), and the rest */
 		eoln = strchr(msg, '\n');
		if (eoln) {
			*eoln = '\0';
			restofmsg = eoln+1;
		}
		else {
			restofmsg = "";
		}

		/* 
		 * Now parse the meta-data into elements.
		 * We use our own "gettok()" routine which works
		 * like strtok(), but can handle empty elements.
		 */
		metacount = 0; 
		memset(&metadata, 0, sizeof(metadata));
		p = gettok(msg, "|");
		while (p && (metacount < 19)) {
			metadata[metacount] = p;
			metacount++;
			p = gettok(NULL, "|");
		}
		metadata[metacount] = NULL;

		if (metacount > 3) hostname = metadata[3];
		if (metacount > 4) testname = metadata[4];

		if ((metacount > 10) && (strncmp(metadata[0], "@@page", 6) == 0)) {
			/* @@page|timestamp|sender|hostname|testname|hostip|expiretime|color|prevcolor|changetime|location|cookie|osname|classname|grouplist|modifiers */

			int newcolor, newalertstatus, oldalertstatus;

			dbgprintf("Got page message from %s:%s\n", hostname, testname);
			traceprintf("@@page %s:%s:%s=%s\n", hostname, testname, metadata[10], metadata[7]);

			awalk = find_active(hostname, testname);
			if (awalk == NULL) {
				char *hwalk = find_name(hostnames, hostname);
				char *twalk = find_name(testnames, testname);
				char *pwalk = find_name(locations, metadata[10]);

				awalk = (activealerts_t *)calloc(1, sizeof(activealerts_t));
				awalk->hostname = hwalk;
				awalk->testname = twalk;
				awalk->location = pwalk;
				awalk->cookie = -1;
				awalk->state = A_DEAD;
				/*
				 * Use changetime here, if we restart the alert module then
				 * this gets the duration values more right than using "now".
				 * Also, define this only when a new alert arrives - we should
				 * NOT clear this when a status goes yellow->red, or if it
				 * flaps between yellow and red.
				 */
				awalk->eventstart = atoi(metadata[9]);
				add_active(awalk->hostname, awalk);
				traceprintf("New record\n");
			}

			newcolor = parse_color(metadata[7]);
			oldalertstatus = ((alertcolors & (1 << awalk->color)) != 0);
			newalertstatus = ((alertcolors & (1 << newcolor)) != 0);

			traceprintf("state %d->%d\n", oldalertstatus, newalertstatus);

			if (newalertstatus) {
				/* It's in an alert state. */
				awalk->color = newcolor;
				awalk->state = A_PAGING;

				if (newcolor > awalk->maxcolor) {
					if (awalk->maxcolor != 0) {
						/*
						 * Severity has increased (yellow -> red).
						 * Clear the repeat-interval, and set maxcolor to
						 * the new color. If it drops to yellow again,
						 * maxcolor stays at red, so a test that flaps
						 * between yellow and red will only alert on red
						 * the first time, and then follow the repeat
						 * interval.
						 */
						dbgprintf("Severity increased, cleared repeat interval: %s/%s %s->%s\n",
							awalk->hostname, awalk->testname,
							colorname(awalk->maxcolor), colorname(newcolor));
						clear_interval(awalk);
					}

					awalk->maxcolor = newcolor;
				}
			}
			else {
				/* 
				 * Send one "recovered" message out now, then go to A_DEAD.
				 * Dont update the color here - we want recoveries to go out 
				 * only if the alert color triggered an alert
				 */
				awalk->state = (newcolor == COL_BLUE) ? A_DISABLED : A_RECOVERED;
			}

			if (oldalertstatus != newalertstatus) {
				dbgprintf("Alert status changed from %d to %d\n", oldalertstatus, newalertstatus);
				clear_interval(awalk);
			}

			if (awalk->ip) xfree(awalk->ip);
			awalk->ip = strdup(metadata[5]);
			awalk->cookie = atoi(metadata[11]);
			if (awalk->osname) xfree(awalk->osname);
			awalk->osname    = (metadata[12] ? strdup(metadata[12]) : NULL);
			if (awalk->classname) xfree(awalk->classname);
			awalk->classname = (metadata[13] ? strdup(metadata[13]) : NULL);
			if (awalk->groups) xfree(awalk->groups);
			awalk->groups    = (metadata[14] ? strdup(metadata[14]) : NULL);
			if (awalk->pagemessage) xfree(awalk->pagemessage);
			if (metadata[15]) {
				/* Modifiers are more interesting than the message itself */
				awalk->pagemessage = (char *)malloc(strlen(awalk->hostname) + strlen(awalk->testname) + strlen(colorname(awalk->color)) + strlen(metadata[15]) + strlen(restofmsg) + 10);
				sprintf(awalk->pagemessage, "%s:%s %s\n%s\n%s",
					awalk->hostname, awalk->testname, colorname(awalk->color), metadata[15], restofmsg);
			}
			else {
				awalk->pagemessage = strdup(restofmsg);
			}
		}
		else if ((metacount > 5) && (strncmp(metadata[0], "@@ack", 5) == 0)) {
 			/* @@ack|timestamp|sender|hostname|testname|hostip|expiretime */

			/*
			 * An ack is handled simply by setting the next
			 * alert-time to when the ack expires.
			 */
			time_t nextalert = atoi(metadata[6]);

			dbgprintf("Got ack message from %s:%s\n", hostname, testname);
			traceprintf("@@ack: %s:%s now=%d, ackeduntil %d\n",
				     hostname, testname, (int)now, (int)nextalert);

			awalk = find_active(hostname, testname);

			if (acklogfd) {
				int cookie = (awalk ? awalk->cookie : -1);
				int color  = (awalk ? awalk->color : 0);

				fprintf(acklogfd, "%d\t%d\t%d\t%d\t%s\t%s.%s\t%s\t%s\n",
					(int)now, cookie, 
					(int)((nextalert - now) / 60), cookie,
					"np_filename_not_used", 
					hostname, testname, 
					colorname(color),
					nlencode(restofmsg));
				fflush(acklogfd);
			}

			if (awalk && (awalk->state == A_PAGING)) {
				traceprintf("Record updated\n");
				awalk->state = A_ACKED;
				awalk->nextalerttime = nextalert;
				if (awalk->ackmessage) xfree(awalk->ackmessage);
				awalk->ackmessage = strdup(restofmsg);
			}
			else {
				traceprintf("No record\n");
			}
		}
		else if ((metacount > 4) && (strncmp(metadata[0], "@@notify", 5) == 0)) {
			/* @@notify|timestamp|sender|hostname|testname|pagepath */

			char *hwalk = find_name(hostnames, hostname);
			char *twalk = find_name(testnames, testname);
			char *pwalk = find_name(locations, (metadata[5] ? metadata[5] : ""));

			awalk = (activealerts_t *)calloc(1, sizeof(activealerts_t));
			awalk->hostname = hwalk;
			awalk->testname = twalk;
			awalk->location = pwalk;
			awalk->cookie = -1;
			awalk->pagemessage = strdup(restofmsg);
			awalk->eventstart = getcurrenttime(NULL);
			awalk->state = A_NOTIFY;
			add_active(awalk->hostname, awalk);
		}
		else if ((metacount > 3) && 
			 ((strncmp(metadata[0], "@@drophost", 10) == 0) || (strncmp(metadata[0], "@@dropstate", 11) == 0))) {
			/* @@drophost|timestamp|sender|hostname */
			/* @@dropstate|timestamp|sender|hostname */
			xtreePos_t handle;

			handle = xtreeFind(hostnames, hostname);
			if (handle != xtreeEnd(hostnames)) {
				alertanchor_t *anchor = (alertanchor_t *)xtreeData(hostnames, handle);
				for (awalk = anchor->head; (awalk); awalk = awalk->next) awalk->state = A_DEAD;
			}
		}
		else if ((metacount > 4) && (strncmp(metadata[0], "@@droptest", 10) == 0)) {
			/* @@droptest|timestamp|sender|hostname|testname */

			awalk = find_active(hostname, testname);
			if (awalk) awalk->state = A_DEAD;
		}
		else if ((metacount > 4) && (strncmp(metadata[0], "@@renamehost", 12) == 0)) {
			/* @@renamehost|timestamp|sender|hostname|newhostname */

			/* 
			 * We handle rename's simply by dropping the alert. If there is still an
			 * active alert for the host, it will have to be dealt with when the next
			 * status update arrives.
			 */
			xtreePos_t handle;

			handle = xtreeFind(hostnames, hostname);
			if (handle != xtreeEnd(hostnames)) {
				alertanchor_t *anchor = (alertanchor_t *)xtreeData(hostnames, handle);
				for (awalk = anchor->head; (awalk); awalk = awalk->next) awalk->state = A_DEAD;
			}
		}
		else if ((metacount > 5) && (strncmp(metadata[0], "@@renametest", 12) == 0)) {
			/* @@renametest|timestamp|sender|hostname|oldtestname|newtestname */

			/* 
			 * We handle rename's simply by dropping the alert. If there is still an
			 * active alert for the host, it will have to be dealt with when the next
			 * status update arrives.
			 */
			awalk = find_active(hostname, testname);
			if (awalk) awalk->state = A_DEAD;
		}
		else if (strncmp(metadata[0], "@@shutdown", 10) == 0) {
			running = 0;
			errprintf("Got a shutdown message\n");
			continue;
		}
		else if (strncmp(metadata[0], "@@logrotate", 11) == 0) {
			char *fn = xgetenv("XYMONCHANNEL_LOGFILENAME");
			if (fn && strlen(fn)) {
				reopen_file(fn, "a", stdout);
				reopen_file(fn, "a", stderr);

				if (tracefn) {
					stoptrace();
					starttrace(tracefn);
				}
			}
			continue;
		}
		else if (strncmp(metadata[0], "@@reload", 8) == 0) {
			/* Nothing ... right now */
		}
		else if (strncmp(metadata[0], "@@idle", 6) == 0) {
			/* Timeout */
		}

		/*
		 * When a burst of alerts happen, we get lots of alert messages
		 * coming in quickly. So lets handle them in bunches and only 
		 * do the full alert handling once every 10 secs - that lets us
		 * combine a bunch of alerts into one transmission process.
		 */
		if (nowtimer < (lastxmit+10)) continue;
		lastxmit = nowtimer;

		/* 
		 * Loop through the activealerts list and see if anything is pending.
		 * This is an optimization, we could just as well just fork off the
		 * notification child and let it handle all of it. But there is no
		 * reason to fork a child process unless it is going to do something.
		 */
		configchanged = load_alertconfig(configfn, alertcolors, alertinterval);
		configchanged += load_holidays(0);
		anytogo = 0;
		for (awalk = alistBegin(); (awalk); awalk = alistNext()) {
			int anymatch = 0;

			switch (awalk->state) {
			  case A_NORECIP:
				if (!configchanged) break;

				/* The configuration has changed - switch NORECIP -> PAGING */
				awalk->state = A_PAGING;
				clear_interval(awalk);
				/* Fall through */

			  case A_PAGING:
				if (have_recipient(awalk, &anymatch)) {
					if (awalk->nextalerttime <= now) anytogo++;
				}
				else {
					if (!anymatch) {
						awalk->state = A_NORECIP;
						cleanup_alert(awalk);
					}
				}
				break;

			  case A_ACKED:
				if (awalk->nextalerttime <= now) {
					/* An ack has expired, so drop the ack message and switch to A_PAGING */
					anytogo++;
					if (awalk->ackmessage) xfree(awalk->ackmessage);
					awalk->state = A_PAGING;
				}
				break;

			  case A_RECOVERED:
			  case A_DISABLED:
			  case A_NOTIFY:
				anytogo++;
				break;

			  case A_DEAD:
				break;
			}
		}
		dbgprintf("%d alerts to go\n", anytogo);

		if (anytogo) {
			pid_t childpid;

			childpid = fork();
			if (childpid == 0) {
				/* The child */
				start_alerts();
				for (awalk = alistBegin(); (awalk); awalk = alistNext()) {
					switch (awalk->state) {
					  case A_PAGING:
						if (awalk->nextalerttime <= now) {
							send_alert(awalk, notiflogfd);
						}
						break;

					  case A_ACKED:
						/* Cannot be A_ACKED unless the ack is still valid, so no alert. */
						break;

					  case A_RECOVERED:
					  case A_DISABLED:
					  case A_NOTIFY:
						send_alert(awalk, notiflogfd);
						break;

					  case A_NORECIP:
					  case A_DEAD:
						break;
					}
				}
				finish_alerts();

				/* Child does not continue */
				exit(0);
			}
			else if (childpid < 0) {
				errprintf("Fork failed, cannot send alerts: %s\n", strerror(errno));
			}
		}

		/* Update the state flag and the next-alert timestamp */
		for (awalk = alistBegin(); (awalk); awalk = alistNext()) {
			switch (awalk->state) {
			  case A_PAGING:
				if (awalk->nextalerttime <= now) awalk->nextalerttime = next_alert(awalk);
				break;

			  case A_NORECIP:
				break;

			  case A_ACKED:
				/* Still cannot get here except if ack is still valid */
				break;

			  case A_RECOVERED:
			  case A_DISABLED:
			  case A_NOTIFY:
				awalk->state = A_DEAD;
				/* Fall through */

			  case A_DEAD:
				cleanup_alert(awalk); 
				break;
			}
		}

		clean_all_active();

		/* Pickup any finished child processes to avoid zombies */
		while (wait3(&childstat, WNOHANG, NULL) > 0) ;
	}

	if (checkfn) save_checkpoint(checkfn);
	if (acklogfd) fclose(acklogfd);
	if (notiflogfd) fclose(notiflogfd);
	stoptrace();

	MEMUNDEFINE(notiflogfn);
	MEMUNDEFINE(acklogfn);

	if (termsig >= 0) {
		errprintf("Terminated by signal %d\n", termsig);
	}

	return 0;
}
Exemple #3
0
/// @brief Function to run one a single program
///
/// @callgraph
/// @callergraph
///
/// @section License
/// Use of this code, either in source or compiled form, is subject to license from the authors.
/// Copyright \htmlonly &copy \endhtmlonly Richard Evans, 2009-2011. All Rights Reserved.
///
/// @section Information
/// @author  Richard Evans, [email protected]
/// @version 1.1
/// @date    09/03/2011
///
/// @return EXIT_SUCCESS
///
/// @internal
///	Created:		02/10/2008
///	Revision:	1.1 09/03/2011
///=====================================================================================
///
int run(){
	// Check for calling of function
	if(err::check==true) std::cout << "sim::run has been called" << std::endl;

	// Initialise simulation data structures
	sim::initialize(mp::num_materials);

   anisotropy::initialize(atoms::num_atoms, atoms::type_array, mp::mu_s_array);

   // now seed generator
	mtrandom::grnd.seed(vmpi::parallel_rng_seed(mtrandom::integration_seed));

   // Check for load spin configurations from checkpoint
   if(sim::load_checkpoint_flag) load_checkpoint();

   {
      // Set up statistical data sets
      #ifdef MPICF
         int num_atoms_for_statistics = vmpi::num_core_atoms+vmpi::num_bdry_atoms;
      #else
         int num_atoms_for_statistics = atoms::num_atoms;
      #endif
      // unroll list of non-magnetic materials
      std::vector<bool> non_magnetic_materials_array(mp::num_materials, false);
      for(int m = 0; m < mp::num_materials; m++){
         if( mp::material[m].non_magnetic == 2 ) non_magnetic_materials_array[m] = true;
      }
      stats::initialize(num_atoms_for_statistics, mp::num_materials, atoms::m_spin_array, atoms::type_array, atoms::category_array, non_magnetic_materials_array);
   }

   // Precalculate initial statistics
   stats::update(atoms::x_spin_array, atoms::y_spin_array, atoms::z_spin_array, atoms::m_spin_array);

   // initialise dipole field calculation
   dipole::initialize(cells::num_atoms_in_unit_cell,
                     cells::num_cells,
                     cells::num_local_cells,
                     cells::macro_cell_size,
                     cells::local_cell_array,
                     cells::num_atoms_in_cell,
                     cells::num_atoms_in_cell_global, // <----
                     cells::index_atoms_array,
                     cells::volume_array,
                     cells::pos_and_mom_array,
                     cells::atom_in_cell_coords_array_x,
                     cells::atom_in_cell_coords_array_y,
                     cells::atom_in_cell_coords_array_z,
                     atoms::type_array,
                     cells::atom_cell_id_array,
                     atoms::x_coord_array,
                     atoms::y_coord_array,
                     atoms::z_coord_array,
                     atoms::num_atoms
   );

   // Initialize GPU acceleration if enabled
   if(gpu::acceleration) gpu::initialize();

   // For MPI version, calculate initialisation time
	if(vmpi::my_rank==0){
		#ifdef MPICF
			std::cout << "Time for initialisation: " << MPI_Wtime()-vmpi::start_time << std::endl;
			zlog << zTs() << "Time for initialisation: " << MPI_Wtime()-vmpi::start_time << std::endl;
			vmpi::start_time=MPI_Wtime(); // reset timer
		#endif
   }

   // Set timer for runtime
   stopwatch_t stopwatch;
   stopwatch.start();

   // Precondition spins at equilibration temperature
   sim::internal::monte_carlo_preconditioning();

   // For MPI version, calculate initialisation time
   if(vmpi::my_rank==0){
		std::cout << "Starting Simulation with Program ";
		zlog << zTs() << "Starting Simulation with Program ";
	}

	// Now set initial compute time
	#ifdef MPICF
	vmpi::ComputeTime=MPI_Wtime();
	vmpi::WaitTime=MPI_Wtime();
	vmpi::TotalComputeTime=0.0;
	vmpi::TotalWaitTime=0.0;
	#endif

	// Select program to run
	switch(sim::program){
		case 0:
			if(vmpi::my_rank==0){
				std::cout << "Benchmark..." << std::endl;
				zlog << "Benchmark..." << std::endl;
			}
			program::bmark();
			break;

		case 1:
			if(vmpi::my_rank==0){
				std::cout << "Time-Series..." << std::endl;
				zlog << "Time-Series..." << std::endl;
			}
			program::time_series();
			break;

		case 2:
			if(vmpi::my_rank==0){
				std::cout << "Hysteresis-Loop..." << std::endl;
				zlog << "Hysteresis-Loop..." << std::endl;
			}
			program::hysteresis();
			break;

		case 3:
			if(vmpi::my_rank==0){
				std::cout << "Static-Hysteresis-Loop..." << std::endl;
				zlog << "Static-Hysteresis-Loop..." << std::endl;
			}
			program::static_hysteresis();
			break;

		case 4:
			if(vmpi::my_rank==0){
				std::cout << "Curie-Temperature..." << std::endl;
				zlog << "Curie-Temperature..." << std::endl;
			}
			program::curie_temperature();
			break;

		case 5:
			if(vmpi::my_rank==0){
				std::cout << "Field-Cool..." << std::endl;
				zlog << "Field-Cool..." << std::endl;
			}
			program::field_cool();
			break;

		case 6:
			if(vmpi::my_rank==0){
				std::cout << "Temperature-Pulse..." << std::endl;
				zlog << "Temperature-Pulse..." << std::endl;
			}
			program::temperature_pulse();
			break;

		case 7:
			if(vmpi::my_rank==0){
				std::cout << "HAMR-Simulation..." << std::endl;
				zlog << "HAMR-Simulation..." << std::endl;
			}
			program::hamr();
			break;

		case 8:
			if(vmpi::my_rank==0){
				std::cout << "CMC-Anisotropy..." << std::endl;
				zlog << "CMC-Anisotropy..." << std::endl;
			}
			program::cmc_anisotropy();
			break;

		case 9:
			if(vmpi::my_rank==0){
				std::cout << "Hybrid-CMC..." << std::endl;
				zlog << "Hybrid-CMC..." << std::endl;
			}
			program::hybrid_cmc();
			break;

      case 10:
         if(vmpi::my_rank==0){
            std::cout << "Reverse-Hybrid-CMC..." << std::endl;
            zlog << "Reverse-Hybrid-CMC..." << std::endl;
         }
         program::reverse_hybrid_cmc();
         break;

      case 11:
         if(vmpi::my_rank==0){
            std::cout << "LaGrange-Multiplier..." << std::endl;
            zlog << "LaGrange-Multiplier..." << std::endl;
         }
         program::lagrange_multiplier();
         break;

      case 12:
         if(vmpi::my_rank==0){
            std::cout << "partial-hysteresis-loop..." << std::endl;
            zlog << "partial-hysteresis-loop..." << std::endl;
         }
         program::partial_hysteresis_loop();
         break;

      case 13:
         if(vmpi::my_rank==0){
            std::cout << "localised-temperature-pulse..." << std::endl;
            zlog << "localised-temperature-pulse..." << std::endl;
         }
         program::localised_temperature_pulse();
         break;

      case 14:
         if(vmpi::my_rank==0){
            std::cout << "effective-damping..." << std::endl;
            zlog << "effective-damping..." << std::endl;
         }
         program::effective_damping();
         break;

		case 15:
	  		if(vmpi::my_rank==0){
	    		std::cout << "fmr..." << std::endl;
	    		zlog << "fmr..." << std::endl;
	  		}
	  		program::fmr();
	  		break;

		case 16:
	  		if(vmpi::my_rank==0){
	    		std::cout << "localised-field-cool..." << std::endl;
	    		zlog << "localised-field-cool..." << std::endl;
	  		}
	  		program::local_field_cool();
	  		break;

		case 50:
			if(vmpi::my_rank==0){
				std::cout << "Diagnostic-Boltzmann..." << std::endl;
				zlog << "Diagnostic-Boltzmann..." << std::endl;
			}
			program::boltzmann_dist();
			break;

	    case 51:
		  	if(vmpi::my_rank==0){
		       std::cout << "Setting..." << std::endl;
		       zlog << "Setting..." << std::endl;
	     	}
		  	program::setting_process();
		    break;

		default:{
			std::cerr << "Unknown Internal Program ID "<< sim::program << " requested, exiting" << std::endl;
			zlog << "Unknown Internal Program ID "<< sim::program << " requested, exiting" << std::endl;
			exit (EXIT_FAILURE);
			}
	}

   std::cout <<     "Simulation run time [s]: " << stopwatch.elapsed_seconds() << std::endl;
   zlog << zTs() << "Simulation run time [s]: " << stopwatch.elapsed_seconds() << std::endl;

   //------------------------------------------------
   // Output Monte Carlo statistics if applicable
   //------------------------------------------------
   if(sim::integrator==1){
      std::cout << "Monte Carlo statistics:" << std::endl;
      std::cout << "\tTotal moves: " << long(sim::mc_statistics_moves) << std::endl;
      std::cout << "\t" << ((sim::mc_statistics_moves - sim::mc_statistics_reject)/sim::mc_statistics_moves)*100.0 << "% Accepted" << std::endl;
      std::cout << "\t" << (sim::mc_statistics_reject/sim::mc_statistics_moves)*100.0                              << "% Rejected" << std::endl;
      zlog << zTs() << "Monte Carlo statistics:" << std::endl;
      zlog << zTs() << "\tTotal moves: " << sim::mc_statistics_moves << std::endl;
      zlog << zTs() << "\t" << ((sim::mc_statistics_moves - sim::mc_statistics_reject)/sim::mc_statistics_moves)*100.0 << "% Accepted" << std::endl;
      zlog << zTs() << "\t" << (sim::mc_statistics_reject/sim::mc_statistics_moves)*100.0                              << "% Rejected" << std::endl;
   }
   if(sim::integrator==3 || sim::integrator==4){
      std::cout << "Constrained Monte Carlo statistics:" << std::endl;
      std::cout << "\tTotal moves: " << cmc::mc_total << std::endl;
      std::cout << "\t" << (cmc::mc_success/cmc::mc_total)*100.0    << "% Accepted" << std::endl;
      std::cout << "\t" << (cmc::energy_reject/cmc::mc_total)*100.0 << "% Rejected (Energy)" << std::endl;
      std::cout << "\t" << (cmc::sphere_reject/cmc::mc_total)*100.0 << "% Rejected (Sphere)" << std::endl;
      zlog << zTs() << "Constrained Monte Carlo statistics:" << std::endl;
      zlog << zTs() << "\tTotal moves: " << cmc::mc_total << std::endl;
      zlog << zTs() << "\t" << (cmc::mc_success/cmc::mc_total)*100.0    << "% Accepted" << std::endl;
      zlog << zTs() << "\t" << (cmc::energy_reject/cmc::mc_total)*100.0 << "% Rejected (Energy)" << std::endl;
      zlog << zTs() << "\t" << (cmc::sphere_reject/cmc::mc_total)*100.0 << "% Rejected (Sphere)" << std::endl;
   }

	//program::LLB_Boltzmann();

   // De-initialize GPU
   if(gpu::acceleration) gpu::finalize();

   // optionally save checkpoint file
   if(sim::save_checkpoint_flag && !sim::save_checkpoint_continuous_flag) save_checkpoint();

	return EXIT_SUCCESS;
}