예제 #1
0
void pme_gpu_launch_complex_transforms(gmx_pme_t      *pme,
                                       gmx_wallcycle  *wcycle)
{
    PmeGpu            *pmeGpu                 = pme->gpu;
    const bool         computeEnergyAndVirial = (pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
    const bool         performBackFFT         = (pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
    const unsigned int gridIndex              = 0;
    t_complex         *cfftgrid               = pme->cfftgrid[gridIndex];

    if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD)
    {
        if (!pme_gpu_performs_FFT(pmeGpu))
        {
            wallcycle_start(wcycle, ewcWAIT_GPU_PME_SPREAD);
            pme_gpu_sync_spread_grid(pme->gpu);
            wallcycle_stop(wcycle, ewcWAIT_GPU_PME_SPREAD);
        }
    }

    try
    {
        if (pmeGpu->settings.currentFlags & GMX_PME_SOLVE)
        {
            /* do R2C 3D-FFT */
            parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_REAL_TO_COMPLEX, wcycle);

            /* solve in k-space for our local cells */
            if (pme_gpu_performs_solve(pmeGpu))
            {
                const auto gridOrdering = pme_gpu_uses_dd(pmeGpu) ? GridOrdering::YZX : GridOrdering::XYZ;
                wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
                wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
                pme_gpu_solve(pmeGpu, cfftgrid, gridOrdering, computeEnergyAndVirial);
                wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
                wallcycle_stop(wcycle, ewcLAUNCH_GPU);
            }
            else
            {
                wallcycle_start(wcycle, ewcPME_SOLVE_MIXED_MODE);
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
                for (int thread = 0; thread < pme->nthread; thread++)
                {
                    solve_pme_yzx(pme, cfftgrid, pme->boxVolume,
                                  computeEnergyAndVirial, pme->nthread, thread);
                }
                wallcycle_stop(wcycle, ewcPME_SOLVE_MIXED_MODE);
            }
        }

        if (performBackFFT)
        {
            parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_COMPLEX_TO_REAL, wcycle);
        }
    } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
예제 #2
0
/*! \brief
 * A convenience wrapper for launching either the GPU or CPU FFT.
 *
 * \param[in] pme            The PME structure.
 * \param[in] gridIndex      The grid index - should currently always be 0.
 * \param[in] dir            The FFT direction enum.
 * \param[in] wcycle         The wallclock counter.
 */
void inline parallel_3dfft_execute_gpu_wrapper(gmx_pme_t              *pme,
                                               const int               gridIndex,
                                               enum gmx_fft_direction  dir,
                                               gmx_wallcycle_t         wcycle)
{
    GMX_ASSERT(gridIndex == 0, "Only single grid supported");
    if (pme_gpu_performs_FFT(pme->gpu))
    {
        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
        pme_gpu_3dfft(pme->gpu, dir, gridIndex);
        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
    }
    else
    {
        wallcycle_start(wcycle, ewcPME_FFT_MIXED_MODE);
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
        for (int thread = 0; thread < pme->nthread; thread++)
        {
            gmx_parallel_3dfft_execute(pme->pfft_setup[gridIndex], dir, thread, wcycle);
        }
        wallcycle_stop(wcycle, ewcPME_FFT_MIXED_MODE);
    }
}
예제 #3
0
void pme_gpu_launch_spread(gmx_pme_t            *pme,
                           const rvec           *x,
                           gmx_wallcycle        *wcycle)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");

    PmeGpu *pmeGpu = pme->gpu;

    // The only spot of PME GPU where LAUNCH_GPU counter increases call-count
    wallcycle_start(wcycle, ewcLAUNCH_GPU);
    // The only spot of PME GPU where ewcsLAUNCH_GPU_PME subcounter increases call-count
    wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_PME);
    pme_gpu_copy_input_coordinates(pmeGpu, x);
    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
    wallcycle_stop(wcycle, ewcLAUNCH_GPU);

    const unsigned int gridIndex  = 0;
    real              *fftgrid    = pme->fftgrid[gridIndex];
    if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD)
    {
        /* Spread the coefficients on a grid */
        const bool computeSplines = true;
        const bool spreadCharges  = true;
        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
        pme_gpu_spread(pmeGpu, gridIndex, fftgrid, computeSplines, spreadCharges);
        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
    }
}
예제 #4
0
void mdoutf_tng_close(gmx_mdoutf_t of)
{
    if (of->tng || of->tng_low_prec)
    {
        wallcycle_start(of->wcycle, ewcTRAJ);
        gmx_tng_close(&of->tng);
        gmx_tng_close(&of->tng_low_prec);
        wallcycle_stop(of->wcycle, ewcTRAJ);
    }
}
예제 #5
0
void wallcycle_start_nocount(gmx_wallcycle_t wc, int ewc)
{
    if (wc == NULL)
    {
        return;
    }

    wallcycle_start(wc, ewc);
    wc->wcc[ewc].n--;
}
예제 #6
0
static void reset_pmeonly_counters(gmx_wallcycle_t           wcycle,
                                   gmx_walltime_accounting_t walltime_accounting,
                                   t_nrnb                   *nrnb,
                                   int64_t                   step,
                                   bool                      useGpuForPme)
{
    /* Reset all the counters related to performance over the run */
    wallcycle_stop(wcycle, ewcRUN);
    wallcycle_reset_all(wcycle);
    init_nrnb(nrnb);
    wallcycle_start(wcycle, ewcRUN);
    walltime_accounting_reset_time(walltime_accounting, step);

    if (useGpuForPme)
    {
        resetGpuProfiler();
    }
}
예제 #7
0
static void reset_pmeonly_counters(gmx_wallcycle_t wcycle,
                                   gmx_walltime_accounting_t walltime_accounting,
                                   t_nrnb *nrnb, t_inputrec *ir,
                                   gmx_int64_t step)
{
    /* Reset all the counters related to performance over the run */
    wallcycle_stop(wcycle, ewcRUN);
    wallcycle_reset_all(wcycle);
    init_nrnb(nrnb);
    if (ir->nsteps >= 0)
    {
        /* ir->nsteps is not used here, but we update it for consistency */
        ir->nsteps -= step - ir->init_step;
    }
    ir->init_step = step;
    wallcycle_start(wcycle, ewcRUN);
    walltime_accounting_start(walltime_accounting);
}
예제 #8
0
bool pme_gpu_try_finish_task(const gmx_pme_t                *pme,
                             gmx_wallcycle                  *wcycle,
                             gmx::ArrayRef<const gmx::RVec> *forces,
                             matrix                          virial,
                             real                           *energy,
                             GpuTaskCompletion               completionKind)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");

    wallcycle_start_nocount(wcycle, ewcWAIT_GPU_PME_GATHER);

    if (completionKind == GpuTaskCompletion::Check)
    {
        // Query the PME stream for completion of all tasks enqueued and
        // if we're not done, stop the timer before early return.
        if (!pme_gpu_stream_query(pme->gpu))
        {
            wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER);
            return false;
        }
    }
    else
    {
        // Synchronize the whole PME stream at once, including D2H result transfers.
        pme_gpu_synchronize(pme->gpu);
    }
    wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER);

    // Time the final staged data handling separately with a counting call to get
    // the call count right.
    wallcycle_start(wcycle, ewcWAIT_GPU_PME_GATHER);
    pme_gpu_update_timings(pme->gpu);
    pme_gpu_get_staged_results(pme, forces, virial, energy);
    wallcycle_stop(wcycle, ewcWAIT_GPU_PME_GATHER);

    return true;
}
예제 #9
0
void
do_md_trajectory_writing(FILE           *fplog,
                         t_commrec      *cr,
                         int             nfile,
                         const t_filenm  fnm[],
                         gmx_int64_t     step,
                         gmx_int64_t     step_rel,
                         double          t,
                         t_inputrec     *ir,
                         t_state        *state,
                         t_state        *state_global,
                         gmx_mtop_t     *top_global,
                         t_forcerec     *fr,
                         gmx_mdoutf_t    outf,
                         t_mdebin       *mdebin,
                         gmx_ekindata_t *ekind,
                         rvec           *f,
                         rvec           *f_global,
                         int            *nchkpt,
                         gmx_bool        bCPT,
                         gmx_bool        bRerunMD,
                         gmx_bool        bLastStep,
                         gmx_bool        bDoConfOut,
                         gmx_bool        bSumEkinhOld
                        )
{
    int   mdof_flags;

    mdof_flags = 0;
    if (do_per_step(step, ir->nstxout))
    {
        mdof_flags |= MDOF_X;
    }
    if (do_per_step(step, ir->nstvout))
    {
        mdof_flags |= MDOF_V;
    }
    if (do_per_step(step, ir->nstfout))
    {
        mdof_flags |= MDOF_F;
    }
    if (do_per_step(step, ir->nstxout_compressed))
    {
        mdof_flags |= MDOF_X_COMPRESSED;
    }
    if (bCPT)
    {
        mdof_flags |= MDOF_CPT;
    }
    ;

#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
    if (bLastStep)
    {
        /* Enforce writing positions and velocities at end of run */
        mdof_flags |= (MDOF_X | MDOF_V);
    }
#endif
#ifdef GMX_FAHCORE
    if (MASTER(cr))
    {
        fcReportProgress( ir->nsteps, step );
    }

#if defined(__native_client__)
    fcCheckin(MASTER(cr));
#endif

    /* sync bCPT and fc record-keeping */
    if (bCPT && MASTER(cr))
    {
        fcRequestCheckPoint();
    }
#endif

    if (mdof_flags != 0)
    {
        wallcycle_start(mdoutf_get_wcycle(outf), ewcTRAJ);
        if (bCPT)
        {
            if (MASTER(cr))
            {
                if (bSumEkinhOld)
                {
                    state_global->ekinstate.bUpToDate = FALSE;
                }
                else
                {
                    update_ekinstate(&state_global->ekinstate, ekind);
                    state_global->ekinstate.bUpToDate = TRUE;
                }
                update_energyhistory(&state_global->enerhist, mdebin);
            }
        }
        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global,
                                         step, t, state, state_global, f, f_global);
        if (bCPT)
        {
            (*nchkpt)++;
        }
        debug_gmx();
        if (bLastStep && step_rel == ir->nsteps &&
                bDoConfOut && MASTER(cr) &&
                !bRerunMD)
        {
            /* x and v have been collected in mdoutf_write_to_trajectory_files,
             * because a checkpoint file will always be written
             * at the last step.
             */
            fprintf(stderr, "\nWriting final coordinates.\n");
            if (fr->bMolPBC)
            {
                /* Make molecules whole only for confout writing */
                do_pbc_mtop(fplog, ir->ePBC, state->box, top_global, state_global->x);
            }
            write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm),
                                *top_global->name, top_global,
                                state_global->x, state_global->v,
                                ir->ePBC, state->box);
            debug_gmx();
        }
        wallcycle_stop(mdoutf_get_wcycle(outf), ewcTRAJ);
    }
}
예제 #10
0
void compute_globals(FILE *fplog, gmx_global_stat_t gstat, t_commrec *cr, t_inputrec *ir,
                     t_forcerec *fr, gmx_ekindata_t *ekind,
                     t_state *state, t_state *state_global, t_mdatoms *mdatoms,
                     t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle,
                     gmx_enerdata_t *enerd, tensor force_vir, tensor shake_vir, tensor total_vir,
                     tensor pres, rvec mu_tot, gmx_constr_t constr,
                     globsig_t *gs, gmx_bool bInterSimGS,
                     matrix box, gmx_mtop_t *top_global,
                     gmx_bool *bSumEkinhOld, int flags)
{
    int      i, gsi;
    real     gs_buf[eglsNR];
    tensor   corr_vir, corr_pres;
    gmx_bool bEner, bPres, bTemp;
    gmx_bool bStopCM, bGStat,
             bReadEkin, bEkinAveVel, bScaleEkin, bConstrain;
    real     prescorr, enercorr, dvdlcorr, dvdl_ekin;

    /* translate CGLO flags to gmx_booleans */
    bStopCM       = flags & CGLO_STOPCM;
    bGStat        = flags & CGLO_GSTAT;
    bReadEkin     = (flags & CGLO_READEKIN);
    bScaleEkin    = (flags & CGLO_SCALEEKIN);
    bEner         = flags & CGLO_ENERGY;
    bTemp         = flags & CGLO_TEMPERATURE;
    bPres         = (flags & CGLO_PRESSURE);
    bConstrain    = (flags & CGLO_CONSTRAINT);

    /* we calculate a full state kinetic energy either with full-step velocity verlet
       or half step where we need the pressure */

    bEkinAveVel = (ir->eI == eiVV || (ir->eI == eiVVAK && bPres) || bReadEkin);

    /* in initalization, it sums the shake virial in vv, and to
       sums ekinh_old in leapfrog (or if we are calculating ekinh_old) for other reasons */

    /* ########## Kinetic energy  ############## */

    if (bTemp)
    {
        /* Non-equilibrium MD: this is parallellized, but only does communication
         * when there really is NEMD.
         */

        if (PAR(cr) && (ekind->bNEMD))
        {
            accumulate_u(cr, &(ir->opts), ekind);
        }
        debug_gmx();
        if (bReadEkin)
        {
            restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
        }
        else
        {
            calc_ke_part(state, &(ir->opts), mdatoms, ekind, nrnb, bEkinAveVel);
        }

        debug_gmx();
    }

    /* Calculate center of mass velocity if necessary, also parallellized */
    if (bStopCM)
    {
        calc_vcm_grp(0, mdatoms->homenr, mdatoms,
                     state->x, state->v, vcm);
    }

    if (bTemp || bStopCM || bPres || bEner || bConstrain)
    {
        if (!bGStat)
        {
            /* We will not sum ekinh_old,
             * so signal that we still have to do it.
             */
            *bSumEkinhOld = TRUE;

        }
        else
        {
            if (gs != NULL)
            {
                for (i = 0; i < eglsNR; i++)
                {
                    gs_buf[i] = gs->sig[i];
                }
            }
            if (PAR(cr))
            {
                wallcycle_start(wcycle, ewcMoveE);
                global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot,
                            ir, ekind, constr, bStopCM ? vcm : NULL,
                            gs != NULL ? eglsNR : 0, gs_buf,
                            top_global, state,
                            *bSumEkinhOld, flags);
                wallcycle_stop(wcycle, ewcMoveE);
            }
            if (gs != NULL)
            {
                if (MULTISIM(cr) && bInterSimGS)
                {
                    if (MASTER(cr))
                    {
                        /* Communicate the signals between the simulations */
                        gmx_sum_sim(eglsNR, gs_buf, cr->ms);
                    }
                    /* Communicate the signals form the master to the others */
                    gmx_bcast(eglsNR*sizeof(gs_buf[0]), gs_buf, cr);
                }
                for (i = 0; i < eglsNR; i++)
                {
                    if (bInterSimGS || gs_simlocal[i])
                    {
                        /* Set the communicated signal only when it is non-zero,
                         * since signals might not be processed at each MD step.
                         */
                        gsi = (gs_buf[i] >= 0 ?
                               (int)(gs_buf[i] + 0.5) :
                               (int)(gs_buf[i] - 0.5));
                        if (gsi != 0)
                        {
                            gs->set[i] = gsi;
                        }
                        /* Turn off the local signal */
                        gs->sig[i] = 0;
                    }
                }
            }
            *bSumEkinhOld = FALSE;
        }
    }

    if (!ekind->bNEMD && debug && bTemp && (vcm->nr > 0))
    {
        correct_ekin(debug,
                     0, mdatoms->homenr,
                     state->v, vcm->group_p[0],
                     mdatoms->massT, mdatoms->tmass, ekind->ekin);
    }

    /* Do center of mass motion removal */
    if (bStopCM)
    {
        check_cm_grp(fplog, vcm, ir, 1);
        do_stopcm_grp(0, mdatoms->homenr, mdatoms->cVCM,
                      state->x, state->v, vcm);
        inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
    }

    if (bEner)
    {
        /* Calculate the amplitude of the cosine velocity profile */
        ekind->cosacc.vcos = ekind->cosacc.mvcos/mdatoms->tmass;
    }

    if (bTemp)
    {
        /* Sum the kinetic energies of the groups & calc temp */
        /* compute full step kinetic energies if vv, or if vv-avek and we are computing the pressure with IR_NPT_TROTTER */
        /* three maincase:  VV with AveVel (md-vv), vv with AveEkin (md-vv-avek), leap with AveEkin (md).
           Leap with AveVel is not supported; it's not clear that it will actually work.
           bEkinAveVel: If TRUE, we simply multiply ekin by ekinscale to get a full step kinetic energy.
           If FALSE, we average ekinh_old and ekinh*ekinscale_nhc to get an averaged half step kinetic energy.
         */
        enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, &dvdl_ekin,
                                       bEkinAveVel, bScaleEkin);
        enerd->dvdl_lin[efptMASS] = (double) dvdl_ekin;

        enerd->term[F_EKIN] = trace(ekind->ekin);
    }

    /* ##########  Long range energy information ###### */

    if (bEner || bPres || bConstrain)
    {
        calc_dispcorr(ir, fr, top_global->natoms, box, state->lambda[efptVDW],
                      corr_pres, corr_vir, &prescorr, &enercorr, &dvdlcorr);
    }

    if (bEner)
    {
        enerd->term[F_DISPCORR]  = enercorr;
        enerd->term[F_EPOT]     += enercorr;
        enerd->term[F_DVDL_VDW] += dvdlcorr;
    }

    /* ########## Now pressure ############## */
    if (bPres || bConstrain)
    {

        m_add(force_vir, shake_vir, total_vir);

        /* Calculate pressure and apply LR correction if PPPM is used.
         * Use the box from last timestep since we already called update().
         */

        enerd->term[F_PRES] = calc_pres(fr->ePBC, ir->nwall, box, ekind->ekin, total_vir, pres);

        /* Calculate long range corrections to pressure and energy */
        /* this adds to enerd->term[F_PRES] and enerd->term[F_ETOT],
           and computes enerd->term[F_DISPCORR].  Also modifies the
           total_vir and pres tesors */

        m_add(total_vir, corr_vir, total_vir);
        m_add(pres, corr_pres, pres);
        enerd->term[F_PDISPCORR] = prescorr;
        enerd->term[F_PRES]     += prescorr;
    }
}
예제 #11
0
void
do_md_trajectory_writing(FILE           *fplog,
                         t_commrec      *cr,
                         int             nfile,
                         const t_filenm  fnm[],
                         gmx_int64_t     step,
                         gmx_int64_t     step_rel,
                         double          t,
                         t_inputrec     *ir,
                         t_state        *state,
                         t_state        *state_global,
                         gmx_mtop_t     *top_global,
                         t_forcerec     *fr,
                         gmx_mdoutf_t    outf,
                         t_mdebin       *mdebin,
                         gmx_ekindata_t *ekind,
                         rvec           *f,
                         rvec           *f_global,
                         int            *nchkpt,
                         gmx_bool        bCPT,
                         gmx_bool        bRerunMD,
                         gmx_bool        bLastStep,
                         gmx_bool        bDoConfOut,
                         gmx_bool        bSumEkinhOld
                         )
{
    int   mdof_flags;
    rvec *x_for_confout = NULL;

    mdof_flags = 0;
    if (do_per_step(step, ir->nstxout))
    {
        mdof_flags |= MDOF_X;
    }
    if (do_per_step(step, ir->nstvout))
    {
        mdof_flags |= MDOF_V;
    }
    if (do_per_step(step, ir->nstfout))
    {
        mdof_flags |= MDOF_F;
    }
    if (do_per_step(step, ir->nstxout_compressed))
    {
        mdof_flags |= MDOF_X_COMPRESSED;
    }
    if (bCPT)
    {
        mdof_flags |= MDOF_CPT;
    }
    ;

#if defined(GMX_FAHCORE)
    if (bLastStep)
    {
        /* Enforce writing positions and velocities at end of run */
        mdof_flags |= (MDOF_X | MDOF_V);
    }
    if (MASTER(cr))
    {
        fcReportProgress( ir->nsteps, step );
    }

#if defined(__native_client__)
    fcCheckin(MASTER(cr));
#endif

    /* sync bCPT and fc record-keeping */
    if (bCPT && MASTER(cr))
    {
        fcRequestCheckPoint();
    }
#endif

    if (mdof_flags != 0)
    {
        wallcycle_start(mdoutf_get_wcycle(outf), ewcTRAJ);
        if (bCPT)
        {
            if (MASTER(cr))
            {
                if (bSumEkinhOld)
                {
                    state_global->ekinstate.bUpToDate = FALSE;
                }
                else
                {
                    update_ekinstate(&state_global->ekinstate, ekind);
                    state_global->ekinstate.bUpToDate = TRUE;
                }
                update_energyhistory(state_global->enerhist, mdebin);
            }
        }
        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global,
                                         step, t, state, state_global, f, f_global);
        if (bCPT)
        {
            (*nchkpt)++;
        }
        debug_gmx();
        if (bLastStep && step_rel == ir->nsteps &&
            bDoConfOut && MASTER(cr) &&
            !bRerunMD)
        {
            if (fr->bMolPBC && state->x == state_global->x)
            {
                /* This (single-rank) run needs to allocate a
                   temporary array of size natoms so that any
                   periodicity removal for mdrun -confout does not
                   perturb the update and thus the final .edr
                   output. This makes .cpt restarts look binary
                   identical, and makes .edr restarts binary
                   identical. */
                snew(x_for_confout, state_global->natoms);
                copy_rvecn(state_global->x, x_for_confout, 0, state_global->natoms);
            }
            else
            {
                /* With DD, or no bMolPBC, it doesn't matter if
                   we change state_global->x */
                x_for_confout = state_global->x;
            }

            /* x and v have been collected in mdoutf_write_to_trajectory_files,
             * because a checkpoint file will always be written
             * at the last step.
             */
            fprintf(stderr, "\nWriting final coordinates.\n");
            if (fr->bMolPBC)
            {
                /* Make molecules whole only for confout writing */
                do_pbc_mtop(fplog, ir->ePBC, state->box, top_global, x_for_confout);
            }
            write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm),
                                *top_global->name, top_global,
                                x_for_confout, state_global->v,
                                ir->ePBC, state->box);
            if (fr->bMolPBC && state->x == state_global->x)
            {
                sfree(x_for_confout);
            }
            debug_gmx();
        }
        wallcycle_stop(mdoutf_get_wcycle(outf), ewcTRAJ);
    }
}
예제 #12
0
파일: force.c 프로젝트: yhalcyon/Gromacs
void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
                       t_forcerec *fr,      t_inputrec *ir,
                       t_idef     *idef,    t_commrec  *cr,
                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
                       t_mdatoms  *md,
                       t_grpopts  *opts,
                       rvec       x[],      history_t  *hist,
                       rvec       f[],
                       rvec       f_longrange[],
                       gmx_enerdata_t *enerd,
                       t_fcdata   *fcd,
                       gmx_mtop_t     *mtop,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       t_atomtypes *atype,
                       gmx_bool       bBornRadii,
                       matrix     box,
                       t_lambda   *fepvals,
                       real       *lambda,
                       t_graph    *graph,
                       t_blocka   *excl,
                       rvec       mu_tot[],
                       int        flags,
                       float      *cycles_pme)
{
    int         i, j, status;
    int         donb_flags;
    gmx_bool    bDoEpot, bSepDVDL, bSB;
    int         pme_flags;
    matrix      boxs;
    rvec        box_size;
    real        Vsr, Vlr, Vcorr = 0;
    t_pbc       pbc;
    real        dvdgb;
    char        buf[22];
    double      clam_i, vlam_i;
    real        dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
    real        dvdlsum;

#ifdef GMX_MPI
    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
#endif

#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) {fprintf(fplog, sepdvdlformat, s, v, dvdlambda); }

    GMX_MPE_LOG(ev_force_start);
    set_pbc(&pbc, fr->ePBC, box);

    /* reset free energy components */
    for (i = 0; i < efptNR; i++)
    {
        dvdl_nb[i]  = 0;
        dvdl_dum[i] = 0;
    }

    /* Reset box */
    for (i = 0; (i < DIM); i++)
    {
        box_size[i] = box[i][i];
    }

    bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog));
    debug_gmx();

    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md);
    }

    if (bSepDVDL)
    {
        fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n",
                gmx_step_str(step, buf), cr->nodeid);
    }

    /* Call the short range functions all in one go. */
    GMX_MPE_LOG(ev_do_fnbf_start);

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0 = MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        /* foreign lambda component for walls */
        dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
                        enerd->grpp.ener[egLJSR], nrnb);
        PRINT_SEPDVDL("Walls", 0.0, dvdl);
        enerd->dvdl_lin[efptVDW] += dvdl;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsNONBONDED);

        for (i = 0; i < born->nr; i++)
        {
            fr->dvda[i] = 0;
        }

        if (bBornRadii)
        {
            calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb);
        }

        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
    }

    where();
    /* We only do non-bonded calculation with group scheme here, the verlet
     * calls are done from do_force_cutsVERLET(). */
    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
    {
        donb_flags = 0;
        /* Add short-range interactions */
        donb_flags |= GMX_NONBONDED_DO_SR;

        if (flags & GMX_FORCE_FORCES)
        {
            donb_flags |= GMX_NONBONDED_DO_FORCE;
        }
        if (flags & GMX_FORCE_ENERGY)
        {
            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
        }
        if (flags & GMX_FORCE_DO_LR)
        {
            donb_flags |= GMX_NONBONDED_DO_LR;
        }

        wallcycle_sub_start(wcycle, ewcsNONBONDED);
        do_nonbonded(cr, fr, x, f, f_longrange, md, excl,
                     &enerd->grpp, box_size, nrnb,
                     lambda, dvdl_nb, -1, -1, donb_flags);

        /* If we do foreign lambda and we have soft-core interactions
         * we have to recalculate the (non-linear) energies contributions.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
        {
            for (i = 0; i < enerd->n_lambda; i++)
            {
                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                reset_foreign_enerdata(enerd);
                do_nonbonded(cr, fr, x, f, f_longrange, md, excl,
                             &(enerd->foreign_grpp), box_size, nrnb,
                             lam_i, dvdl_dum, -1, -1,
                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
                sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
        where();
    }

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    /* MRS: Eventually, many need to include free energy contribution here! */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsBONDED);
        calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef,
                       ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii, &pbc, graph, enerd);
        wallcycle_sub_stop(wcycle, ewcsBONDED);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1          = MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    if (fepvals->sc_alpha != 0)
    {
        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
    }
    else
    {
        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
    }

    if (fepvals->sc_alpha != 0)

    /* even though coulomb part is linear, we already added it, beacuse we
       need to go through the vdw calculation anyway */
    {
        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
    }
    else
    {
        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
    }

    Vsr = 0;
    if (bSepDVDL)
    {
        for (i = 0; i < enerd->grpp.nener; i++)
        {
            Vsr +=
                (fr->bBHAM ?
                 enerd->grpp.ener[egBHAMSR][i] :
                 enerd->grpp.ener[egLJSR][i])
                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
        }
        dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
        PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum);
    }
    debug_gmx();

    GMX_MPE_LOG(ev_do_fnbf_finish);

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
    }

    /* Shift the coordinates. Must be done before bonded forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no bonded forces have to be evaluated.
     */

    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }
    /* Check whether we need to do bondeds or correct for exclusions */
    if (fr->bMolPBC &&
        ((flags & GMX_FORCE_BONDED)
         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
    {
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
    }
    debug_gmx();

    if (flags & GMX_FORCE_BONDED)
    {
        GMX_MPE_LOG(ev_calc_bonds_start);

        wallcycle_sub_start(wcycle, ewcsBONDED);
        calc_bonds(fplog, cr->ms,
                   idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
                   flags,
                   fr->bSepDVDL && do_per_step(step, ir->nstlog), step);

        /* Check if we have to determine energy differences
         * at foreign lambda's.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
            idef->ilsort != ilsortNO_FE)
        {
            if (idef->ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
            for (i = 0; i < enerd->n_lambda; i++)
            {
                reset_foreign_enerdata(enerd);
                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md,
                                  fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
                sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        debug_gmx();
        GMX_MPE_LOG(ev_calc_bonds_finish);
        wallcycle_sub_stop(wcycle, ewcsBONDED);
    }

    where();

    *cycles_pme = 0;
    if (EEL_FULL(fr->eeltype))
    {
        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box, boxs);
            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        clear_mat(fr->vir_el_recip);

        if (fr->bEwald)
        {
            Vcorr = 0;
            dvdl  = 0;

            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            /* The TPI molecule does not have exclusions with the rest
             * of the system and no intra-molecular PME grid contributions
             * will be calculated in gmx_pme_calc_energy.
             */
            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
                ir->ewald_geometry != eewg3D ||
                ir->epsilon_surface != 0)
            {
                int nthreads, t;

                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                if (fr->n_tpi > 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }

                nthreads = gmx_omp_nthreads_get(emntBonded);
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (t = 0; t < nthreads; t++)
                {
                    int     s, e, i;
                    rvec   *fnv;
                    tensor *vir;
                    real   *Vcorrt, *dvdlt;
                    if (t == 0)
                    {
                        fnv    = fr->f_novirsum;
                        vir    = &fr->vir_el_recip;
                        Vcorrt = &Vcorr;
                        dvdlt  = &dvdl;
                    }
                    else
                    {
                        fnv    = fr->f_t[t].f;
                        vir    = &fr->f_t[t].vir;
                        Vcorrt = &fr->f_t[t].Vcorr;
                        dvdlt  = &fr->f_t[t].dvdl[efptCOUL];
                        for (i = 0; i < fr->natoms_force; i++)
                        {
                            clear_rvec(fnv[i]);
                        }
                        clear_mat(*vir);
                    }
                    *dvdlt  = 0;
                    *Vcorrt =
                        ewald_LRcorrection(fplog,
                                           fr->excl_load[t], fr->excl_load[t+1],
                                           cr, t, fr,
                                           md->chargeA,
                                           md->nChargePerturbed ? md->chargeB : NULL,
                                           ir->cutoff_scheme != ecutsVERLET,
                                           excl, x, bSB ? boxs : box, mu_tot,
                                           ir->ewald_geometry,
                                           ir->epsilon_surface,
                                           fnv, *vir,
                                           lambda[efptCOUL], dvdlt);
                }
                if (nthreads > 1)
                {
                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
                                         fr->vir_el_recip,
                                         &Vcorr, efptCOUL, &dvdl,
                                         nthreads, fr->f_t);
                }

                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }

            if (fr->n_tpi == 0)
            {
                Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
                                                 &dvdl, fr->vir_el_recip);
            }

            PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl);
            enerd->dvdl_lin[efptCOUL] += dvdl;
        }

        status = 0;
        Vlr    = 0;
        dvdl   = 0;
        switch (fr->eeltype)
        {
            case eelPME:
            case eelPMESWITCH:
            case eelPMEUSER:
            case eelPMEUSERSWITCH:
            case eelP3M_AD:
                if (cr->duty & DUTY_PME)
                {
                    assert(fr->n_tpi >= 0);
                    if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
                    {
                        pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
                        if (flags & GMX_FORCE_FORCES)
                        {
                            pme_flags |= GMX_PME_CALC_F;
                        }
                        if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
                        {
                            pme_flags |= GMX_PME_CALC_ENER_VIR;
                        }
                        if (fr->n_tpi > 0)
                        {
                            /* We don't calculate f, but we do want the potential */
                            pme_flags |= GMX_PME_CALC_POT;
                        }
                        wallcycle_start(wcycle, ewcPMEMESH);
                        status = gmx_pme_do(fr->pmedata,
                                            md->start, md->homenr - fr->n_tpi,
                                            x, fr->f_novirsum,
                                            md->chargeA, md->chargeB,
                                            bSB ? boxs : box, cr,
                                            DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                            DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                            nrnb, wcycle,
                                            fr->vir_el_recip, fr->ewaldcoeff,
                                            &Vlr, lambda[efptCOUL], &dvdl,
                                            pme_flags);
                        *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);

                        /* We should try to do as little computation after
                         * this as possible, because parallel PME synchronizes
                         * the nodes, so we want all load imbalance of the rest
                         * of the force calculation to be before the PME call.
                         * DD load balancing is done on the whole time of
                         * the force call (without PME).
                         */
                    }
                    if (fr->n_tpi > 0)
                    {
                        /* Determine the PME grid energy of the test molecule
                         * with the PME grid potential of the other charges.
                         */
                        gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
                                            x + md->homenr - fr->n_tpi,
                                            md->chargeA + md->homenr - fr->n_tpi,
                                            &Vlr);
                    }
                    PRINT_SEPDVDL("PME mesh", Vlr, dvdl);
                }
                break;
            case eelEWALD:
                Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum,
                               md->chargeA, md->chargeB,
                               box_size, cr, md->homenr,
                               fr->vir_el_recip, fr->ewaldcoeff,
                               lambda[efptCOUL], &dvdl, fr->ewald_table);
                PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl);
                break;
            default:
                gmx_fatal(FARGS, "No such electrostatics method implemented %s",
                          eel_names[fr->eeltype]);
        }
        if (status != 0)
        {
            gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s",
                      status, EELTYPE(fr->eeltype));
        }
        /* Note that with separate PME nodes we get the real energies later */
        enerd->dvdl_lin[efptCOUL] += dvdl;
        enerd->term[F_COUL_RECIP]  = Vlr + Vcorr;
        if (debug)
        {
            fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
                    Vlr, Vcorr, enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
            pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
        }
    }
    else
    {
        if (EEL_RF(fr->eeltype))
        {
            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
            {
                dvdl                   = 0;
                enerd->term[F_RF_EXCL] =
                    RF_excl_correction(fplog, fr, graph, md, excl, x, f,
                                       fr->fshift, &pbc, lambda[efptCOUL], &dvdl);
            }

            enerd->dvdl_lin[efptCOUL] += dvdl;
            PRINT_SEPDVDL("RF exclusion correction",
                          enerd->term[F_RF_EXCL], dvdl);
        }
    }
    where();
    debug_gmx();

    if (debug)
    {
        print_nrnb(debug, nrnb);
    }
    debug_gmx();

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t2 = MPI_Wtime();
        MPI_Barrier(cr->mpi_comm_mygroup);
        t3          = MPI_Wtime();
        fr->t_wait += t3-t2;
        if (fr->timesteps == 11)
        {
            fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
                    cr->nodeid, gmx_step_str(fr->timesteps, buf),
                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
        }
        fr->timesteps++;
    }
#endif

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
    }

    GMX_MPE_LOG(ev_force_finish);

}
예제 #13
0
int gmx_pmeonly(struct gmx_pme_t *pme,
                t_commrec *cr,    t_nrnb *mynrnb,
                gmx_wallcycle_t wcycle,
                gmx_walltime_accounting_t walltime_accounting,
                real ewaldcoeff_q, real ewaldcoeff_lj,
                t_inputrec *ir)
{
    int                npmedata;
    struct gmx_pme_t **pmedata;
    gmx_pme_pp_t       pme_pp;
    int                ret;
    int                natoms;
    matrix             box;
    rvec              *x_pp       = NULL, *f_pp = NULL;
    real              *chargeA    = NULL, *chargeB = NULL;
    real              *c6A        = NULL, *c6B = NULL;
    real              *sigmaA     = NULL, *sigmaB = NULL;
    real               lambda_q   = 0;
    real               lambda_lj  = 0;
    int                maxshift_x = 0, maxshift_y = 0;
    real               energy_q, energy_lj, dvdlambda_q, dvdlambda_lj;
    matrix             vir_q, vir_lj;
    float              cycles;
    int                count;
    gmx_bool           bEnerVir;
    int                pme_flags;
    gmx_int64_t        step, step_rel;
    ivec               grid_switch;

    /* This data will only use with PME tuning, i.e. switching PME grids */
    npmedata = 1;
    snew(pmedata, npmedata);
    pmedata[0] = pme;

    pme_pp = gmx_pme_pp_init(cr);

    init_nrnb(mynrnb);

    count = 0;
    do /****** this is a quasi-loop over time steps! */
    {
        /* The reason for having a loop here is PME grid tuning/switching */
        do
        {
            /* Domain decomposition */
            ret = gmx_pme_recv_coeffs_coords(pme_pp,
                                             &natoms,
                                             &chargeA, &chargeB,
                                             &c6A, &c6B,
                                             &sigmaA, &sigmaB,
                                             box, &x_pp, &f_pp,
                                             &maxshift_x, &maxshift_y,
                                             &pme->bFEP_q, &pme->bFEP_lj,
                                             &lambda_q, &lambda_lj,
                                             &bEnerVir,
                                             &pme_flags,
                                             &step,
                                             grid_switch, &ewaldcoeff_q, &ewaldcoeff_lj);

            if (ret == pmerecvqxSWITCHGRID)
            {
                /* Switch the PME grid to grid_switch */
                gmx_pmeonly_switch(&npmedata, &pmedata, grid_switch, cr, ir, &pme);
            }

            if (ret == pmerecvqxRESETCOUNTERS)
            {
                /* Reset the cycle and flop counters */
                reset_pmeonly_counters(wcycle, walltime_accounting, mynrnb, ir, step);
            }
        }
        while (ret == pmerecvqxSWITCHGRID || ret == pmerecvqxRESETCOUNTERS);

        if (ret == pmerecvqxFINISH)
        {
            /* We should stop: break out of the loop */
            break;
        }

        step_rel = step - ir->init_step;

        if (count == 0)
        {
            wallcycle_start(wcycle, ewcRUN);
            walltime_accounting_start(walltime_accounting);
        }

        wallcycle_start(wcycle, ewcPMEMESH);

        dvdlambda_q  = 0;
        dvdlambda_lj = 0;
        clear_mat(vir_q);
        clear_mat(vir_lj);

        gmx_pme_do(pme, 0, natoms, x_pp, f_pp,
                   chargeA, chargeB, c6A, c6B, sigmaA, sigmaB, box,
                   cr, maxshift_x, maxshift_y, mynrnb, wcycle,
                   vir_q, ewaldcoeff_q, vir_lj, ewaldcoeff_lj,
                   &energy_q, &energy_lj, lambda_q, lambda_lj, &dvdlambda_q, &dvdlambda_lj,
                   pme_flags | GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0));

        cycles = wallcycle_stop(wcycle, ewcPMEMESH);

        gmx_pme_send_force_vir_ener(pme_pp,
                                    f_pp, vir_q, energy_q, vir_lj, energy_lj,
                                    dvdlambda_q, dvdlambda_lj, cycles);

        count++;
    } /***** end of quasi-loop, we stop with the break above */
    while (TRUE);

    walltime_accounting_end(walltime_accounting);

    return 0;
}
예제 #14
0
int gmx_pmeonly(struct gmx_pme_t *pme,
                const t_commrec *cr, t_nrnb *mynrnb,
                gmx_wallcycle  *wcycle,
                gmx_walltime_accounting_t walltime_accounting,
                t_inputrec *ir, PmeRunMode runMode)
{
    int                ret;
    int                natoms = 0;
    matrix             box;
    real               lambda_q   = 0;
    real               lambda_lj  = 0;
    int                maxshift_x = 0, maxshift_y = 0;
    real               energy_q, energy_lj, dvdlambda_q, dvdlambda_lj;
    matrix             vir_q, vir_lj;
    float              cycles;
    int                count;
    gmx_bool           bEnerVir = FALSE;
    int64_t            step;

    /* This data will only use with PME tuning, i.e. switching PME grids */
    std::vector<gmx_pme_t *> pmedata;
    pmedata.push_back(pme);

    auto       pme_pp       = gmx_pme_pp_init(cr);
    //TODO the variable below should be queried from the task assignment info
    const bool useGpuForPme = (runMode == PmeRunMode::GPU) || (runMode == PmeRunMode::Mixed);
    if (useGpuForPme)
    {
        changePinningPolicy(&pme_pp->chargeA, pme_get_pinning_policy());
        changePinningPolicy(&pme_pp->x, pme_get_pinning_policy());
    }

    init_nrnb(mynrnb);

    count = 0;
    do /****** this is a quasi-loop over time steps! */
    {
        /* The reason for having a loop here is PME grid tuning/switching */
        do
        {
            /* Domain decomposition */
            ivec newGridSize;
            bool atomSetChanged = false;
            real ewaldcoeff_q   = 0, ewaldcoeff_lj = 0;
            ret = gmx_pme_recv_coeffs_coords(pme_pp.get(),
                                             &natoms,
                                             box,
                                             &maxshift_x, &maxshift_y,
                                             &lambda_q, &lambda_lj,
                                             &bEnerVir,
                                             &step,
                                             &newGridSize,
                                             &ewaldcoeff_q,
                                             &ewaldcoeff_lj,
                                             &atomSetChanged);

            if (ret == pmerecvqxSWITCHGRID)
            {
                /* Switch the PME grid to newGridSize */
                pme = gmx_pmeonly_switch(&pmedata, newGridSize, ewaldcoeff_q, ewaldcoeff_lj, cr, ir);
            }

            if (atomSetChanged)
            {
                gmx_pme_reinit_atoms(pme, natoms, pme_pp->chargeA.data());
            }

            if (ret == pmerecvqxRESETCOUNTERS)
            {
                /* Reset the cycle and flop counters */
                reset_pmeonly_counters(wcycle, walltime_accounting, mynrnb, step, useGpuForPme);
            }
        }
        while (ret == pmerecvqxSWITCHGRID || ret == pmerecvqxRESETCOUNTERS);

        if (ret == pmerecvqxFINISH)
        {
            /* We should stop: break out of the loop */
            break;
        }

        if (count == 0)
        {
            wallcycle_start(wcycle, ewcRUN);
            walltime_accounting_start_time(walltime_accounting);
        }

        wallcycle_start(wcycle, ewcPMEMESH);

        dvdlambda_q  = 0;
        dvdlambda_lj = 0;
        clear_mat(vir_q);
        clear_mat(vir_lj);
        energy_q  = 0;
        energy_lj = 0;

        // TODO Make a struct of array refs onto these per-atom fields
        // of pme_pp (maybe box, energy and virial, too; and likewise
        // from mdatoms for the other call to gmx_pme_do), so we have
        // fewer lines of code and less parameter passing.
        const int pmeFlags = GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0);
        gmx::ArrayRef<const gmx::RVec> forces;
        if (useGpuForPme)
        {
            const bool boxChanged = false;
            //TODO this should be set properly by gmx_pme_recv_coeffs_coords,
            // or maybe use inputrecDynamicBox(ir), at the very least - change this when this codepath is tested!
            pme_gpu_prepare_computation(pme, boxChanged, box, wcycle, pmeFlags);
            pme_gpu_launch_spread(pme, pme_pp->x.rvec_array(), wcycle);
            pme_gpu_launch_complex_transforms(pme, wcycle);
            pme_gpu_launch_gather(pme, wcycle, PmeForceOutputHandling::Set);
            pme_gpu_wait_finish_task(pme, wcycle, &forces, vir_q, &energy_q);
            pme_gpu_reinit_computation(pme, wcycle);
        }
        else
        {
            gmx_pme_do(pme, 0, natoms, pme_pp->x.rvec_array(), as_rvec_array(pme_pp->f.data()),
                       pme_pp->chargeA.data(), pme_pp->chargeB.data(),
                       pme_pp->sqrt_c6A.data(), pme_pp->sqrt_c6B.data(),
                       pme_pp->sigmaA.data(), pme_pp->sigmaB.data(), box,
                       cr, maxshift_x, maxshift_y, mynrnb, wcycle,
                       vir_q, vir_lj,
                       &energy_q, &energy_lj, lambda_q, lambda_lj, &dvdlambda_q, &dvdlambda_lj,
                       pmeFlags);
            forces = pme_pp->f;
        }

        cycles = wallcycle_stop(wcycle, ewcPMEMESH);

        gmx_pme_send_force_vir_ener(pme_pp.get(), as_rvec_array(forces.data()),
                                    vir_q, energy_q, vir_lj, energy_lj,
                                    dvdlambda_q, dvdlambda_lj, cycles);

        count++;
    } /***** end of quasi-loop, we stop with the break above */
    while (TRUE);

    walltime_accounting_end_time(walltime_accounting);

    return 0;
}
예제 #15
0
파일: tpi.c 프로젝트: daniellandau/gromacs
double do_tpi(FILE *fplog, t_commrec *cr,
              int nfile, const t_filenm fnm[],
              const output_env_t oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
              int gmx_unused nstglobalcomm,
              gmx_vsite_t gmx_unused *vsite, gmx_constr_t gmx_unused constr,
              int gmx_unused stepout,
              t_inputrec *inputrec,
              gmx_mtop_t *top_global, t_fcdata *fcd,
              t_state *state,
              t_mdatoms *mdatoms,
              t_nrnb *nrnb, gmx_wallcycle_t wcycle,
              gmx_edsam_t gmx_unused ed,
              t_forcerec *fr,
              int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
              gmx_membed_t gmx_unused membed,
              real gmx_unused cpt_period, real gmx_unused max_hours,
              const char gmx_unused *deviceOptions,
              int gmx_unused imdport,
              unsigned long gmx_unused Flags,
              gmx_walltime_accounting_t walltime_accounting)
{
    const char     *TPI = "Test Particle Insertion";
    gmx_localtop_t *top;
    gmx_groups_t   *groups;
    gmx_enerdata_t *enerd;
    rvec           *f;
    real            lambda, t, temp, beta, drmax, epot;
    double          embU, sum_embU, *sum_UgembU, V, V_all, VembU_all;
    t_trxstatus    *status;
    t_trxframe      rerun_fr;
    gmx_bool        bDispCorr, bCharge, bRFExcl, bNotLastFrame, bStateChanged, bNS;
    tensor          force_vir, shake_vir, vir, pres;
    int             cg_tp, a_tp0, a_tp1, ngid, gid_tp, nener, e;
    rvec           *x_mol;
    rvec            mu_tot, x_init, dx, x_tp;
    int             nnodes, frame;
    gmx_int64_t     frame_step_prev, frame_step;
    gmx_int64_t     nsteps, stepblocksize = 0, step;
    gmx_int64_t     rnd_count_stride, rnd_count;
    gmx_int64_t     seed;
    double          rnd[4];
    int             i, start, end;
    FILE           *fp_tpi = NULL;
    char           *ptr, *dump_pdb, **leg, str[STRLEN], str2[STRLEN];
    double          dbl, dump_ener;
    gmx_bool        bCavity;
    int             nat_cavity  = 0, d;
    real           *mass_cavity = NULL, mass_tot;
    int             nbin;
    double          invbinw, *bin, refvolshift, logV, bUlogV;
    real            dvdl, prescorr, enercorr, dvdlcorr;
    gmx_bool        bEnergyOutOfBounds;
    const char     *tpid_leg[2] = {"direct", "reweighted"};

    /* Since there is no upper limit to the insertion energies,
     * we need to set an upper limit for the distribution output.
     */
    real bU_bin_limit      = 50;
    real bU_logV_bin_limit = bU_bin_limit + 10;

    nnodes = cr->nnodes;

    top = gmx_mtop_generate_local_top(top_global, inputrec);

    groups = &top_global->groups;

    bCavity = (inputrec->eI == eiTPIC);
    if (bCavity)
    {
        ptr = getenv("GMX_TPIC_MASSES");
        if (ptr == NULL)
        {
            nat_cavity = 1;
        }
        else
        {
            /* Read (multiple) masses from env var GMX_TPIC_MASSES,
             * The center of mass of the last atoms is then used for TPIC.
             */
            nat_cavity = 0;
            while (sscanf(ptr, "%lf%n", &dbl, &i) > 0)
            {
                srenew(mass_cavity, nat_cavity+1);
                mass_cavity[nat_cavity] = dbl;
                fprintf(fplog, "mass[%d] = %f\n",
                        nat_cavity+1, mass_cavity[nat_cavity]);
                nat_cavity++;
                ptr += i;
            }
            if (nat_cavity == 0)
            {
                gmx_fatal(FARGS, "Found %d masses in GMX_TPIC_MASSES", nat_cavity);
            }
        }
    }

    /*
       init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot,
       state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/
    /* We never need full pbc for TPI */
    fr->ePBC = epbcXYZ;
    /* Determine the temperature for the Boltzmann weighting */
    temp = inputrec->opts.ref_t[0];
    if (fplog)
    {
        for (i = 1; (i < inputrec->opts.ngtc); i++)
        {
            if (inputrec->opts.ref_t[i] != temp)
            {
                fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
                fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
            }
        }
        fprintf(fplog,
                "\n  The temperature for test particle insertion is %.3f K\n\n",
                temp);
    }
    beta = 1.0/(BOLTZ*temp);

    /* Number of insertions per frame */
    nsteps = inputrec->nsteps;

    /* Use the same neighborlist with more insertions points
     * in a sphere of radius drmax around the initial point
     */
    /* This should be a proper mdp parameter */
    drmax = inputrec->rtpi;

    /* An environment variable can be set to dump all configurations
     * to pdb with an insertion energy <= this value.
     */
    dump_pdb  = getenv("GMX_TPI_DUMP");
    dump_ener = 0;
    if (dump_pdb)
    {
        sscanf(dump_pdb, "%lf", &dump_ener);
    }

    atoms2md(top_global, inputrec, 0, NULL, top_global->natoms, mdatoms);
    update_mdatoms(mdatoms, inputrec->fepvals->init_lambda);

    snew(enerd, 1);
    init_enerdata(groups->grps[egcENER].nr, inputrec->fepvals->n_lambda, enerd);
    snew(f, top_global->natoms);

    /* Print to log file  */
    walltime_accounting_start(walltime_accounting);
    wallcycle_start(wcycle, ewcRUN);
    print_start(fplog, cr, walltime_accounting, "Test Particle Insertion");

    /* The last charge group is the group to be inserted */
    cg_tp = top->cgs.nr - 1;
    a_tp0 = top->cgs.index[cg_tp];
    a_tp1 = top->cgs.index[cg_tp+1];
    if (debug)
    {
        fprintf(debug, "TPI cg %d, atoms %d-%d\n", cg_tp, a_tp0, a_tp1);
    }
    if (a_tp1 - a_tp0 > 1 &&
        (inputrec->rlist < inputrec->rcoulomb ||
         inputrec->rlist < inputrec->rvdw))
    {
        gmx_fatal(FARGS, "Can not do TPI for multi-atom molecule with a twin-range cut-off");
    }
    snew(x_mol, a_tp1-a_tp0);

    bDispCorr = (inputrec->eDispCorr != edispcNO);
    bCharge   = FALSE;
    for (i = a_tp0; i < a_tp1; i++)
    {
        /* Copy the coordinates of the molecule to be insterted */
        copy_rvec(state->x[i], x_mol[i-a_tp0]);
        /* Check if we need to print electrostatic energies */
        bCharge |= (mdatoms->chargeA[i] != 0 ||
                    (mdatoms->chargeB && mdatoms->chargeB[i] != 0));
    }
    bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype != eelRF_NEC);

    calc_cgcm(fplog, cg_tp, cg_tp+1, &(top->cgs), state->x, fr->cg_cm);
    if (bCavity)
    {
        if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog)
        {
            fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
            fprintf(stderr, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
        }
    }
    else
    {
        /* Center the molecule to be inserted at zero */
        for (i = 0; i < a_tp1-a_tp0; i++)
        {
            rvec_dec(x_mol[i], fr->cg_cm[cg_tp]);
        }
    }

    if (fplog)
    {
        fprintf(fplog, "\nWill insert %d atoms %s partial charges\n",
                a_tp1-a_tp0, bCharge ? "with" : "without");

        fprintf(fplog, "\nWill insert %d times in each frame of %s\n",
                (int)nsteps, opt2fn("-rerun", nfile, fnm));
    }

    if (!bCavity)
    {
        if (inputrec->nstlist > 1)
        {
            if (drmax == 0 && a_tp1-a_tp0 == 1)
            {
                gmx_fatal(FARGS, "Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense", inputrec->nstlist, drmax);
            }
            if (fplog)
            {
                fprintf(fplog, "Will use the same neighborlist for %d insertions in a sphere of radius %f\n", inputrec->nstlist, drmax);
            }
        }
    }
    else
    {
        if (fplog)
        {
            fprintf(fplog, "Will insert randomly in a sphere of radius %f around the center of the cavity\n", drmax);
        }
    }

    ngid   = groups->grps[egcENER].nr;
    gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]);
    nener  = 1 + ngid;
    if (bDispCorr)
    {
        nener += 1;
    }
    if (bCharge)
    {
        nener += ngid;
        if (bRFExcl)
        {
            nener += 1;
        }
        if (EEL_FULL(fr->eeltype))
        {
            nener += 1;
        }
    }
    snew(sum_UgembU, nener);

    /* Copy the random seed set by the user */
    seed = inputrec->ld_seed;
    /* We use the frame step number as one random counter.
     * The second counter use the insertion (step) count. But we
     * need multiple random numbers per insertion. This number is
     * not fixed, since we generate random locations in a sphere
     * by putting locations in a cube and some of these fail.
     * A count of 20 is already extremely unlikely, so 10000 is
     * a safe margin for random numbers per insertion.
     */
    rnd_count_stride = 10000;

    if (MASTER(cr))
    {
        fp_tpi = xvgropen(opt2fn("-tpi", nfile, fnm),
                          "TPI energies", "Time (ps)",
                          "(kJ mol\\S-1\\N) / (nm\\S3\\N)", oenv);
        xvgr_subtitle(fp_tpi, "f. are averages over one frame", oenv);
        snew(leg, 4+nener);
        e = 0;
        sprintf(str, "-kT log(<Ve\\S-\\betaU\\N>/<V>)");
        leg[e++] = strdup(str);
        sprintf(str, "f. -kT log<e\\S-\\betaU\\N>");
        leg[e++] = strdup(str);
        sprintf(str, "f. <e\\S-\\betaU\\N>");
        leg[e++] = strdup(str);
        sprintf(str, "f. V");
        leg[e++] = strdup(str);
        sprintf(str, "f. <Ue\\S-\\betaU\\N>");
        leg[e++] = strdup(str);
        for (i = 0; i < ngid; i++)
        {
            sprintf(str, "f. <U\\sVdW %s\\Ne\\S-\\betaU\\N>",
                    *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
            leg[e++] = strdup(str);
        }
        if (bDispCorr)
        {
            sprintf(str, "f. <U\\sdisp c\\Ne\\S-\\betaU\\N>");
            leg[e++] = strdup(str);
        }
        if (bCharge)
        {
            for (i = 0; i < ngid; i++)
            {
                sprintf(str, "f. <U\\sCoul %s\\Ne\\S-\\betaU\\N>",
                        *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
                leg[e++] = strdup(str);
            }
            if (bRFExcl)
            {
                sprintf(str, "f. <U\\sRF excl\\Ne\\S-\\betaU\\N>");
                leg[e++] = strdup(str);
            }
            if (EEL_FULL(fr->eeltype))
            {
                sprintf(str, "f. <U\\sCoul recip\\Ne\\S-\\betaU\\N>");
                leg[e++] = strdup(str);
            }
        }
        xvgr_legend(fp_tpi, 4+nener, (const char**)leg, oenv);
        for (i = 0; i < 4+nener; i++)
        {
            sfree(leg[i]);
        }
        sfree(leg);
    }
    clear_rvec(x_init);
    V_all     = 0;
    VembU_all = 0;

    invbinw = 10;
    nbin    = 10;
    snew(bin, nbin);

    /* Avoid frame step numbers <= -1 */
    frame_step_prev = -1;

    bNotLastFrame = read_first_frame(oenv, &status, opt2fn("-rerun", nfile, fnm),
                                     &rerun_fr, TRX_NEED_X);
    frame = 0;

    if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) !=
        mdatoms->nr - (a_tp1 - a_tp0))
    {
        gmx_fatal(FARGS, "Number of atoms in trajectory (%d)%s "
                  "is not equal the number in the run input file (%d) "
                  "minus the number of atoms to insert (%d)\n",
                  rerun_fr.natoms, bCavity ? " minus one" : "",
                  mdatoms->nr, a_tp1-a_tp0);
    }

    refvolshift = log(det(rerun_fr.box));

    switch (inputrec->eI)
    {
        case eiTPI:
            stepblocksize = inputrec->nstlist;
            break;
        case eiTPIC:
            stepblocksize = 1;
            break;
        default:
            gmx_fatal(FARGS, "Unknown integrator %s", ei_names[inputrec->eI]);
    }

#ifdef GMX_SIMD
    /* Make sure we don't detect SIMD overflow generated before this point */
    gmx_simd_check_and_reset_overflow();
#endif

    while (bNotLastFrame)
    {
        frame_step      = rerun_fr.step;
        if (frame_step <= frame_step_prev)
        {
            /* We don't have step number in the trajectory file,
             * or we have constant or decreasing step numbers.
             * Ensure we have increasing step numbers, since we use
             * the step numbers as a counter for random numbers.
             */
            frame_step  = frame_step_prev + 1;
        }
        frame_step_prev = frame_step;

        lambda = rerun_fr.lambda;
        t      = rerun_fr.time;

        sum_embU = 0;
        for (e = 0; e < nener; e++)
        {
            sum_UgembU[e] = 0;
        }

        /* Copy the coordinates from the input trajectory */
        for (i = 0; i < rerun_fr.natoms; i++)
        {
            copy_rvec(rerun_fr.x[i], state->x[i]);
        }
        copy_mat(rerun_fr.box, state->box);

        V    = det(state->box);
        logV = log(V);

        bStateChanged = TRUE;
        bNS           = TRUE;

        step = cr->nodeid*stepblocksize;
        while (step < nsteps)
        {
            /* Initialize the second counter for random numbers using
             * the insertion step index. This ensures that we get
             * the same random numbers independently of how many
             * MPI ranks we use. Also for the same seed, we get
             * the same initial random sequence for different nsteps.
             */
            rnd_count = step*rnd_count_stride;

            if (!bCavity)
            {
                /* Random insertion in the whole volume */
                bNS = (step % inputrec->nstlist == 0);
                if (bNS)
                {
                    /* Generate a random position in the box */
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                    for (d = 0; d < DIM; d++)
                    {
                        x_init[d] = rnd[d]*state->box[d][d];
                    }
                }
                if (inputrec->nstlist == 1)
                {
                    copy_rvec(x_init, x_tp);
                }
                else
                {
                    /* Generate coordinates within |dx|=drmax of x_init */
                    do
                    {
                        gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                        gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                        for (d = 0; d < DIM; d++)
                        {
                            dx[d] = (2*rnd[d] - 1)*drmax;
                        }
                    }
                    while (norm2(dx) > drmax*drmax);
                    rvec_add(x_init, dx, x_tp);
                }
            }
            else
            {
                /* Random insertion around a cavity location
                 * given by the last coordinate of the trajectory.
                 */
                if (step == 0)
                {
                    if (nat_cavity == 1)
                    {
                        /* Copy the location of the cavity */
                        copy_rvec(rerun_fr.x[rerun_fr.natoms-1], x_init);
                    }
                    else
                    {
                        /* Determine the center of mass of the last molecule */
                        clear_rvec(x_init);
                        mass_tot = 0;
                        for (i = 0; i < nat_cavity; i++)
                        {
                            for (d = 0; d < DIM; d++)
                            {
                                x_init[d] +=
                                    mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d];
                            }
                            mass_tot += mass_cavity[i];
                        }
                        for (d = 0; d < DIM; d++)
                        {
                            x_init[d] /= mass_tot;
                        }
                    }
                }
                /* Generate coordinates within |dx|=drmax of x_init */
                do
                {
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                    for (d = 0; d < DIM; d++)
                    {
                        dx[d] = (2*rnd[d] - 1)*drmax;
                    }
                }
                while (norm2(dx) > drmax*drmax);
                rvec_add(x_init, dx, x_tp);
            }

            if (a_tp1 - a_tp0 == 1)
            {
                /* Insert a single atom, just copy the insertion location */
                copy_rvec(x_tp, state->x[a_tp0]);
            }
            else
            {
                /* Copy the coordinates from the top file */
                for (i = a_tp0; i < a_tp1; i++)
                {
                    copy_rvec(x_mol[i-a_tp0], state->x[i]);
                }
                /* Rotate the molecule randomly */
                gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                rotate_conf(a_tp1-a_tp0, state->x+a_tp0, NULL,
                            2*M_PI*rnd[0],
                            2*M_PI*rnd[1],
                            2*M_PI*rnd[2]);
                /* Shift to the insertion location */
                for (i = a_tp0; i < a_tp1; i++)
                {
                    rvec_inc(state->x[i], x_tp);
                }
            }

            /* Clear some matrix variables  */
            clear_mat(force_vir);
            clear_mat(shake_vir);
            clear_mat(vir);
            clear_mat(pres);

            /* Set the charge group center of mass of the test particle */
            copy_rvec(x_init, fr->cg_cm[top->cgs.nr-1]);

            /* Calc energy (no forces) on new positions.
             * Since we only need the intermolecular energy
             * and the RF exclusion terms of the inserted molecule occur
             * within a single charge group we can pass NULL for the graph.
             * This also avoids shifts that would move charge groups
             * out of the box.
             *
             * Some checks above ensure than we can not have
             * twin-range interactions together with nstlist > 1,
             * therefore we do not need to remember the LR energies.
             */
            /* Make do_force do a single node force calculation */
            cr->nnodes = 1;
            do_force(fplog, cr, inputrec,
                     step, nrnb, wcycle, top, &top_global->groups,
                     state->box, state->x, &state->hist,
                     f, force_vir, mdatoms, enerd, fcd,
                     state->lambda,
                     NULL, fr, NULL, mu_tot, t, NULL, NULL, FALSE,
                     GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY |
                     (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DO_LR : 0) |
                     (bStateChanged ? GMX_FORCE_STATECHANGED : 0));
            cr->nnodes    = nnodes;
            bStateChanged = FALSE;
            bNS           = FALSE;

            /* Calculate long range corrections to pressure and energy */
            calc_dispcorr(fplog, inputrec, fr, step, top_global->natoms, state->box,
                          lambda, pres, vir, &prescorr, &enercorr, &dvdlcorr);
            /* figure out how to rearrange the next 4 lines MRS 8/4/2009 */
            enerd->term[F_DISPCORR]  = enercorr;
            enerd->term[F_EPOT]     += enercorr;
            enerd->term[F_PRES]     += prescorr;
            enerd->term[F_DVDL_VDW] += dvdlcorr;

            epot               = enerd->term[F_EPOT];
            bEnergyOutOfBounds = FALSE;
#ifdef GMX_SIMD_X86_SSE2_OR_HIGHER
            /* With SSE the energy can overflow, check for this */
            if (gmx_mm_check_and_reset_overflow())
            {
                if (debug)
                {
                    fprintf(debug, "Found an SSE overflow, assuming the energy is out of bounds\n");
                }
                bEnergyOutOfBounds = TRUE;
            }
#endif
            /* If the compiler doesn't optimize this check away
             * we catch the NAN energies.
             * The epot>GMX_REAL_MAX check catches inf values,
             * which should nicely result in embU=0 through the exp below,
             * but it does not hurt to check anyhow.
             */
            /* Non-bonded Interaction usually diverge at r=0.
             * With tabulated interaction functions the first few entries
             * should be capped in a consistent fashion between
             * repulsion, dispersion and Coulomb to avoid accidental
             * negative values in the total energy.
             * The table generation code in tables.c does this.
             * With user tbales the user should take care of this.
             */
            if (epot != epot || epot > GMX_REAL_MAX)
            {
                bEnergyOutOfBounds = TRUE;
            }
            if (bEnergyOutOfBounds)
            {
                if (debug)
                {
                    fprintf(debug, "\n  time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n", t, (int)step, epot);
                }
                embU = 0;
            }
            else
            {
                embU      = exp(-beta*epot);
                sum_embU += embU;
                /* Determine the weighted energy contributions of each energy group */
                e                = 0;
                sum_UgembU[e++] += epot*embU;
                if (fr->bBHAM)
                {
                    for (i = 0; i < ngid; i++)
                    {
                        sum_UgembU[e++] +=
                            (enerd->grpp.ener[egBHAMSR][GID(i, gid_tp, ngid)] +
                             enerd->grpp.ener[egBHAMLR][GID(i, gid_tp, ngid)])*embU;
                    }
                }
                else
                {
                    for (i = 0; i < ngid; i++)
                    {
                        sum_UgembU[e++] +=
                            (enerd->grpp.ener[egLJSR][GID(i, gid_tp, ngid)] +
                             enerd->grpp.ener[egLJLR][GID(i, gid_tp, ngid)])*embU;
                    }
                }
                if (bDispCorr)
                {
                    sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU;
                }
                if (bCharge)
                {
                    for (i = 0; i < ngid; i++)
                    {
                        sum_UgembU[e++] +=
                            (enerd->grpp.ener[egCOULSR][GID(i, gid_tp, ngid)] +
                             enerd->grpp.ener[egCOULLR][GID(i, gid_tp, ngid)])*embU;
                    }
                    if (bRFExcl)
                    {
                        sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU;
                    }
                    if (EEL_FULL(fr->eeltype))
                    {
                        sum_UgembU[e++] += enerd->term[F_COUL_RECIP]*embU;
                    }
                }
            }

            if (embU == 0 || beta*epot > bU_bin_limit)
            {
                bin[0]++;
            }
            else
            {
                i = (int)((bU_logV_bin_limit
                           - (beta*epot - logV + refvolshift))*invbinw
                          + 0.5);
                if (i < 0)
                {
                    i = 0;
                }
                if (i >= nbin)
                {
                    realloc_bins(&bin, &nbin, i+10);
                }
                bin[i]++;
            }

            if (debug)
            {
                fprintf(debug, "TPI %7d %12.5e %12.5f %12.5f %12.5f\n",
                        (int)step, epot, x_tp[XX], x_tp[YY], x_tp[ZZ]);
            }

            if (dump_pdb && epot <= dump_ener)
            {
                sprintf(str, "t%g_step%d.pdb", t, (int)step);
                sprintf(str2, "t: %f step %d ener: %f", t, (int)step, epot);
                write_sto_conf_mtop(str, str2, top_global, state->x, state->v,
                                    inputrec->ePBC, state->box);
            }

            step++;
            if ((step/stepblocksize) % cr->nnodes != cr->nodeid)
            {
                /* Skip all steps assigned to the other MPI ranks */
                step += (cr->nnodes - 1)*stepblocksize;
            }
        }

        if (PAR(cr))
        {
            /* When running in parallel sum the energies over the processes */
            gmx_sumd(1,    &sum_embU, cr);
            gmx_sumd(nener, sum_UgembU, cr);
        }

        frame++;
        V_all     += V;
        VembU_all += V*sum_embU/nsteps;

        if (fp_tpi)
        {
            if (bVerbose || frame%10 == 0 || frame < 10)
            {
                fprintf(stderr, "mu %10.3e <mu> %10.3e\n",
                        -log(sum_embU/nsteps)/beta, -log(VembU_all/V_all)/beta);
            }

            fprintf(fp_tpi, "%10.3f %12.5e %12.5e %12.5e %12.5e",
                    t,
                    VembU_all == 0 ? 20/beta : -log(VembU_all/V_all)/beta,
                    sum_embU == 0  ? 20/beta : -log(sum_embU/nsteps)/beta,
                    sum_embU/nsteps, V);
            for (e = 0; e < nener; e++)
            {
                fprintf(fp_tpi, " %12.5e", sum_UgembU[e]/nsteps);
            }
            fprintf(fp_tpi, "\n");
            fflush(fp_tpi);
        }

        bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
    } /* End of the loop  */
    walltime_accounting_end(walltime_accounting);

    close_trj(status);

    if (fp_tpi != NULL)
    {
        gmx_fio_fclose(fp_tpi);
    }

    if (fplog != NULL)
    {
        fprintf(fplog, "\n");
        fprintf(fplog, "  <V>  = %12.5e nm^3\n", V_all/frame);
        fprintf(fplog, "  <mu> = %12.5e kJ/mol\n", -log(VembU_all/V_all)/beta);
    }

    /* Write the Boltzmann factor histogram */
    if (PAR(cr))
    {
        /* When running in parallel sum the bins over the processes */
        i = nbin;
        global_max(cr, &i);
        realloc_bins(&bin, &nbin, i);
        gmx_sumd(nbin, bin, cr);
    }
    if (MASTER(cr))
    {
        fp_tpi = xvgropen(opt2fn("-tpid", nfile, fnm),
                          "TPI energy distribution",
                          "\\betaU - log(V/<V>)", "count", oenv);
        sprintf(str, "number \\betaU > %g: %9.3e", bU_bin_limit, bin[0]);
        xvgr_subtitle(fp_tpi, str, oenv);
        xvgr_legend(fp_tpi, 2, (const char **)tpid_leg, oenv);
        for (i = nbin-1; i > 0; i--)
        {
            bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame);
            fprintf(fp_tpi, "%6.2f %10d %12.5e\n",
                    bUlogV,
                    (int)(bin[i]+0.5),
                    bin[i]*exp(-bUlogV)*V_all/VembU_all);
        }
        gmx_fio_fclose(fp_tpi);
    }
    sfree(bin);

    sfree(sum_UgembU);

    walltime_accounting_set_nsteps_done(walltime_accounting, frame*inputrec->nsteps);

    return 0;
}
예제 #16
0
double do_md_openmm(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                    const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
                    int nstglobalcomm,
                    gmx_vsite_t *vsite,gmx_constr_t constr,
                    int stepout,t_inputrec *ir,
                    gmx_mtop_t *top_global,
                    t_fcdata *fcd,
                    t_state *state_global,
                    t_mdatoms *mdatoms,
                    t_nrnb *nrnb,gmx_wallcycle_t wcycle,
                    gmx_edsam_t ed,t_forcerec *fr,
                    int repl_ex_nst,int repl_ex_seed,
                    real cpt_period,real max_hours,
                    const char *deviceOptions,
                    unsigned long Flags,
                    gmx_runtime_t *runtime)
{
    gmx_mdoutf_t *outf;
    gmx_large_int_t step,step_rel;
    double     run_time;
    double     t,t0,lam0;
    gmx_bool       bSimAnn,
    bFirstStep,bStateFromTPX,bLastStep,bStartingFromCpt;
    gmx_bool       bInitStep=TRUE;
    gmx_bool       do_ene,do_log, do_verbose,
    bX,bV,bF,bCPT;
    tensor     force_vir,shake_vir,total_vir,pres;
    int        i,m;
    int        mdof_flags;
    rvec       mu_tot;
    t_vcm      *vcm;
    int        nchkpt=1;
    gmx_localtop_t *top;
    t_mdebin *mdebin=NULL;
    t_state    *state=NULL;
    rvec       *f_global=NULL;
    int        n_xtc=-1;
    rvec       *x_xtc=NULL;
    gmx_enerdata_t *enerd;
    rvec       *f=NULL;
    gmx_global_stat_t gstat;
    gmx_update_t upd=NULL;
    t_graph    *graph=NULL;
    globsig_t   gs;

    gmx_groups_t *groups;
    gmx_ekindata_t *ekind, *ekind_save;
    gmx_bool        bAppend;
    int         a0,a1;
    matrix      lastbox;
    real        reset_counters=0,reset_counters_now=0;
    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
    int         handled_stop_condition=gmx_stop_cond_none; 

    const char *ommOptions = NULL;
    void   *openmmData;

    bAppend  = (Flags & MD_APPENDFILES);
    check_ir_old_tpx_versions(cr,fplog,ir,top_global);

    groups = &top_global->groups;

    /* Initial values */
    init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0,
            nrnb,top_global,&upd,
            nfile,fnm,&outf,&mdebin,
            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);

    clear_mat(total_vir);
    clear_mat(pres);
    /* Energy terms and groups */
    snew(enerd,1);
    init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd);
    snew(f,top_global->natoms);

    /* Kinetic energy data */
    snew(ekind,1);
    init_ekindata(fplog,top_global,&(ir->opts),ekind);
    /* needed for iteration of constraints */
    snew(ekind_save,1);
    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
    /* Copy the cos acceleration to the groups struct */
    ekind->cosacc.cos_accel = ir->cos_accel;

    gstat = global_stat_init(ir);
    debug_gmx();

    {
        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
        if ((io > 2000) && MASTER(cr))
            fprintf(stderr,
                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
                    io);
    }

    top = gmx_mtop_generate_local_top(top_global,ir);

    a0 = 0;
    a1 = top_global->natoms;

    state = partdec_init_local_state(cr,state_global);
    f_global = f;

    atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);

    if (vsite)
    {
        set_vsite_top(vsite,top,mdatoms,cr);
    }

    if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
    {
        graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
    }

    update_mdatoms(mdatoms,state->lambda);

    if (deviceOptions[0]=='\0')
    {
        /* empty options, which should default to OpenMM in this build */
        ommOptions=deviceOptions;
    }
    else
    {
        if (gmx_strncasecmp(deviceOptions,"OpenMM",6)!=0)
        {
            gmx_fatal(FARGS, "This Gromacs version currently only works with OpenMM. Use -device \"OpenMM:<options>\"");
        }
        else
        {
            ommOptions=strchr(deviceOptions,':');
            if (NULL!=ommOptions)
            {
                /* Increase the pointer to skip the colon */
                ommOptions++;
            }
        }
    }

    openmmData = openmm_init(fplog, ommOptions, ir, top_global, top, mdatoms, fr, state);
    please_cite(fplog,"Friedrichs2009");

    if (MASTER(cr))
    {
        /* Update mdebin with energy history if appending to output files */
        if ( Flags & MD_APPENDFILES )
        {
            restore_energyhistory_from_state(mdebin,&state_global->enerhist);
        }
        /* Set the initial energy history in state to zero by updating once */
        update_energyhistory(&state_global->enerhist,mdebin);
    }

    if (constr)
    {
        set_constraints(constr,top,ir,mdatoms,cr);
    }

    if (!ir->bContinuation)
    {
        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
        {
            /* Set the velocities of frozen particles to zero */
            for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
            {
                for (m=0; m<DIM; m++)
                {
                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
                    {
                        state->v[i][m] = 0;
                    }
                }
            }
        }

        if (constr)
        {
            /* Constrain the initial coordinates and velocities */
            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
                               graph,cr,nrnb,fr,top,shake_vir);
        }
        if (vsite)
        {
            /* Construct the virtual sites for the initial configuration */
            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
                             top->idef.iparams,top->idef.il,
                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
        }
    }

    debug_gmx();

    if (MASTER(cr))
    {
        char tbuf[20];
        fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
        fprintf(stderr,"starting mdrun '%s'\n",
                *(top_global->name));
        if (ir->nsteps >= 0)
        {
            sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
        }
        else
        {
            sprintf(tbuf,"%s","infinite");
        }
        if (ir->init_step > 0)
        {
            fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
                    gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
                    gmx_step_str(ir->init_step,sbuf2),
                    ir->init_step*ir->delta_t);
        }
        else
        {
            fprintf(stderr,"%s steps, %s ps.\n",
                    gmx_step_str(ir->nsteps,sbuf),tbuf);
        }
    }

    fprintf(fplog,"\n");

    /* Set and write start time */
    runtime_start(runtime);
    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
    wallcycle_start(wcycle,ewcRUN);
    if (fplog)
        fprintf(fplog,"\n");

    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */

    debug_gmx();
    /***********************************************************
     *
     *             Loop over MD steps
     *
     ************************************************************/

    /* loop over MD steps or if rerunMD to end of input trajectory */
    bFirstStep = TRUE;
    /* Skip the first Nose-Hoover integration when we get the state from tpx */
    bStateFromTPX = !opt2bSet("-cpi",nfile,fnm);
    bInitStep = bFirstStep && bStateFromTPX;
    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
    bLastStep = FALSE;

    init_global_signals(&gs,cr,ir,repl_ex_nst);

    step = ir->init_step;
    step_rel = 0;

    while (!bLastStep)
    {
        wallcycle_start(wcycle,ewcSTEP);

        GMX_MPE_LOG(ev_timestep1);

        bLastStep = (step_rel == ir->nsteps);
        t = t0 + step*ir->delta_t;

        if (gs.set[eglsSTOPCOND] != 0)
        {
            bLastStep = TRUE;
        }

        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
        do_verbose = bVerbose &&
                     (step % stepout == 0 || bFirstStep || bLastStep);

        if (MASTER(cr) && do_log)
        {
            print_ebin_header(fplog,step,t,state->lambda);
        }

        clear_mat(force_vir);
        GMX_MPE_LOG(ev_timestep2);

        /* We write a checkpoint at this MD step when:
         * either when we signalled through gs (in OpenMM NS works different),
         * or at the last step (but not when we do not want confout),
         * but never at the first step.
         */
        bCPT = ((gs.set[eglsCHKPT] ||
                 (bLastStep && (Flags & MD_CONFOUT))) &&
                step > ir->init_step );
        if (bCPT)
        {
            gs.set[eglsCHKPT] = 0;
        }

        /* Now we have the energies and forces corresponding to the
         * coordinates at time t. We must output all of this before
         * the update.
         * for RerunMD t is read from input trajectory
         */
        GMX_MPE_LOG(ev_output_start);

        mdof_flags = 0;
        if (do_per_step(step,ir->nstxout))
        {
            mdof_flags |= MDOF_X;
        }
        if (do_per_step(step,ir->nstvout))
        {
            mdof_flags |= MDOF_V;
        }
        if (do_per_step(step,ir->nstfout))
        {
            mdof_flags |= MDOF_F;
        }
        if (do_per_step(step,ir->nstxtcout))
        {
            mdof_flags |= MDOF_XTC;
        }
        if (bCPT)
        {
            mdof_flags |= MDOF_CPT;
        };
        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);

        if (mdof_flags != 0 || do_ene || do_log)
        {
            wallcycle_start(wcycle,ewcTRAJ);
            bF = (mdof_flags & MDOF_F);
            bX = (mdof_flags & (MDOF_X | MDOF_XTC | MDOF_CPT));
            bV = (mdof_flags & (MDOF_V | MDOF_CPT));

            openmm_copy_state(openmmData, state, &t, f, enerd, bX, bV, bF, do_ene);

            upd_mdebin(mdebin, FALSE,TRUE,
                       t,mdatoms->tmass,enerd,state,lastbox,
                       shake_vir,force_vir,total_vir,pres,
                       ekind,mu_tot,constr);
            print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,do_log?fplog:NULL,
                       step,t,
                       eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
            write_traj(fplog,cr,outf,mdof_flags,top_global,
                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
            if (bCPT)
            {
                nchkpt++;
                bCPT = FALSE;
            }
            debug_gmx();
            if (bLastStep && step_rel == ir->nsteps &&
                    (Flags & MD_CONFOUT) && MASTER(cr))
            {
                /* x and v have been collected in write_traj,
                 * because a checkpoint file will always be written
                 * at the last step.
                 */
                fprintf(stderr,"\nWriting final coordinates.\n");
                if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
                {
                    /* Make molecules whole only for confout writing */
                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
                }
                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
                                    *top_global->name,top_global,
                                    state_global->x,state_global->v,
                                    ir->ePBC,state->box);
                debug_gmx();
            }
            wallcycle_stop(wcycle,ewcTRAJ);
        }
        GMX_MPE_LOG(ev_output_finish);


        /* Determine the wallclock run time up till now */
        run_time = gmx_gettime() - (double)runtime->real;

        /* Check whether everything is still allright */
        if (((int)gmx_get_stop_condition() > handled_stop_condition)
#ifdef GMX_THREADS
            && MASTER(cr)
#endif
            )
        {
           /* this is just make gs.sig compatible with the hack 
               of sending signals around by MPI_Reduce with together with
               other floats */
            /* NOTE: this only works for serial code. For code that allows
               MPI nodes to propagate their condition, see kernel/md.c*/
            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
                gs.set[eglsSTOPCOND]=1;
            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
                gs.set[eglsSTOPCOND]=1;
            /* < 0 means stop at next step, > 0 means stop at next NS step */
            if (fplog)
            {
                fprintf(fplog,
                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
                        gmx_get_signal_name(),
                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
                fflush(fplog);
            }
            fprintf(stderr,
                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
                    gmx_get_signal_name(),
                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
            fflush(stderr);
            handled_stop_condition=(int)gmx_get_stop_condition();
        }
        else if (MASTER(cr) &&
                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
                 gs.set[eglsSTOPCOND] == 0)
        {
            /* Signal to terminate the run */
            gs.set[eglsSTOPCOND] = 1;
            if (fplog)
            {
                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
            }
            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
        }

        /* checkpoints */
        if (MASTER(cr) && (cpt_period >= 0 &&
                           (cpt_period == 0 ||
                            run_time >= nchkpt*cpt_period*60.0)) &&
                gs.set[eglsCHKPT] == 0)
        {
            gs.set[eglsCHKPT] = 1;
        }

        /* Time for performance */
        if (((step % stepout) == 0) || bLastStep)
        {
            runtime_upd_proc(runtime);
        }

        if (do_per_step(step,ir->nstlog))
        {
            if (fflush(fplog) != 0)
            {
                gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of quota?");
            }
        }

        /* Remaining runtime */
        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
        {
            print_time(stderr,runtime,step,ir,cr);
        }

        bFirstStep = FALSE;
        bInitStep = FALSE;
        bStartingFromCpt = FALSE;
        step++;
        step_rel++;

        openmm_take_one_step(openmmData);
    }
    /* End of main MD loop */
    debug_gmx();

    /* Stop the time */
    runtime_end(runtime);

    if (MASTER(cr))
    {
        if (ir->nstcalcenergy > 0) 
        {
            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
        }
    }

    openmm_cleanup(fplog, openmmData);

    done_mdoutf(outf);

    debug_gmx();

    runtime->nsteps_done = step_rel;

    return 0;
}
예제 #17
0
void do_force(FILE *fplog,t_commrec *cr,
	      t_inputrec *inputrec,
	      int step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
	      gmx_localtop_t *top,
	      gmx_groups_t *groups,
	      matrix box,rvec x[],history_t *hist,
	      rvec f[],rvec buf[],
	      tensor vir_force,
	      t_mdatoms *mdatoms,
	      gmx_enerdata_t *enerd,t_fcdata *fcd,
	      real lambda,t_graph *graph,
	      t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
	      real t,FILE *field,gmx_edsam_t ed,
	      int flags)
{
  static rvec box_size;
  int    cg0,cg1,i,j;
  int    start,homenr;
  static double mu[2*DIM]; 
  rvec   mu_tot_AB[2];
  bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS,bDoForces;
  matrix boxs;
  real   e,v,dvdl;
  t_pbc  pbc;
  float  cycles_ppdpme,cycles_pme,cycles_force;
  
  start  = mdatoms->start;
  homenr = mdatoms->homenr;

  bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));

  clear_mat(vir_force);
  
  if (PARTDECOMP(cr)) {
    pd_cg_range(cr,&cg0,&cg1);
  } else {
    cg0 = 0;
    if (DOMAINDECOMP(cr))
      cg1 = cr->dd->ncg_tot;
    else
      cg1 = top->cgs.nr;
    if (fr->n_tpi > 0)
      cg1--;
  }

  bStateChanged = (flags & GMX_FORCE_STATECHANGED);
  bNS           = (flags & GMX_FORCE_NS);
  bFillGrid     = (bNS && bStateChanged);
  bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
  bDoForces     = (flags & GMX_FORCE_FORCES);

  if (bStateChanged) {
    update_forcerec(fplog,fr,box);
    
    /* Calculate total (local) dipole moment in a temporary common array. 
     * This makes it possible to sum them over nodes faster.
     */
    calc_mu(start,homenr,
	    x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
	    mu,mu+DIM);
  }
  
  if (fr->ePBC != epbcNONE) { 
    /* Compute shift vectors every step,
     * because of pressure coupling or box deformation!
     */
    if (DYNAMIC_BOX(*inputrec) && bStateChanged)
      calc_shifts(box,fr->shift_vec);
    
    if (bCalcCGCM) { 
      put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
			       &(top->cgs),x,fr->cg_cm);
      inc_nrnb(nrnb,eNR_CGCM,homenr);
      inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
    } 
    else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
      unshift_self(graph,box,x);
    }
  } 
  else if (bCalcCGCM) {
    calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
    inc_nrnb(nrnb,eNR_CGCM,homenr);
  }
  
  if (bCalcCGCM) {
    if (PAR(cr)) {
      move_cgcm(fplog,cr,fr->cg_cm);
    }
    if (gmx_debug_at)
      pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
  }

#ifdef GMX_MPI
  if (!(cr->duty & DUTY_PME)) {
    /* Send particle coordinates to the pme nodes.
     * Since this is only implemented for domain decomposition
     * and domain decomposition does not use the graph,
     * we do not need to worry about shifting.
     */    

    wallcycle_start(wcycle,ewcPP_PMESENDX);
    GMX_MPE_LOG(ev_send_coordinates_start);

    bBS = (inputrec->nwall == 2);
    if (bBS) {
      copy_mat(box,boxs);
      svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
    }

    gmx_pme_send_x(cr,bBS ? boxs : box,x,mdatoms->nChargePerturbed,lambda);

    GMX_MPE_LOG(ev_send_coordinates_finish);
    wallcycle_stop(wcycle,ewcPP_PMESENDX);
  }
#endif /* GMX_MPI */

  /* Communicate coordinates and sum dipole if necessary */
  if (PAR(cr)) {
    wallcycle_start(wcycle,ewcMOVEX);
    if (DOMAINDECOMP(cr)) {
      dd_move_x(cr->dd,box,x,buf);
    } else {
      move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
    }
    /* When we don't need the total dipole we sum it in global_stat */
    if (NEED_MUTOT(*inputrec))
      gmx_sumd(2*DIM,mu,cr);
    wallcycle_stop(wcycle,ewcMOVEX);
  }
  for(i=0; i<2; i++)
    for(j=0;j<DIM;j++)
      mu_tot_AB[i][j] = mu[i*DIM + j];
  if (fr->efep == efepNO)
    copy_rvec(mu_tot_AB[0],mu_tot);
  else
    for(j=0; j<DIM; j++)
      mu_tot[j] = (1.0 - lambda)*mu_tot_AB[0][j] + lambda*mu_tot_AB[1][j];

  /* Reset energies */
  reset_energies(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));    
  if (bNS) {
    wallcycle_start(wcycle,ewcNS);
    
    if (graph && bStateChanged)
      /* Calculate intramolecular shift vectors to make molecules whole */
      mk_mshift(fplog,graph,fr->ePBC,box,x);

    /* Reset long range forces if necessary */
    if (fr->bTwinRange) {
      clear_rvecs(fr->f_twin_n,fr->f_twin);
      clear_rvecs(SHIFTS,fr->fshift_twin);
    }
    /* Do the actual neighbour searching and if twin range electrostatics
     * also do the calculation of long range forces and energies.
     */
    dvdl = 0; 
    ns(fplog,fr,x,f,box,groups,&(inputrec->opts),top,mdatoms,
       cr,nrnb,step,lambda,&dvdl,&enerd->grpp,bFillGrid,bDoForces);
    if (bSepDVDL)
      fprintf(fplog,sepdvdlformat,"LR non-bonded",0,dvdl);
    enerd->dvdl_lr       = dvdl;
    enerd->term[F_DVDL] += dvdl;

    wallcycle_stop(wcycle,ewcNS);
  }
  
  if (DOMAINDECOMP(cr)) {
    if (!(cr->duty & DUTY_PME)) {
      wallcycle_start(wcycle,ewcPPDURINGPME);
      dd_force_flop_start(cr->dd,nrnb);
    }
  }
  /* Start the force cycle counter.
   * This counter is stopped in do_forcelow_level.
   * No parallel communication should occur while this counter is running,
   * since that will interfere with the dynamic load balancing.
   */
  wallcycle_start(wcycle,ewcFORCE);

  if (bDoForces) {
      /* Reset PME/Ewald forces if necessary */
    if (fr->bF_NoVirSum) 
    {
      GMX_BARRIER(cr->mpi_comm_mygroup);
      if (fr->bDomDec)
	clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
      else
	clear_rvecs(homenr,fr->f_novirsum+start);
      GMX_BARRIER(cr->mpi_comm_mygroup);
    }
    /* Copy long range forces into normal buffers */
    if (fr->bTwinRange) {
      for(i=0; i<fr->f_twin_n; i++)
	copy_rvec(fr->f_twin[i],f[i]);
      for(i=0; i<SHIFTS; i++)
	copy_rvec(fr->fshift_twin[i],fr->fshift[i]);
    } 
    else {
      if (DOMAINDECOMP(cr))
	clear_rvecs(cr->dd->nat_tot,f);
      else
	clear_rvecs(mdatoms->nr,f);
      clear_rvecs(SHIFTS,fr->fshift);
    }
    clear_rvec(fr->vir_diag_posres);
    GMX_BARRIER(cr->mpi_comm_mygroup);
  }
  if (inputrec->ePull == epullCONSTRAINT)
    clear_pull_forces(inputrec->pull);

  /* update QMMMrec, if necessary */
  if(fr->bQMMM)
    update_QMMMrec(cr,fr,x,mdatoms,box,top);

  if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) {
    /* Position restraints always require full pbc */
    set_pbc(&pbc,inputrec->ePBC,box);
    v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
	       top->idef.iparams_posres,
	       (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
	       inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda,&dvdl,
	       fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
    if (bSepDVDL) {
      fprintf(fplog,sepdvdlformat,
	      interaction_function[F_POSRES].longname,v,dvdl);
    }
    enerd->term[F_POSRES] += v;
    enerd->term[F_DVDL]   += dvdl;
    inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
  }
  /* Compute the bonded and non-bonded forces */    
  do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
		    cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
		    x,hist,f,enerd,fcd,box,lambda,graph,&(top->excls),mu_tot_AB,
		    flags,&cycles_force);
  GMX_BARRIER(cr->mpi_comm_mygroup);

  if (ed) {
    do_flood(fplog,cr,x,f,ed,box,step);
  }
	
  if (DOMAINDECOMP(cr)) {
    dd_force_flop_stop(cr->dd,nrnb);
    if (wcycle)
      dd_cycles_add(cr->dd,cycles_force,ddCyclF);
  }
  
  if (bDoForces) {
    /* Compute forces due to electric field */
    calc_f_el(MASTER(cr) ? field : NULL,
	      start,homenr,mdatoms->chargeA,x,f,inputrec->ex,inputrec->et,t);
    
    /* When using PME/Ewald we compute the long range virial there.
     * otherwise we do it based on long range forces from twin range
     * cut-off based calculation (or not at all).
     */
    
    /* Communicate the forces */
    if (PAR(cr)) {
      wallcycle_start(wcycle,ewcMOVEF);
      if (DOMAINDECOMP(cr)) {
	dd_move_f(cr->dd,f,buf,fr->fshift);
	/* Position restraint do not introduce inter-cg forces */
	if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl)
	  dd_move_f(cr->dd,fr->f_novirsum,buf,NULL);
      } else {
	move_f(fplog,cr,GMX_LEFT,GMX_RIGHT,f,buf,nrnb);
      }
      wallcycle_stop(wcycle,ewcMOVEF);
    }
  }

  if (bDoForces) {
    if (vsite) {
      wallcycle_start(wcycle,ewcVSITESPREAD);
      spread_vsite_f(fplog,vsite,x,f,fr->fshift,nrnb,
		     &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
      wallcycle_stop(wcycle,ewcVSITESPREAD);
    }
    
    /* Calculation of the virial must be done after vsites! */
    calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
		vir_force,graph,box,nrnb,fr,inputrec->ePBC);
  }

  if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) {
    /* Calculate the center of mass forces, this requires communication,
     * which is why pull_potential is called close to other communication.
     * The virial contribution is calculated directly,
     * which is why we call pull_potential after calc_virial.
     */
    set_pbc(&pbc,inputrec->ePBC,box);
    dvdl = 0; 
    enerd->term[F_COM_PULL] =
      pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc,
		     cr,t,lambda,x,f,vir_force,&dvdl);
    if (bSepDVDL)
      fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl);
    enerd->term[F_DVDL] += dvdl;
  }

  if (!(cr->duty & DUTY_PME)) {
    cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
    dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
  }

#ifdef GMX_MPI
  if (PAR(cr) && !(cr->duty & DUTY_PME)) {
    /* In case of node-splitting, the PP nodes receive the long-range 
     * forces, virial and energy from the PME nodes here.
     */    
    wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
    dvdl = 0;
    gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl,
		      &cycles_pme);
    if (bSepDVDL)
      fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl);
    enerd->term[F_COUL_RECIP] += e;
    enerd->term[F_DVDL] += dvdl;
    if (wcycle)
      dd_cycles_add(cr->dd,cycles_pme,ddCyclPME);
    wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
  }
#endif

  if (bDoForces && fr->bF_NoVirSum) {
    if (vsite) {
      /* Spread the mesh force on virtual sites to the other particles... 
       * This is parallellized. MPI communication is performed
       * if the constructing atoms aren't local.
       */
      wallcycle_start(wcycle,ewcVSITESPREAD);
      spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,nrnb,
		     &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
      wallcycle_stop(wcycle,ewcVSITESPREAD);
    }
    /* Now add the forces, this is local */
    if (fr->bDomDec) {
      sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
    } else {
      sum_forces(start,start+homenr,f,fr->f_novirsum);
    }
    if (EEL_FULL(fr->eeltype)) {
      /* Add the mesh contribution to the virial */
      m_add(vir_force,fr->vir_el_recip,vir_force);
    }
    if (debug)
      pr_rvecs(debug,0,"vir_force",vir_force,DIM);
  }

  /* Sum the potential energy terms from group contributions */
  sum_epot(&(inputrec->opts),enerd);

  if (fr->print_force >= 0 && bDoForces)
    print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
}
예제 #18
0
파일: force.c 프로젝트: nrego/indus
void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
                       t_forcerec *fr,      t_inputrec *ir,
                       t_idef     *idef,    t_commrec  *cr,
                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
                       t_mdatoms  *md,
                       t_grpopts  *opts,
                       rvec       x[],      history_t  *hist,
                       rvec       f[],
                       gmx_enerdata_t *enerd,
                       t_fcdata   *fcd,
                       gmx_mtop_t     *mtop,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       t_atomtypes *atype,
                       gmx_bool       bBornRadii,
                       matrix     box,
                       real       lambda,
                       t_graph    *graph,
                       t_blocka   *excl,
                       rvec       mu_tot[],
                       int        flags,
                       float      *cycles_pme)
{
    int     i,status;
    int     donb_flags;
    gmx_bool    bDoEpot,bSepDVDL,bSB;
    int     pme_flags;
    matrix  boxs;
    rvec    box_size;
    real    dvdlambda,Vsr,Vlr,Vcorr=0,vdip,vcharge;
    t_pbc   pbc;
    real    dvdgb;
    char    buf[22];
    gmx_enerdata_t ed_lam;
    double  lam_i;
    real    dvdl_dum;

#ifdef GMX_MPI
    double  t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */
#endif

#define PRINT_SEPDVDL(s,v,dvdl) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdl);

    GMX_MPE_LOG(ev_force_start);
    set_pbc(&pbc,fr->ePBC,box);

    /* Reset box */
    for(i=0; (i<DIM); i++)
    {
        box_size[i]=box[i][i];
    }

    bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog));
    debug_gmx();

    /* do QMMM first if requested */
    if(fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md);
    }

    if (bSepDVDL)
    {
        fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n",
                gmx_step_str(step,buf),cr->nodeid);
    }

    /* Call the short range functions all in one go. */
    GMX_MPE_LOG(ev_do_fnbf_start);

    dvdlambda = 0;

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0=MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        dvdlambda = do_walls(ir,fr,box,md,x,f,lambda,
                             enerd->grpp.ener[egLJSR],nrnb);
        PRINT_SEPDVDL("Walls",0.0,dvdlambda);
        enerd->dvdl_lin += dvdlambda;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        /* wallcycle_start(wcycle,ewcGB); */

        for(i=0; i<born->nr; i++)
        {
            fr->dvda[i]=0;
        }

        if(bBornRadii)
        {
            calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb);
        }

        /* wallcycle_stop(wcycle, ewcGB); */
    }

    where();
    donb_flags = 0;
    if (flags & GMX_FORCE_FORCES)
    {
        donb_flags |= GMX_DONB_FORCES;
    }
    do_nonbonded(cr,fr,x,f,md,excl,
                 fr->bBHAM ?
                 enerd->grpp.ener[egBHAMSR] :
                 enerd->grpp.ener[egLJSR],
                 enerd->grpp.ener[egCOULSR],
                 enerd->grpp.ener[egGB],box_size,nrnb,
                 lambda,&dvdlambda,-1,-1,donb_flags);
    /* If we do foreign lambda and we have soft-core interactions
     * we have to recalculate the (non-linear) energies contributions.
     */
    if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) && ir->sc_alpha != 0)
    {
        init_enerdata(mtop->groups.grps[egcENER].nr,ir->n_flambda,&ed_lam);

        for(i=0; i<enerd->n_lambda; i++)
        {
            lam_i = (i==0 ? lambda : ir->flambda[i-1]);
            dvdl_dum = 0;
            reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
            do_nonbonded(cr,fr,x,f,md,excl,
                         fr->bBHAM ?
                         ed_lam.grpp.ener[egBHAMSR] :
                         ed_lam.grpp.ener[egLJSR],
                         ed_lam.grpp.ener[egCOULSR],
                         enerd->grpp.ener[egGB], box_size,nrnb,
                         lam_i,&dvdl_dum,-1,-1,
                         GMX_DONB_FOREIGNLAMBDA);
            sum_epot(&ir->opts,&ed_lam);
            enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
        }
        destroy_enerdata(&ed_lam);
    }
    where();

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    if (ir->implicit_solvent)  {
        calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef,
                       ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1=MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    if (ir->sc_alpha != 0)
    {
        enerd->dvdl_nonlin += dvdlambda;
    }
    else
    {
        enerd->dvdl_lin    += dvdlambda;
    }
    Vsr = 0;
    if (bSepDVDL)
    {
        for(i=0; i<enerd->grpp.nener; i++)
        {
            Vsr +=
                (fr->bBHAM ?
                 enerd->grpp.ener[egBHAMSR][i] :
                 enerd->grpp.ener[egLJSR][i])
                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
        }
    }
    PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlambda);
    debug_gmx();

    GMX_MPE_LOG(ev_do_fnbf_finish);

    if (debug)
    {
        pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS);
    }

    /* Shift the coordinates. Must be done before bonded forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no bonded forces have to be evaluated.
     */

    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph,box,x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
        }
    }
    /* Check whether we need to do bondeds or correct for exclusions */
    if (fr->bMolPBC &&
            ((flags & GMX_FORCE_BONDED)
             || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
    {
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
    }
    debug_gmx();

    if (flags & GMX_FORCE_BONDED)
    {
        GMX_MPE_LOG(ev_calc_bonds_start);
        calc_bonds(fplog,cr->ms,
                   idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd,
                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
                   fr->bSepDVDL && do_per_step(step,ir->nstlog),step);

        /* Check if we have to determine energy differences
         * at foreign lambda's.
         */
        if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) &&
                idef->ilsort != ilsortNO_FE)
        {
            if (idef->ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
            init_enerdata(mtop->groups.grps[egcENER].nr,ir->n_flambda,&ed_lam);

            for(i=0; i<enerd->n_lambda; i++)
            {
                lam_i = (i==0 ? lambda : ir->flambda[i-1]);
                dvdl_dum = 0;
                reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
                calc_bonds_lambda(fplog,
                                  idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md,
                                  fcd,
                                  DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
                sum_epot(&ir->opts,&ed_lam);
                enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
            }
            destroy_enerdata(&ed_lam);
        }
        debug_gmx();
        GMX_MPE_LOG(ev_calc_bonds_finish);
    }

    where();

    *cycles_pme = 0;
    if (EEL_FULL(fr->eeltype))
    {
        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box,boxs);
            svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        clear_mat(fr->vir_el_recip);

        if (fr->bEwald)
        {
            if (fr->n_tpi == 0)
            {
                dvdlambda = 0;
                Vcorr = ewald_LRcorrection(fplog,md->start,md->start+md->homenr,
                                           cr,fr,
                                           md->chargeA,
                                           md->nChargePerturbed ? md->chargeB : NULL,
                                           excl,x,bSB ? boxs : box,mu_tot,
                                           ir->ewald_geometry,
                                           ir->epsilon_surface,
                                           lambda,&dvdlambda,&vdip,&vcharge);
                PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdlambda);
                enerd->dvdl_lin += dvdlambda;
            }
            else
            {
                if (ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0)
                {
                    gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }
                /* The TPI molecule does not have exclusions with the rest
                 * of the system and no intra-molecular PME grid contributions
                 * will be calculated in gmx_pme_calc_energy.
                 */
                Vcorr = 0;
            }
        }
        else
        {
            Vcorr = shift_LRcorrection(fplog,md->start,md->homenr,cr,fr,
                                       md->chargeA,excl,x,TRUE,box,
                                       fr->vir_el_recip);
        }

        dvdlambda = 0;
        status = 0;
        switch (fr->eeltype)
        {
        case eelPPPM:
            status = gmx_pppm_do(fplog,fr->pmedata,FALSE,x,fr->f_novirsum,
                                 md->chargeA,
                                 box_size,fr->phi,cr,md->start,md->homenr,
                                 nrnb,ir->pme_order,&Vlr);
            break;
        case eelPME:
        case eelPMESWITCH:
        case eelPMEUSER:
        case eelPMEUSERSWITCH:
            if (cr->duty & DUTY_PME)
            {
                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
                {
                    pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
                    if (flags & GMX_FORCE_FORCES)
                    {
                        pme_flags |= GMX_PME_CALC_F;
                    }
                    if (flags & GMX_FORCE_VIRIAL)
                    {
                        pme_flags |= GMX_PME_CALC_ENER_VIR;
                    }
                    if (fr->n_tpi > 0)
                    {
                        /* We don't calculate f, but we do want the potential */
                        pme_flags |= GMX_PME_CALC_POT;
                    }
                    wallcycle_start(wcycle,ewcPMEMESH);
                    status = gmx_pme_do(fr->pmedata,
                                        md->start,md->homenr - fr->n_tpi,
                                        x,fr->f_novirsum,
                                        md->chargeA,md->chargeB,
                                        bSB ? boxs : box,cr,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                        nrnb,wcycle,
                                        fr->vir_el_recip,fr->ewaldcoeff,
                                        &Vlr,lambda,&dvdlambda,
                                        pme_flags);
                    *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH);

                    /* We should try to do as little computation after
                     * this as possible, because parallel PME synchronizes
                     * the nodes, so we want all load imbalance of the rest
                     * of the force calculation to be before the PME call.
                     * DD load balancing is done on the whole time of
                     * the force call (without PME).
                     */
                }
                if (fr->n_tpi > 0)
                {
                    /* Determine the PME grid energy of the test molecule
                     * with the PME grid potential of the other charges.
                     */
                    gmx_pme_calc_energy(fr->pmedata,fr->n_tpi,
                                        x + md->homenr - fr->n_tpi,
                                        md->chargeA + md->homenr - fr->n_tpi,
                                        &Vlr);
                }
                PRINT_SEPDVDL("PME mesh",Vlr,dvdlambda);
            }
            else
            {
                /* Energies and virial are obtained later from the PME nodes */
                /* but values have to be zeroed out here */
                Vlr=0.0;
            }
            break;
        case eelEWALD:
            Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum,
                           md->chargeA,md->chargeB,
                           box_size,cr,md->homenr,
                           fr->vir_el_recip,fr->ewaldcoeff,
                           lambda,&dvdlambda,fr->ewald_table);
            PRINT_SEPDVDL("Ewald long-range",Vlr,dvdlambda);
            break;
        default:
            Vlr = 0;
            gmx_fatal(FARGS,"No such electrostatics method implemented %s",
                      eel_names[fr->eeltype]);
        }
        if (status != 0)
        {
            gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s",
                      status,EELTYPE(fr->eeltype));
        }
        enerd->dvdl_lin += dvdlambda;
        enerd->term[F_COUL_RECIP] = Vlr + Vcorr;
        if (debug)
        {
            fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
                    Vlr,Vcorr,enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM);
            pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS);
        }
    }
    else
    {
        if (EEL_RF(fr->eeltype))
        {
            dvdlambda = 0;

            if (fr->eeltype != eelRF_NEC)
            {
                enerd->term[F_RF_EXCL] =
                    RF_excl_correction(fplog,fr,graph,md,excl,x,f,
                                       fr->fshift,&pbc,lambda,&dvdlambda);
            }

            enerd->dvdl_lin += dvdlambda;
            PRINT_SEPDVDL("RF exclusion correction",
                          enerd->term[F_RF_EXCL],dvdlambda);
        }
    }
    where();
    debug_gmx();

    if (debug)
    {
        print_nrnb(debug,nrnb);
    }
    debug_gmx();

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t2=MPI_Wtime();
        MPI_Barrier(cr->mpi_comm_mygroup);
        t3=MPI_Wtime();
        fr->t_wait += t3-t2;
        if (fr->timesteps == 11)
        {
            fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
                    cr->nodeid, gmx_step_str(fr->timesteps,buf),
                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
        }
        fr->timesteps++;
    }
#endif

    if (debug)
    {
        pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS);
    }

    GMX_MPE_LOG(ev_force_finish);

}
예제 #19
0
/* TODO Specialize this routine into init-time and loop-time versions?
   e.g. bReadEkin is only true when restoring from checkpoint */
void compute_globals(FILE *fplog, gmx_global_stat *gstat, t_commrec *cr, t_inputrec *ir,
                     t_forcerec *fr, gmx_ekindata_t *ekind,
                     t_state *state, t_mdatoms *mdatoms,
                     t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle,
                     gmx_enerdata_t *enerd, tensor force_vir, tensor shake_vir, tensor total_vir,
                     tensor pres, rvec mu_tot, gmx_constr_t constr,
                     gmx::SimulationSignaller *signalCoordinator,
                     matrix box, int *totalNumberOfBondedInteractions,
                     gmx_bool *bSumEkinhOld, int flags)
{
    tensor   corr_vir, corr_pres;
    gmx_bool bEner, bPres, bTemp;
    gmx_bool bStopCM, bGStat,
             bReadEkin, bEkinAveVel, bScaleEkin, bConstrain;
    real     prescorr, enercorr, dvdlcorr, dvdl_ekin;

    /* translate CGLO flags to gmx_booleans */
    bStopCM       = flags & CGLO_STOPCM;
    bGStat        = flags & CGLO_GSTAT;
    bReadEkin     = (flags & CGLO_READEKIN);
    bScaleEkin    = (flags & CGLO_SCALEEKIN);
    bEner         = flags & CGLO_ENERGY;
    bTemp         = flags & CGLO_TEMPERATURE;
    bPres         = (flags & CGLO_PRESSURE);
    bConstrain    = (flags & CGLO_CONSTRAINT);

    /* we calculate a full state kinetic energy either with full-step velocity verlet
       or half step where we need the pressure */

    bEkinAveVel = (ir->eI == eiVV || (ir->eI == eiVVAK && bPres) || bReadEkin);

    /* in initalization, it sums the shake virial in vv, and to
       sums ekinh_old in leapfrog (or if we are calculating ekinh_old) for other reasons */

    /* ########## Kinetic energy  ############## */

    if (bTemp)
    {
        /* Non-equilibrium MD: this is parallellized, but only does communication
         * when there really is NEMD.
         */

        if (PAR(cr) && (ekind->bNEMD))
        {
            accumulate_u(cr, &(ir->opts), ekind);
        }
        if (!bReadEkin)
        {
            calc_ke_part(state, &(ir->opts), mdatoms, ekind, nrnb, bEkinAveVel);
        }
    }

    /* Calculate center of mass velocity if necessary, also parallellized */
    if (bStopCM)
    {
        calc_vcm_grp(0, mdatoms->homenr, mdatoms,
                     state->x, state->v, vcm);
    }

    if (bTemp || bStopCM || bPres || bEner || bConstrain)
    {
        if (!bGStat)
        {
            /* We will not sum ekinh_old,
             * so signal that we still have to do it.
             */
            *bSumEkinhOld = TRUE;

        }
        else
        {
            gmx::ArrayRef<real> signalBuffer = signalCoordinator->getCommunicationBuffer();
            if (PAR(cr))
            {
                wallcycle_start(wcycle, ewcMoveE);
                global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot,
                            ir, ekind, constr, bStopCM ? vcm : NULL,
                            signalBuffer.size(), signalBuffer.data(),
                            totalNumberOfBondedInteractions,
                            *bSumEkinhOld, flags);
                wallcycle_stop(wcycle, ewcMoveE);
            }
            signalCoordinator->finalizeSignals();
            *bSumEkinhOld = FALSE;
        }
    }

    if (!ekind->bNEMD && debug && bTemp && (vcm->nr > 0))
    {
        correct_ekin(debug,
                     0, mdatoms->homenr,
                     state->v, vcm->group_p[0],
                     mdatoms->massT, mdatoms->tmass, ekind->ekin);
    }

    /* Do center of mass motion removal */
    if (bStopCM)
    {
        check_cm_grp(fplog, vcm, ir, 1);
        do_stopcm_grp(0, mdatoms->homenr, mdatoms->cVCM,
                      state->x, state->v, vcm);
        inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
    }

    if (bEner)
    {
        /* Calculate the amplitude of the cosine velocity profile */
        ekind->cosacc.vcos = ekind->cosacc.mvcos/mdatoms->tmass;
    }

    if (bTemp)
    {
        /* Sum the kinetic energies of the groups & calc temp */
        /* compute full step kinetic energies if vv, or if vv-avek and we are computing the pressure with inputrecNptTrotter */
        /* three maincase:  VV with AveVel (md-vv), vv with AveEkin (md-vv-avek), leap with AveEkin (md).
           Leap with AveVel is not supported; it's not clear that it will actually work.
           bEkinAveVel: If TRUE, we simply multiply ekin by ekinscale to get a full step kinetic energy.
           If FALSE, we average ekinh_old and ekinh*ekinscale_nhc to get an averaged half step kinetic energy.
         */
        enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, &dvdl_ekin,
                                       bEkinAveVel, bScaleEkin);
        enerd->dvdl_lin[efptMASS] = (double) dvdl_ekin;

        enerd->term[F_EKIN] = trace(ekind->ekin);
    }

    /* ##########  Long range energy information ###### */

    if (bEner || bPres || bConstrain)
    {
        calc_dispcorr(ir, fr, box, state->lambda[efptVDW],
                      corr_pres, corr_vir, &prescorr, &enercorr, &dvdlcorr);
    }

    if (bEner)
    {
        enerd->term[F_DISPCORR]  = enercorr;
        enerd->term[F_EPOT]     += enercorr;
        enerd->term[F_DVDL_VDW] += dvdlcorr;
    }

    /* ########## Now pressure ############## */
    if (bPres || bConstrain)
    {

        m_add(force_vir, shake_vir, total_vir);

        /* Calculate pressure and apply LR correction if PPPM is used.
         * Use the box from last timestep since we already called update().
         */

        enerd->term[F_PRES] = calc_pres(fr->ePBC, ir->nwall, box, ekind->ekin, total_vir, pres);

        /* Calculate long range corrections to pressure and energy */
        /* this adds to enerd->term[F_PRES] and enerd->term[F_ETOT],
           and computes enerd->term[F_DISPCORR].  Also modifies the
           total_vir and pres tesors */

        m_add(total_vir, corr_vir, total_vir);
        m_add(pres, corr_pres, pres);
        enerd->term[F_PDISPCORR] = prescorr;
        enerd->term[F_PRES]     += prescorr;
    }
}
예제 #20
0
void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                       t_idef     *idef,    t_commrec  *cr,
                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
                       t_mdatoms  *md,
                       rvec       x[],      history_t  *hist,
                       rvec       f[],
                       rvec       f_longrange[],
                       gmx_enerdata_t *enerd,
                       t_fcdata   *fcd,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       gmx_bool       bBornRadii,
                       matrix     box,
                       t_lambda   *fepvals,
                       real       *lambda,
                       t_graph    *graph,
                       t_blocka   *excl,
                       rvec       mu_tot[],
                       int        flags,
                       float      *cycles_pme)
{
    int         i, j;
    int         donb_flags;
    gmx_bool    bSB;
    int         pme_flags;
    matrix      boxs;
    rvec        box_size;
    t_pbc       pbc;
    real        dvdl_dum[efptNR], dvdl_nb[efptNR];

#ifdef GMX_MPI
    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
#endif

    set_pbc(&pbc, fr->ePBC, box);

    /* reset free energy components */
    for (i = 0; i < efptNR; i++)
    {
        dvdl_nb[i]  = 0;
        dvdl_dum[i] = 0;
    }

    /* Reset box */
    for (i = 0; (i < DIM); i++)
    {
        box_size[i] = box[i][i];
    }

    debug_gmx();

    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
    }

    /* Call the short range functions all in one go. */

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0 = MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        /* foreign lambda component for walls */
        real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
                                   enerd->grpp.ener[egLJSR], nrnb);
        enerd->dvdl_lin[efptVDW] += dvdl_walls;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsNONBONDED);

        for (i = 0; i < born->nr; i++)
        {
            fr->dvda[i] = 0;
        }

        if (bBornRadii)
        {
            calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
        }

        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
    }

    where();
    /* We only do non-bonded calculation with group scheme here, the verlet
     * calls are done from do_force_cutsVERLET(). */
    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
    {
        donb_flags = 0;
        /* Add short-range interactions */
        donb_flags |= GMX_NONBONDED_DO_SR;

        /* Currently all group scheme kernels always calculate (shift-)forces */
        if (flags & GMX_FORCE_FORCES)
        {
            donb_flags |= GMX_NONBONDED_DO_FORCE;
        }
        if (flags & GMX_FORCE_VIRIAL)
        {
            donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
        }
        if (flags & GMX_FORCE_ENERGY)
        {
            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
        }
        if (flags & GMX_FORCE_DO_LR)
        {
            donb_flags |= GMX_NONBONDED_DO_LR;
        }

        wallcycle_sub_start(wcycle, ewcsNONBONDED);
        do_nonbonded(fr, x, f, f_longrange, md, excl,
                     &enerd->grpp, nrnb,
                     lambda, dvdl_nb, -1, -1, donb_flags);

        /* If we do foreign lambda and we have soft-core interactions
         * we have to recalculate the (non-linear) energies contributions.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
        {
            for (i = 0; i < enerd->n_lambda; i++)
            {
                real lam_i[efptNR];

                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                reset_foreign_enerdata(enerd);
                do_nonbonded(fr, x, f, f_longrange, md, excl,
                             &(enerd->foreign_grpp), nrnb,
                             lam_i, dvdl_dum, -1, -1,
                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
                sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
        where();
    }

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    /* MRS: Eventually, many need to include free energy contribution here! */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsLISTED);
        calc_gb_forces(cr, md, born, top, x, f, fr, idef,
                       ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
        wallcycle_sub_stop(wcycle, ewcsLISTED);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1          = MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    if (fepvals->sc_alpha != 0)
    {
        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
    }
    else
    {
        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
    }

    if (fepvals->sc_alpha != 0)

    /* even though coulomb part is linear, we already added it, beacuse we
       need to go through the vdw calculation anyway */
    {
        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
    }
    else
    {
        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
    }

    debug_gmx();


    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
    }

    /* Shift the coordinates. Must be done before listed forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no listed forces have to be evaluated.
     *
     * The shifting and PBC code is deliberately not timed, since with
     * the Verlet scheme it only takes non-zero time with triclinic
     * boxes, and even then the time is around a factor of 100 less
     * than the next smallest counter.
     */


    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }
    /* Check whether we need to do listed interactions or correct for exclusions */
    if (fr->bMolPBC &&
        ((flags & GMX_FORCE_LISTED)
         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
    {
        /* TODO There are no electrostatics methods that require this
           transformation, when using the Verlet scheme, so update the
           above conditional. */
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
    }
    debug_gmx();

    do_force_listed(wcycle, box, ir->fepvals, cr->ms,
                    idef, (const rvec *) x, hist, f, fr,
                    &pbc, graph, enerd, nrnb, lambda, md, fcd,
                    DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
                    flags);

    where();

    *cycles_pme = 0;
    clear_mat(fr->vir_el_recip);
    clear_mat(fr->vir_lj_recip);

    /* Do long-range electrostatics and/or LJ-PME, including related short-range
     * corrections.
     */
    if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
    {
        int  status            = 0;
        real Vlr_q             = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
        real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;

        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box, boxs);
            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
        {
            real dvdl_long_range_correction_q   = 0;
            real dvdl_long_range_correction_lj  = 0;
            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            /* The TPI molecule does not have exclusions with the rest
             * of the system and no intra-molecular PME grid
             * contributions will be calculated in
             * gmx_pme_calc_energy.
             */
            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
                ir->ewald_geometry != eewg3D ||
                ir->epsilon_surface != 0)
            {
                int nthreads, t;

                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                if (fr->n_tpi > 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }

                nthreads = gmx_omp_nthreads_get(emntBonded);
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (t = 0; t < nthreads; t++)
                {
                    int     i;
                    rvec   *fnv;
                    tensor *vir_q, *vir_lj;
                    real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
                    if (t == 0)
                    {
                        fnv       = fr->f_novirsum;
                        vir_q     = &fr->vir_el_recip;
                        vir_lj    = &fr->vir_lj_recip;
                        Vcorrt_q  = &Vcorr_q;
                        Vcorrt_lj = &Vcorr_lj;
                        dvdlt_q   = &dvdl_long_range_correction_q;
                        dvdlt_lj  = &dvdl_long_range_correction_lj;
                    }
                    else
                    {
                        fnv       = fr->f_t[t].f;
                        vir_q     = &fr->f_t[t].vir_q;
                        vir_lj    = &fr->f_t[t].vir_lj;
                        Vcorrt_q  = &fr->f_t[t].Vcorr_q;
                        Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
                        dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
                        dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
                        for (i = 0; i < fr->natoms_force; i++)
                        {
                            clear_rvec(fnv[i]);
                        }
                        clear_mat(*vir_q);
                        clear_mat(*vir_lj);
                    }
                    *dvdlt_q  = 0;
                    *dvdlt_lj = 0;

                    ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
                                       cr, t, fr,
                                       md->chargeA, md->chargeB,
                                       md->sqrt_c6A, md->sqrt_c6B,
                                       md->sigmaA, md->sigmaB,
                                       md->sigma3A, md->sigma3B,
                                       md->nChargePerturbed || md->nTypePerturbed,
                                       ir->cutoff_scheme != ecutsVERLET,
                                       excl, x, bSB ? boxs : box, mu_tot,
                                       ir->ewald_geometry,
                                       ir->epsilon_surface,
                                       fnv, *vir_q, *vir_lj,
                                       Vcorrt_q, Vcorrt_lj,
                                       lambda[efptCOUL], lambda[efptVDW],
                                       dvdlt_q, dvdlt_lj);
                }
                if (nthreads > 1)
                {
                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
                                         fr->vir_el_recip, fr->vir_lj_recip,
                                         &Vcorr_q, &Vcorr_lj,
                                         &dvdl_long_range_correction_q,
                                         &dvdl_long_range_correction_lj,
                                         nthreads, fr->f_t);
                }
                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }

            if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0)
            {
                /* This is not in a subcounter because it takes a
                   negligible and constant-sized amount of time */
                Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
                                                   &dvdl_long_range_correction_q,
                                                   fr->vir_el_recip);
            }

            enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q;
            enerd->dvdl_lin[efptVDW]  += dvdl_long_range_correction_lj;

            if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME))
            {
                /* Do reciprocal PME for Coulomb and/or LJ. */
                assert(fr->n_tpi >= 0);
                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
                {
                    pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
                    if (EEL_PME(fr->eeltype))
                    {
                        pme_flags     |= GMX_PME_DO_COULOMB;
                    }
                    if (EVDW_PME(fr->vdwtype))
                    {
                        pme_flags |= GMX_PME_DO_LJ;
                    }
                    if (flags & GMX_FORCE_FORCES)
                    {
                        pme_flags |= GMX_PME_CALC_F;
                    }
                    if (flags & GMX_FORCE_VIRIAL)
                    {
                        pme_flags |= GMX_PME_CALC_ENER_VIR;
                    }
                    if (fr->n_tpi > 0)
                    {
                        /* We don't calculate f, but we do want the potential */
                        pme_flags |= GMX_PME_CALC_POT;
                    }
                    wallcycle_start(wcycle, ewcPMEMESH);
                    status = gmx_pme_do(fr->pmedata,
                                        0, md->homenr - fr->n_tpi,
                                        x, fr->f_novirsum,
                                        md->chargeA, md->chargeB,
                                        md->sqrt_c6A, md->sqrt_c6B,
                                        md->sigmaA, md->sigmaB,
                                        bSB ? boxs : box, cr,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                        nrnb, wcycle,
                                        fr->vir_el_recip, fr->ewaldcoeff_q,
                                        fr->vir_lj_recip, fr->ewaldcoeff_lj,
                                        &Vlr_q, &Vlr_lj,
                                        lambda[efptCOUL], lambda[efptVDW],
                                        &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags);
                    *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
                    if (status != 0)
                    {
                        gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
                    }
                    /* We should try to do as little computation after
                     * this as possible, because parallel PME synchronizes
                     * the nodes, so we want all load imbalance of the
                     * rest of the force calculation to be before the PME
                     * call.  DD load balancing is done on the whole time
                     * of the force call (without PME).
                     */
                }
                if (fr->n_tpi > 0)
                {
                    if (EVDW_PME(ir->vdwtype))
                    {

                        gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
                    }
                    /* Determine the PME grid energy of the test molecule
                     * with the PME grid potential of the other charges.
                     */
                    gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
                                        x + md->homenr - fr->n_tpi,
                                        md->chargeA + md->homenr - fr->n_tpi,
                                        &Vlr_q);
                }
            }
        }

        if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype))
        {
            Vlr_q = do_ewald(ir, x, fr->f_novirsum,
                             md->chargeA, md->chargeB,
                             box_size, cr, md->homenr,
                             fr->vir_el_recip, fr->ewaldcoeff_q,
                             lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table);
        }

        /* Note that with separate PME nodes we get the real energies later */
        enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q;
        enerd->dvdl_lin[efptVDW]  += dvdl_long_range_lj;
        enerd->term[F_COUL_RECIP]  = Vlr_q + Vcorr_q;
        enerd->term[F_LJ_RECIP]    = Vlr_lj + Vcorr_lj;
        if (debug)
        {
            fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
                    Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
            pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
            fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
                    Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]);
            pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM);
        }
    }
    else
    {
        /* Is there a reaction-field exclusion correction needed? */
        if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype)
        {
            /* With the Verlet scheme, exclusion forces are calculated
             * in the non-bonded kernel.
             */
            if (ir->cutoff_scheme != ecutsVERLET)
            {
                real dvdl_rf_excl      = 0;
                enerd->term[F_RF_EXCL] =
                    RF_excl_correction(fr, graph, md, excl, x, f,
                                       fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);

                enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
            }
        }
    }
    where();
    debug_gmx();

    if (debug)
    {
        print_nrnb(debug, nrnb);
    }
    debug_gmx();

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t2 = MPI_Wtime();
        MPI_Barrier(cr->mpi_comm_mygroup);
        t3          = MPI_Wtime();
        fr->t_wait += t3-t2;
        if (fr->timesteps == 11)
        {
            char buf[22];
            fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
                    cr->nodeid, gmx_step_str(fr->timesteps, buf),
                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
        }
        fr->timesteps++;
    }
#endif

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
    }

}