예제 #1
0
/*! \brief
 * A convenience wrapper for launching either the GPU or CPU FFT.
 *
 * \param[in] pme            The PME structure.
 * \param[in] gridIndex      The grid index - should currently always be 0.
 * \param[in] dir            The FFT direction enum.
 * \param[in] wcycle         The wallclock counter.
 */
void inline parallel_3dfft_execute_gpu_wrapper(gmx_pme_t              *pme,
                                               const int               gridIndex,
                                               enum gmx_fft_direction  dir,
                                               gmx_wallcycle_t         wcycle)
{
    GMX_ASSERT(gridIndex == 0, "Only single grid supported");
    if (pme_gpu_performs_FFT(pme->gpu))
    {
        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
        pme_gpu_3dfft(pme->gpu, dir, gridIndex);
        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
    }
    else
    {
        wallcycle_start(wcycle, ewcPME_FFT_MIXED_MODE);
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
        for (int thread = 0; thread < pme->nthread; thread++)
        {
            gmx_parallel_3dfft_execute(pme->pfft_setup[gridIndex], dir, thread, wcycle);
        }
        wallcycle_stop(wcycle, ewcPME_FFT_MIXED_MODE);
    }
}
예제 #2
0
void
posres_wrapper_lambda(struct gmx_wallcycle *wcycle,
                      const t_lambda       *fepvals,
                      const t_idef         *idef,
                      const struct t_pbc   *pbc,
                      const rvec            x[],
                      gmx_enerdata_t       *enerd,
                      real                 *lambda,
                      t_forcerec           *fr)
{
    real  v;
    int   i;

    if (0 == idef->il[F_POSRES].nr)
    {
        return;
    }

    wallcycle_sub_start_nocount(wcycle, ewcsRESTRAINTS);
    for (i = 0; i < enerd->n_lambda; i++)
    {
        real dvdl_dum = 0, lambda_dum;

        lambda_dum = (i == 0 ? lambda[efptRESTRAINT] : fepvals->all_lambda[efptRESTRAINT][i-1]);
        v          = posres(idef->il[F_POSRES].nr, idef->il[F_POSRES].iatoms,
                            idef->iparams_posres,
                            x, NULL, NULL,
                            fr->ePBC == epbcNONE ? NULL : pbc, lambda_dum, &dvdl_dum,
                            fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB);
        enerd->enerpart_lambda[i] += v;
    }
    wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
}
예제 #3
0
void pme_gpu_launch_spread(gmx_pme_t            *pme,
                           const rvec           *x,
                           gmx_wallcycle        *wcycle)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");

    PmeGpu *pmeGpu = pme->gpu;

    // The only spot of PME GPU where LAUNCH_GPU counter increases call-count
    wallcycle_start(wcycle, ewcLAUNCH_GPU);
    // The only spot of PME GPU where ewcsLAUNCH_GPU_PME subcounter increases call-count
    wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_PME);
    pme_gpu_copy_input_coordinates(pmeGpu, x);
    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
    wallcycle_stop(wcycle, ewcLAUNCH_GPU);

    const unsigned int gridIndex  = 0;
    real              *fftgrid    = pme->fftgrid[gridIndex];
    if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD)
    {
        /* Spread the coefficients on a grid */
        const bool computeSplines = true;
        const bool spreadCharges  = true;
        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
        pme_gpu_spread(pmeGpu, gridIndex, fftgrid, computeSplines, spreadCharges);
        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
        wallcycle_stop(wcycle, ewcLAUNCH_GPU);
    }
}
예제 #4
0
void pme_gpu_launch_complex_transforms(gmx_pme_t      *pme,
                                       gmx_wallcycle  *wcycle)
{
    PmeGpu            *pmeGpu                 = pme->gpu;
    const bool         computeEnergyAndVirial = (pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
    const bool         performBackFFT         = (pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
    const unsigned int gridIndex              = 0;
    t_complex         *cfftgrid               = pme->cfftgrid[gridIndex];

    if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD)
    {
        if (!pme_gpu_performs_FFT(pmeGpu))
        {
            wallcycle_start(wcycle, ewcWAIT_GPU_PME_SPREAD);
            pme_gpu_sync_spread_grid(pme->gpu);
            wallcycle_stop(wcycle, ewcWAIT_GPU_PME_SPREAD);
        }
    }

    try
    {
        if (pmeGpu->settings.currentFlags & GMX_PME_SOLVE)
        {
            /* do R2C 3D-FFT */
            parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_REAL_TO_COMPLEX, wcycle);

            /* solve in k-space for our local cells */
            if (pme_gpu_performs_solve(pmeGpu))
            {
                const auto gridOrdering = pme_gpu_uses_dd(pmeGpu) ? GridOrdering::YZX : GridOrdering::XYZ;
                wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
                wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
                pme_gpu_solve(pmeGpu, cfftgrid, gridOrdering, computeEnergyAndVirial);
                wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
                wallcycle_stop(wcycle, ewcLAUNCH_GPU);
            }
            else
            {
                wallcycle_start(wcycle, ewcPME_SOLVE_MIXED_MODE);
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
                for (int thread = 0; thread < pme->nthread; thread++)
                {
                    solve_pme_yzx(pme, cfftgrid, pme->boxVolume,
                                  computeEnergyAndVirial, pme->nthread, thread);
                }
                wallcycle_stop(wcycle, ewcPME_SOLVE_MIXED_MODE);
            }
        }

        if (performBackFFT)
        {
            parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_COMPLEX_TO_REAL, wcycle);
        }
    } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
예제 #5
0
void pme_gpu_reinit_computation(const gmx_pme_t *pme,
                                gmx_wallcycle   *wcycle)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");

    wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
    wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);

    pme_gpu_clear_grids(pme->gpu);
    pme_gpu_clear_energy_virial(pme->gpu);

    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
    wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
예제 #6
0
void pme_gpu_launch_gather(const gmx_pme_t                 *pme,
                           gmx_wallcycle gmx_unused        *wcycle,
                           PmeForceOutputHandling           forceTreatment)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");

    if (!pme_gpu_performs_gather(pme->gpu))
    {
        return;
    }

    wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
    wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
    const unsigned int gridIndex  = 0;
    real              *fftgrid    = pme->fftgrid[gridIndex];
    pme_gpu_gather(pme->gpu, forceTreatment, reinterpret_cast<float *>(fftgrid));
    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
    wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
예제 #7
0
void pme_gpu_prepare_computation(gmx_pme_t            *pme,
                                 bool                  needToUpdateBox,
                                 const matrix          box,
                                 gmx_wallcycle        *wcycle,
                                 int                   flags)
{
    GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
    GMX_ASSERT(pme->nnodes > 0, "");
    GMX_ASSERT(pme->nnodes == 1 || pme->ndecompdim > 0, "");

    PmeGpu *pmeGpu = pme->gpu;
    pmeGpu->settings.currentFlags = flags;
    // TODO these flags are only here to honor the CPU PME code, and probably should be removed

    bool shouldUpdateBox = false;
    for (int i = 0; i < DIM; ++i)
    {
        for (int j = 0; j <= i; ++j)
        {
            shouldUpdateBox                  |= (pmeGpu->common->previousBox[i][j] != box[i][j]);
            pmeGpu->common->previousBox[i][j] = box[i][j];
        }
    }

    if (needToUpdateBox || shouldUpdateBox) // || is to make the first computation always update
    {
        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
        wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
        pme_gpu_update_input_box(pmeGpu, box);
        wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
        wallcycle_stop(wcycle, ewcLAUNCH_GPU);

        if (!pme_gpu_performs_solve(pmeGpu))
        {
            // TODO remove code duplication and add test coverage
            matrix scaledBox;
            pmeGpu->common->boxScaler->scaleBox(box, scaledBox);
            gmx::invertBoxMatrix(scaledBox, pme->recipbox);
            pme->boxVolume = scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ];
        }
    }
}