Beispiel #1
0
// TODO: check Erik's mail for the one thing left to do
/*
 * Round-trip test driver for the parallel 3D FFT.
 *
 * Command line: gmx_test_3dfft N M K P1 P2
 *   N M K  - global grid dimensions, stored in ndata
 *   P1 P2  - values handed to split_communicator() as P
 *            (presumably the 2D process decomposition - confirm against
 *            split_communicator)
 *
 * Each rank fills its local part of the real grid with random numbers, runs a
 * real-to-complex transform followed by a complex-to-real transform, and
 * prints every local grid point whose result, divided by N*M*K, differs from
 * the original input by more than 2*N*M*K*GMX_REAL_EPS.
 *
 * Returns 1 when the argument count is wrong and 0 otherwise; mismatches are
 * only reported on stdout and do not change the exit code.
 */
int main(int argc,char** argv) {
    int prank,i,j,k;

    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD,&prank);
    debug=stderr;
    if (argc!=6) {
        if (prank==0) printf("Usage: gmx_test_3dfft N M K P1 P2\n");
        /* MPI was initialized above, so it must be finalized before exiting. */
        MPI_Finalize();
        return 1;
    }

    gmx_parallel_3dfft_t      pfft_setup;
    ivec                      ndata = {atoi(argv[1]),atoi(argv[2]),atoi(argv[3])};
    real *                    real_data;
    t_complex *               complex_data;
    MPI_Comm                  comm[2];
    ivec                      local_ndata;
    ivec                      local_offset;
    ivec                      local_size;

    int P[]= {atoi(argv[4]),atoi(argv[5])};
    real* compare;

    /* Total number of global grid points; used for the allocation size,
     * the rescaling of the round-trip result and the error tolerance. */
    int ngrid = ndata[0]*ndata[1]*ndata[2];
    int cidx; /* index into compare, packed with local_ndata strides */
    int ridx; /* index into real_data, using the local_size strides */

    split_communicator(MPI_COMM_WORLD,comm,P);
    gmx_parallel_3dfft_init   (&pfft_setup, ndata, &real_data, &complex_data, comm, 0, 0); /* last two: slab2index, bReproducible */
    gmx_parallel_3dfft_real_limits(pfft_setup,local_ndata,local_offset,local_size);
    snew(compare,ngrid);
    /* Seed differently on every rank so the ranks do not share input data. */
    srand(time(0)+prank);
    if (debug) {
        fprintf(debug,"local_ndata: %d %d %d\n",local_ndata[0],local_ndata[1],local_ndata[2]);
        fprintf(debug,"local_size: %d %d %d\n",local_size[0],local_size[1],local_size[2]);
        fprintf(debug,"local_offset: %d %d %d\n",local_offset[0],local_offset[1],local_offset[2]);
    }

    /* Fill the FFT input with random values and keep a packed reference copy. */
    for (i=0; i<local_ndata[0]; i++)  {
        for (j=0; j<local_ndata[1]; j++)  {
            for (k=0; k<local_ndata[2]; k++)  {
                cidx = i*local_ndata[1]*local_ndata[2]+j*local_ndata[2]+k;
                ridx = i*local_size[1]*local_size[2]+j*local_size[2]+k;
                compare[cidx]=real_data[ridx]=((real)rand())/RAND_MAX;
            }
        }
    }

    /* Forward transform followed by the backward transform. */
    gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_REAL_TO_COMPLEX,0,0);
    gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_COMPLEX_TO_REAL,0,0);

    /* Compare the rescaled round-trip result against the reference copy. */
    for (i=0; i<local_ndata[0]; i++)  {
        for (j=0; j<local_ndata[1]; j++)  {
            for (k=0; k<local_ndata[2]; k++)  {
                cidx = i*local_ndata[1]*local_ndata[2]+j*local_ndata[2]+k;
                ridx = i*local_size[1]*local_size[2]+j*local_size[2]+k;
                if (fabs(compare[cidx] - real_data[ridx]/ngrid)>2*ngrid*GMX_REAL_EPS) {
                    printf("error: %d %d %d: %f %f\n",i,j,k,compare[cidx],
                           real_data[ridx]/ngrid);
                }
            }
        }
    }
    gmx_parallel_3dfft_destroy(pfft_setup);
    MPI_Finalize();
    return 0;
}
Beispiel #2
0
/*! \brief
 * Runs one 3D FFT, dispatching to the GPU or to the threaded CPU code.
 *
 * When pme_gpu_performs_FFT() reports that the GPU does the FFT, the transform
 * is launched through pme_gpu_3dfft() inside the GPU launch cycle counters
 * (started without incrementing their counts). Otherwise
 * gmx_parallel_3dfft_execute() is called once per thread index from an OpenMP
 * loop over pme->nthread threads, timed with the mixed-mode FFT counter.
 *
 * \param[in] pme            The PME structure.
 * \param[in] gridIndex      The grid index - asserted to be 0.
 * \param[in] dir            The FFT direction enum.
 * \param[in] wcycle         The wallclock counter.
 */
void inline parallel_3dfft_execute_gpu_wrapper(gmx_pme_t              *pme,
                                               const int               gridIndex,
                                               enum gmx_fft_direction  dir,
                                               gmx_wallcycle_t         wcycle)
{
    GMX_ASSERT(gridIndex == 0, "Only single grid supported");

    if (!pme_gpu_performs_FFT(pme->gpu))
    {
        /* CPU path: every OpenMP thread executes its own share of the FFT. */
        const int numThreads = pme->nthread;

        wallcycle_start(wcycle, ewcPME_FFT_MIXED_MODE);
#pragma omp parallel for num_threads(numThreads) schedule(static)
        for (int t = 0; t < numThreads; ++t)
        {
            gmx_parallel_3dfft_execute(pme->pfft_setup[gridIndex], dir, t, wcycle);
        }
        wallcycle_stop(wcycle, ewcPME_FFT_MIXED_MODE);
        return;
    }

    /* GPU path: only the launch is timed here, under the GPU launch counters. */
    wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
    wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
    pme_gpu_3dfft(pme->gpu, dir, gridIndex);
    wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
    wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}