/*
 * Round-trip check of a 4-dimensional real-to-complex PFFT on a 2 x 2
 * process mesh: plan forward and backward transforms, run both, rescale the
 * result and print the error reported by pfft_check_output_real().
 *
 * Returns 0 on success, or 1 when the process mesh could not be created.
 */
int main(int argc, char **argv)
{
  /* Size of the FFT and of the process mesh */
  ptrdiff_t n[4] = {13, 14, 19, 17};
  int np[2] = {2, 2};

  /* Local block sizes and offsets of the input and output distributions */
  ptrdiff_t local_ni[4], local_i_start[4];
  ptrdiff_t local_no[4], local_o_start[4];
  MPI_Comm comm_cart_2d;

  /* Bring up MPI, then PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Build the np[0] x np[1] process grid; a non-zero result means failure */
  int mesh_failed = pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d);
  if( mesh_failed ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Query the data distribution for transposed output */
  ptrdiff_t alloc_local = pfft_local_size_dft_r2c(
      4, n, comm_cart_2d, PFFT_TRANSPOSED_OUT,
      local_ni, local_i_start, local_no, local_o_start);

  /* Buffers: 2 * alloc_local reals and alloc_local complex values */
  double *in = pfft_alloc_real(2 * alloc_local);
  pfft_complex *out = pfft_alloc_complex(alloc_local);

  /* Forward plan: real `in` -> complex `out` */
  pfft_plan plan_forw = pfft_plan_dft_r2c(
      4, n, in, out, comm_cart_2d, PFFT_FORWARD,
      PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT);

  /* Backward plan: complex `out` -> real `in` */
  pfft_plan plan_back = pfft_plan_dft_c2r(
      4, n, out, in, comm_cart_2d, PFFT_BACKWARD,
      PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT);

  /* Fill the local input block (done after planning, as in the original order) */
  pfft_init_input_real(4, n, local_ni, local_i_start, in);

  /* Forward transform followed by the backward transform */
  pfft_execute(plan_forw);
  pfft_execute(plan_back);

  /* Divide every local element by the global number of mesh points */
  const ptrdiff_t local_count = local_ni[0] * local_ni[1] * local_ni[2] * local_ni[3];
  const double total = (double)(n[0]*n[1]*n[2]*n[3]);
  for(ptrdiff_t k = 0; k < local_count; ++k){
    in[k] /= total;
  }

  /* Report the error of the back-transformed data */
  MPI_Barrier(MPI_COMM_WORLD);
  double err = pfft_check_output_real(4, n, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td, %td):\n", n[0], n[1], n[2], n[3]); 
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);

  /* Release plans, communicator and buffers, then shut MPI down */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(in);
  pfft_free(out);
  MPI_Finalize();
  return 0;
}
/* Example no. 2 */
/* 0 */
/*
 * Set up `pm` from the parameters in `init` for the MPI tasks of `comm`.
 *
 * Steps, in order:
 *   1. copy *init into pm->init and record the rank and size of `comm`;
 *   2. choose the Nproc[0] x Nproc[1] process mesh and create pm->Comm2D;
 *   3. query the local FFT layout (IRegion, ORegion, allocsize) and derive
 *      the strides that go with it;
 *   4. for each of the two decomposed dimensions, build the edge tables and
 *      the mesh-index -> process-coordinate lookup table;
 *   5. create the r2c / c2r plans on two temporary buffers;
 *   6. tabulate the wavenumber of every mesh index (pm->MeshtoK).
 *
 * The arrays stored in pm->Grid and pm->MeshtoK are malloc'ed here and are
 * not released by this function.
 *
 * Failures are reported through fastpm_raise().
 * NOTE(review): the code assumes fastpm_raise() does not return -- confirm.
 */
void pm_init(PM * pm, PMInit * init, MPI_Comm comm) {

    pm->init = *init;
    pm->mem = _libfastpm_get_gmem();

    /* initialize the domain */
    MPI_Comm_rank(comm, &pm->ThisTask);
    MPI_Comm_size(comm, &pm->NTask);

    /* Pick the number of processes along Y.
     * NprocY <= 0 requests an automatic choice: 1 for FFTW, otherwise the
     * largest divisor of NTask that does not exceed ceil(sqrt(NTask)). */
    int Ny = init->NprocY;
    if(Ny <= 0) {
        Ny = 1;
        if(!init->use_fftw) {
            /* smallest Ny with Ny * Ny >= NTask ... */
            for(; Ny * Ny < pm->NTask; Ny ++) continue;
            /* ... then walk down to a divisor of NTask (Ny == 1 always is) */
            for(; Ny >= 1; Ny--) {
                if (pm->NTask % Ny == 0) break;
            }
        }
    } else {
        if(pm->NTask % Ny != 0) {
            fastpm_raise(-1, "NprocY(%d) and NTask(%d) is incompatible\n", Ny, pm->NTask);
        }
    }
    int Nx = pm->NTask / Ny;
    pm->Nproc[0] = Nx;
    pm->Nproc[1] = Ny;
    if(init->use_fftw) {
        if(Ny != 1) {
            fastpm_raise(-1, "FFTW requires Ny == 1; Ny = %d\n", Ny);
        }
    }
    int d;

    /* Per-dimension geometry; Norm and Volume accumulate the products of
     * Nmesh[d] and BoxSize[d] respectively. */
    pm->Norm = 1.0;
    pm->Volume = 1.0;
    for(d = 0; d < 3; d ++) {
        pm->Nmesh[d] = init->Nmesh;
        pm->BoxSize[d] = init->BoxSize;

        pm->Below[d] = 0;
        pm->Above[d] = 1;

        pm->CellSize[d] = pm->BoxSize[d] / pm->Nmesh[d];
        pm->InvCellSize[d] = 1.0 / pm->CellSize[d]; 
        pm->Norm *= pm->Nmesh[d];
        pm->Volume *= pm->BoxSize[d];
    }

    /* A non-zero return value signals that the mesh could not be created */
    if(pfft_create_procmesh(2, comm, pm->Nproc, &pm->Comm2D)) {
        fastpm_raise(-1, "Failed to create a %d x %d process mesh on %d tasks\n",
                Nx, Ny, pm->NTask);
    }

    /* The size query returns a count that is doubled to obtain allocsize,
     * and fills in the local input (IRegion) and output (ORegion) extents. */
    if(init->use_fftw) {
        pm->allocsize = 2 * fftw_local_size_dft_r2c(
                3, pm->Nmesh, pm->Comm2D, 
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_OUT:0),
                pm->IRegion.size, pm->IRegion.start,
                pm->ORegion.size, pm->ORegion.start);
    } else {
        pm->allocsize = 2 * pfft_local_size_dft_r2c(
                3, pm->Nmesh, pm->Comm2D, 
                (pm->init.transposed?PFFT_TRANSPOSED_OUT:0)
                | PFFT_PADDED_R2C, 
                pm->IRegion.size, pm->IRegion.start,
                pm->ORegion.size, pm->ORegion.start);
    }

    /* Note that we need to fix up the padded size of the real data;
     * and transpose with strides. The strides below are computed from the
     * size[2] reported by the query, before it is overwritten. */

    pm->IRegion.strides[2] = 1;
    pm->IRegion.strides[1] = pm->IRegion.size[2];
    pm->IRegion.strides[0] = pm->IRegion.size[1] * pm->IRegion.strides[1];
    pm->IRegion.total = pm->IRegion.size[0] * pm->IRegion.strides[0];

    /* remove padding from the view */
    pm->IRegion.size[2] = pm->Nmesh[2];

    if(pm->init.transposed) {
        if(pm->init.use_fftw) {
            /* FFTW transposed, y, x, z */
            pm->ORegion.strides[2] = 1;
            pm->ORegion.strides[0] = pm->ORegion.size[2];
            pm->ORegion.strides[1] = pm->ORegion.size[0] * pm->ORegion.strides[0];
            pm->ORegion.total = pm->ORegion.size[1] * pm->ORegion.strides[1];

        } else {
            /* PFFT transposed, y, z, x */
            pm->ORegion.strides[0] = 1;
            pm->ORegion.strides[2] = pm->ORegion.size[0];
            pm->ORegion.strides[1] = pm->ORegion.size[2] * pm->ORegion.strides[2];
            pm->ORegion.total = pm->ORegion.size[1] * pm->ORegion.strides[1];
        }
    } else {
        /* non-transposed */
        pm->ORegion.strides[2] = 1;
        pm->ORegion.strides[1] = pm->ORegion.size[2];
        pm->ORegion.strides[0] = pm->ORegion.size[1] * pm->ORegion.strides[1];
        pm->ORegion.total = pm->ORegion.size[0] * pm->ORegion.strides[0];
    }

    /* Domain decomposition tables for the two decomposed dimensions */
    for(d = 0; d < 2; d ++) {
        MPI_Comm projected;
        int remain_dims[2] = {0, 0};
        remain_dims[d] = 1; 

        pm->Grid.edges_int[d] = 
            malloc(sizeof(pm->Grid.edges_int[0][0]) * (pm->Nproc[d] + 1));
        pm->Grid.edges_float[d] = 
            malloc(sizeof(pm->Grid.edges_float[0][0]) * (pm->Nproc[d] + 1));

        pm->Grid.MeshtoCart[d] = malloc(sizeof(int) * pm->Nmesh[d]);

        if(!pm->Grid.edges_int[d]
        || !pm->Grid.edges_float[d]
        || !pm->Grid.MeshtoCart[d]) {
            fastpm_raise(-1, "Failed to allocate the domain tables of dimension %d\n", d);
        }

        /* Keep only dimension d of the mesh and gather, from every process
         * along it, the start of its input region in that dimension. */
        MPI_Cart_sub(pm->Comm2D, remain_dims, &projected);
        MPI_Allgather(&pm->IRegion.start[d], 1, MPI_PTRDIFF, 
            pm->Grid.edges_int[d], 1, MPI_PTRDIFF, projected);

        MPI_Comm_free(&projected);
        int j;
        for(j = 0; j < pm->Nproc[d]; j ++) {
            pm->Grid.edges_float[d][j] = 1.0 * pm->Grid.edges_int[d][j] / pm->Nmesh[d] * pm->BoxSize[d];
        }
        /* Last edge is at the edge of the box (j == Nproc[d] here) */
        pm->Grid.edges_float[d][j] = pm->BoxSize[d];
        pm->Grid.edges_int[d][j] = pm->Nmesh[d];
        /* fill in the look up table: mesh index i -> process coordinate j */
        for(j = 0; j < pm->Nproc[d]; j ++) {
            int i;
            for(i = pm->Grid.edges_int[d][j]; i < pm->Grid.edges_int[d][j+1]; i ++) {
                pm->Grid.MeshtoCart[d][i] = j;
            }
        }
    }

    /* Temporary buffers used only for planning; released right after. */
    FastPMFloat * canvas = pm_alloc(pm);
    FastPMFloat * workspace = pm_alloc(pm);

    if(pm->init.use_fftw) {
        pm->r2c = plan_dft_r2c_fftw(
                3, pm->Nmesh, (void*) workspace, (void*) canvas, 
                pm->Comm2D, 
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_OUT:0)
                | FFTW_ESTIMATE 
                | FFTW_DESTROY_INPUT
                );
        /* in-place plan on canvas */
        pm->c2r = plan_dft_c2r_fftw(
                3, pm->Nmesh, (void*) canvas, (void*) canvas, 
                pm->Comm2D, 
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_IN:0)
                | FFTW_ESTIMATE 
                | FFTW_DESTROY_INPUT
                );
    } else {
        pm->r2c = plan_dft_r2c(
                3, pm->Nmesh, (void*) workspace, (void*) canvas, 
                pm->Comm2D,
                PFFT_FORWARD, 
                (pm->init.transposed?PFFT_TRANSPOSED_OUT:0)
                | PFFT_PADDED_R2C 
                | PFFT_ESTIMATE 
                | PFFT_TUNE
                | PFFT_DESTROY_INPUT
                );
        /* in-place plan on workspace */
        pm->c2r = plan_dft_c2r(
                3, pm->Nmesh, (void*) workspace, (void*) workspace, 
                pm->Comm2D,
                PFFT_BACKWARD, 
                (pm->init.transposed?PFFT_TRANSPOSED_IN:0)
                | PFFT_PADDED_C2R 
                | PFFT_ESTIMATE 
                | PFFT_TUNE
                | PFFT_DESTROY_INPUT
                );
    }

    /* freed in the reverse order of allocation */
    pm_free(pm, workspace);
    pm_free(pm, canvas);

    /* Wavenumber of every mesh index: k = 2 * pi * m / BoxSize, where m is
     * the index i folded into the signed range centred on zero. */
    for(d = 0; d < 3; d++) {
        pm->MeshtoK[d] = malloc(pm->Nmesh[d] * sizeof(double));
        if(!pm->MeshtoK[d]) {
            fastpm_raise(-1, "Failed to allocate the wavenumber table of dimension %d\n", d);
        }
        int i;
        for(i = 0; i < pm->Nmesh[d]; i++) {
            int ii = i;
            /* Fold the upper half onto negative frequencies. Testing
             * 2 * ii >= Nmesh is the same as ii >= Nmesh / 2 for even
             * Nmesh, and keeps the middle index positive for odd Nmesh
             * (N = 5 -> 0, 1, 2, -2, -1 instead of 0, 1, -3, -2, -1). */
            if(2 * ii >= pm->Nmesh[d]) {
                ii -= pm->Nmesh[d];
            }
            pm->MeshtoK[d][i] = ii * 2 * M_PI / pm->BoxSize[d];
        }
    }
}