Esempio n. 1
0
/*
 * Round-trip test of a four-dimensional parallel real-to-real transform
 * distributed over a three-dimensional process mesh.
 *
 * The program plans a forward and a backward transform, fills the input with
 * the library's test data, runs forward then backward, rescales the result by
 * the product of the logical DFT sizes and prints the maximum error reported
 * by pfft_check_output_real.
 *
 * Returns 0 on success and 1 if the process mesh could not be created.
 */
int main(int argc, char **argv)
{
  /* Size of the FFT along each dimension and shape of the process mesh */
  ptrdiff_t n[4] = {13, 14, 19, 17};
  int np[3] = {2, 2, 2};

  /* FFTW kinds of the 1d R2R trafos, forward and backward, per dimension */
  pfft_r2r_kind kinds_forw[4] = {PFFT_REDFT00, PFFT_REDFT01, PFFT_RODFT00, PFFT_RODFT10};
  pfft_r2r_kind kinds_back[4] = {PFFT_REDFT00, PFFT_REDFT10, PFFT_RODFT00, PFFT_RODFT01};

  /* Logical DFT sizes corresponding to the FFTW manual:
   * for REDFT00 N=2*(n-1), for RODFT00 N=2*(n+1), otherwise N=2*n */
  ptrdiff_t N[4] = {2*(n[0]-1), 2*n[1], 2*(n[2]+1), 2*n[3]};

  /* The same planner flags are used for both directions */
  unsigned plan_flags = PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT;

  ptrdiff_t local_ni[4], local_i_start[4];
  ptrdiff_t local_no[4], local_o_start[4];
  MPI_Comm comm_cart_3d;

  /* Bring up MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Build the np[0] x np[1] x np[2] process grid; a non-zero return means it
   * could not be created, so report the required process count and quit */
  if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]*np[2]);
    MPI_Finalize();
    return 1;
  }

  /* Query the local block sizes/offsets and the required allocation size */
  ptrdiff_t alloc_local = pfft_local_size_r2r(
      4, n, comm_cart_3d, PFFT_TRANSPOSED_NONE,
      local_ni, local_i_start, local_no, local_o_start);

  /* One buffer for each side of the transform */
  double *in  = pfft_alloc_real(alloc_local);
  double *out = pfft_alloc_real(alloc_local);

  /* Forward plan goes in -> out, backward plan goes out -> in */
  pfft_plan plan_forw = pfft_plan_r2r(
      4, n, in, out, comm_cart_3d, kinds_forw, plan_flags);
  pfft_plan plan_back = pfft_plan_r2r(
      4, n, out, in, comm_cart_3d, kinds_back, plan_flags);

  /* Fill the input only after planning */
  pfft_init_input_real(4, n, local_ni, local_i_start, in);

  /* Forward transform */
  pfft_execute(plan_forw);

  /* Clear the old input before it is overwritten by the backward transform */
  pfft_clear_input_real(4, n, local_ni, local_i_start, in);

  /* Backward transform */
  pfft_execute(plan_back);

  /* Undo the scaling of the unnormalized round trip */
  ptrdiff_t local_total = local_ni[0] * local_ni[1] * local_ni[2] * local_ni[3];
  ptrdiff_t scale = N[0]*N[1]*N[2]*N[3];
  for(ptrdiff_t k=0; k < local_total; k++)
    in[k] /= scale;

  /* Report the error of the back-transformed data */
  MPI_Barrier(MPI_COMM_WORLD);
  double max_err = pfft_check_output_real(4, n, local_ni, local_i_start, in, comm_cart_3d);
  pfft_printf(comm_cart_3d, "Error after one forward and backward trafo of size n=(%td, %td, %td, %td):\n", n[0], n[1], n[2], n[3]); 
  pfft_printf(comm_cart_3d, "maxerror = %6.2e;\n", max_err);

  /* Release plans, communicator and buffers, then shut down MPI */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_3d);
  pfft_free(in);
  pfft_free(out);
  MPI_Finalize();
  return 0;
}
/*
 * Test of the parallel ghost-cell exchange for three-dimensional complex data.
 *
 * The program sets defaults, lets init_parameters override them from the
 * command line, plans a ghost-cell exchange, runs pfft_exchange followed by
 * its adjoint pfft_reduce, rescales the local data and prints the maximum
 * error reported by pfft_check_output_complex_3d.
 *
 * Returns 0 on success and 1 if the process mesh could not be created.
 */
int main(int argc, char **argv){
  ptrdiff_t n[3], gc_below[3], gc_above[3];
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  ptrdiff_t local_ngc[3], local_gc_start[3];
  ptrdiff_t alloc_local, alloc_local_gc;
  int np[3], rnk_self, size, patience, verbose;
  unsigned pfft_flags=0;
  double err;
  MPI_Comm comm_cart_3d;
  pfft_complex *data;
  pfft_gcplan ths;
  
  MPI_Init(&argc, &argv);
  pfft_init();
  /* rank and size are queried here but not used further in this function */
  MPI_Comm_rank(MPI_COMM_WORLD, &rnk_self);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  
  /* default values */
  n[0] = n[1] = n[2] = 2; /*  n[0] = 3; n[1] = 5; n[2] = 7;*/
  np[0]=1; np[1]=1; np[2] = 3;
  verbose = 1;
  /* Give patience a default like every other option, so the switch below never
   * reads an indeterminate value if init_parameters leaves it untouched.
   * 1 selects PFFT_MEASURE, the same flag the default branch picks. */
  patience = 1;
  for(int t=0; t<3; t++){
    gc_below[t] = 0;
    gc_above[t] = 0;
  }
  /* by default only the first dimension gets ghost cells, two of them above */
  gc_below[0] = 0;
  gc_above[0] = 2;

  /* set values by commandline */
  init_parameters(argc, argv, n, np, gc_below, gc_above, &patience, &verbose);

  /* Map the patience level to a planner flag.
   * NOTE(review): pfft_flags is not passed to any call below - confirm whether
   * it is still needed. */
  switch(patience){
    case 0: pfft_flags = PFFT_ESTIMATE; break;
    case 2: pfft_flags = PFFT_PATIENT; break;
    case 3: pfft_flags = PFFT_EXHAUSTIVE; break;
    default: pfft_flags = PFFT_MEASURE;
  }

  /* Create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */
  if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]*np[2]);
    MPI_Finalize();
    return 1;
  }

  /* Get parameters of data distribution */
  alloc_local = pfft_local_size_dft_3d(n, comm_cart_3d, PFFT_TRANSPOSED_NONE,
      local_ni, local_i_start, local_no, local_o_start);

  /* Get the block size/offset including ghost cells and the enlarged
   * allocation size */
  alloc_local_gc = pfft_local_size_gc_3d(
      local_ni, local_i_start, alloc_local, gc_below, gc_above,
      local_ngc, local_gc_start);

  /* Allocate memory (sized for the block including ghost cells) */
  data = pfft_alloc_complex(alloc_local_gc);

  /* Plan parallel ghost cell send */
  ths = pfft_plan_cgc_3d(n, gc_below, gc_above,
      data, comm_cart_3d, PFFT_GC_NONTRANSPOSED);

  /* Initialize input with random numbers */
  pfft_init_input_complex_3d(n, local_ni, local_i_start,
      data);

  /* check gcell input */
  if(verbose)
    pfft_apr_complex_3d(data, local_ni, local_i_start, "gcell input", comm_cart_3d);

  /* Execute parallel ghost cell send */
  pfft_exchange(ths);

  /* check output: print the block including the received ghost cells */
  if(verbose)
    pfft_apr_complex_3d(data, local_ngc, local_gc_start, "exchanged gcells", comm_cart_3d);
  
  /* Execute adjoint parallel ghost cell send */
  pfft_reduce(ths);

  /* check the data after the reduce step */
  if(verbose)
    pfft_apr_complex_3d(data, local_no, local_o_start, "reduced gcells", comm_cart_3d);

  /* Scale data.
   * NOTE(review): the divisor 4 is hard-coded; presumably it matches the number
   * of contributions summed by the reduce for the intended ghost-cell setup -
   * confirm it is still right when sizes are changed on the command line. */
  for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++)
    data[l] /= 4;

  /* Print error of the data after exchange and reduce */
  MPI_Barrier(comm_cart_3d);
  err = pfft_check_output_complex_3d(n, local_ni, local_i_start, data, comm_cart_3d);
  pfft_printf(comm_cart_3d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); 
  pfft_printf(comm_cart_3d, "maxerror = %6.2e;\n", err);


  /* free mem and finalize */
  pfft_destroy_gcplan(ths);
  MPI_Comm_free(&comm_cart_3d);
  pfft_free(data);
  MPI_Finalize();
  return 0;
}
Esempio n. 3
0
/*
 * Round-trip test of a four-dimensional parallel complex-to-complex transform
 * distributed over a three-dimensional process mesh.
 *
 * The program plans a forward and a backward transform, fills the input with
 * the library's test data, runs forward then backward, rescales the result by
 * the total number of grid points and prints the maximum error reported by
 * pfft_check_output_complex.
 *
 * Returns 0 on success and 1 if the process mesh could not be created.
 */
int main(int argc, char **argv)
{
  /* Size of the FFT along each dimension and shape of the process mesh */
  ptrdiff_t n[4] = {13, 14, 19, 17};
  int np[3] = {2, 2, 2};

  /* The same planner flags are used for both directions */
  unsigned plan_flags = PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT;

  ptrdiff_t local_ni[4], local_i_start[4];
  ptrdiff_t local_no[4], local_o_start[4];
  MPI_Comm comm_cart_3d;

  /* Bring up MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Build the np[0] x np[1] x np[2] process grid; a non-zero return means it
   * could not be created, so report the required process count and quit */
  if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]*np[2]);
    MPI_Finalize();
    return 1;
  }

  /* Query the local block sizes/offsets and the required allocation size */
  ptrdiff_t alloc_local = pfft_local_size_dft(
      4, n, comm_cart_3d, PFFT_TRANSPOSED_NONE,
      local_ni, local_i_start, local_no, local_o_start);

  /* One buffer for each side of the transform */
  pfft_complex *in  = pfft_alloc_complex(alloc_local);
  pfft_complex *out = pfft_alloc_complex(alloc_local);

  /* Forward plan goes in -> out, backward plan goes out -> in */
  pfft_plan plan_forw = pfft_plan_dft(
      4, n, in, out, comm_cart_3d, PFFT_FORWARD, plan_flags);
  pfft_plan plan_back = pfft_plan_dft(
      4, n, out, in, comm_cart_3d, PFFT_BACKWARD, plan_flags);

  /* Fill the input only after planning */
  pfft_init_input_complex(4, n, local_ni, local_i_start, in);

  /* Forward transform */
  pfft_execute(plan_forw);

  /* Clear the old input before it is overwritten by the backward transform */
  pfft_clear_input_complex(4, n, local_ni, local_i_start, in);

  /* Backward transform */
  pfft_execute(plan_back);

  /* Undo the scaling of the unnormalized round trip */
  ptrdiff_t local_total = local_ni[0] * local_ni[1] * local_ni[2] * local_ni[3];
  ptrdiff_t scale = n[0]*n[1]*n[2]*n[3];
  for(ptrdiff_t k=0; k < local_total; k++)
    in[k] /= scale;

  /* Report the error of the back-transformed data */
  double max_err = pfft_check_output_complex(4, n, local_ni, local_i_start, in, comm_cart_3d);
  pfft_printf(comm_cart_3d, "Error after one forward and backward trafo of size n=(%td, %td, %td, %td):\n", n[0], n[1], n[2], n[3]); 
  pfft_printf(comm_cart_3d, "maxerror = %6.2e;\n", max_err);

  /* Release plans, communicator and buffers, then shut down MPI */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_3d);
  pfft_free(in);
  pfft_free(out);
  MPI_Finalize();
  return 0;
}
Esempio n. 4
0
/*
 * Initialize a PM object from the settings in `init` on communicator `comm`.
 *
 * Steps, in order:
 *   - copy *init into pm->init and record the rank/size of comm;
 *   - choose the Nx x Ny process decomposition (pm->Nproc);
 *   - fill the per-dimension mesh/box geometry, pm->Norm and pm->Volume;
 *   - create the 2d process mesh pm->Comm2D;
 *   - query the local input/output regions and derive their strides;
 *   - build the edge tables and the mesh-index -> process-coordinate lookup
 *     tables for the two decomposed dimensions;
 *   - create the r2c / c2r plans using two temporary buffers;
 *   - build the mesh-index -> wavenumber tables pm->MeshtoK.
 *
 * Errors are reported through fastpm_raise (presumably it does not return -
 * confirm against its definition).
 *
 * The tables allocated here with malloc (Grid.edges_int, Grid.edges_float,
 * Grid.MeshtoCart, MeshtoK) are not released in this function.
 */
void pm_init(PM * pm, PMInit * init, MPI_Comm comm) {

    pm->init = *init;
    pm->mem = _libfastpm_get_gmem();

    /* initialize the domain */
    MPI_Comm_rank(comm, &pm->ThisTask);
    MPI_Comm_size(comm, &pm->NTask);

    int Ny = init->NprocY;
    if(Ny <= 0) {
        /* No usable NprocY given: pick one. With FFTW Ny stays 1; otherwise
         * take the largest divisor of NTask that does not exceed
         * ceil(sqrt(NTask)). */
        Ny = 1;
        if(!init->use_fftw) {
            for(; Ny * Ny < pm->NTask; Ny ++) continue;
            for(; Ny >= 1; Ny--) {
                if (pm->NTask % Ny == 0) break;
            }
        }
    } else {
        if(pm->NTask % Ny != 0) {
            fastpm_raise(-1, "NprocY(%d) and NTask(%d) is incompatible\n", Ny, pm->NTask);
        }
    }
    int Nx = pm->NTask / Ny;
    pm->Nproc[0] = Nx;
    pm->Nproc[1] = Ny;
    if(init->use_fftw) {
        if(Ny != 1) {
            fastpm_raise(-1, "FFTW requires Ny == 1; Ny = %d\n", Ny);
        }
    }
    int d;

    /* Norm accumulates the product of the mesh sizes, Volume the product of
     * the box sizes. */
    pm->Norm = 1.0;
    pm->Volume = 1.0;
    for(d = 0; d < 3; d ++) {
        pm->Nmesh[d] = init->Nmesh;
        pm->BoxSize[d] = init->BoxSize;

        pm->Below[d] = 0;
        pm->Above[d] = 1;

        pm->CellSize[d] = pm->BoxSize[d] / pm->Nmesh[d];
        pm->InvCellSize[d] = 1.0 / pm->CellSize[d]; 
        pm->Norm *= pm->Nmesh[d];
        pm->Volume *= pm->BoxSize[d];
    }

    /* A non-zero return means the mesh could not be created; everything below
     * depends on Comm2D, so report it instead of carrying on. */
    if(pfft_create_procmesh(2, comm, pm->Nproc, &pm->Comm2D)) {
        fastpm_raise(-1, "Failed to create a %d x %d process mesh with %d tasks\n", Nx, Ny, pm->NTask);
    }

    /* Query the local input (real) and output (complex) regions. The returned
     * size is doubled to get allocsize. */
    if(init->use_fftw) {
        pm->allocsize = 2 * fftw_local_size_dft_r2c(
                3, pm->Nmesh, pm->Comm2D, 
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_OUT:0),
                pm->IRegion.size, pm->IRegion.start,
                pm->ORegion.size, pm->ORegion.start);
    } else {
        pm->allocsize = 2 * pfft_local_size_dft_r2c(
                3, pm->Nmesh, pm->Comm2D, 
                (pm->init.transposed?PFFT_TRANSPOSED_OUT:0)
                | PFFT_PADDED_R2C, 
                pm->IRegion.size, pm->IRegion.start,
                pm->ORegion.size, pm->ORegion.start);
    }

    /* Note that we need to fix up the padded size of the real data;
     * and transpose with strides , */


    /* Input strides are computed from the padded sizes, last dimension
     * fastest. */
    pm->IRegion.strides[2] = 1;
    pm->IRegion.strides[1] = pm->IRegion.size[2];
    pm->IRegion.strides[0] = pm->IRegion.size[1] * pm->IRegion.strides[1];
    pm->IRegion.total = pm->IRegion.size[0] * pm->IRegion.strides[0];

    /* remove padding from the view */
    pm->IRegion.size[2] = pm->Nmesh[2];

    if(pm->init.transposed) {
        if(pm->init.use_fftw) {
            /* FFTW transposed, y, x, z */
            pm->ORegion.strides[2] = 1;
            pm->ORegion.strides[0] = pm->ORegion.size[2];
            pm->ORegion.strides[1] = pm->ORegion.size[0] * pm->ORegion.strides[0];
            pm->ORegion.total = pm->ORegion.size[1] * pm->ORegion.strides[1];

        } else {
            /* PFFT transposed, y, z, x */
            pm->ORegion.strides[0] = 1;
            pm->ORegion.strides[2] = pm->ORegion.size[0];
            pm->ORegion.strides[1] = pm->ORegion.size[2] * pm->ORegion.strides[2];
            pm->ORegion.total = pm->ORegion.size[1] * pm->ORegion.strides[1];
        }
    } else {
        /* non-transposed */
        pm->ORegion.strides[2] = 1;
        pm->ORegion.strides[1] = pm->ORegion.size[2];
        pm->ORegion.strides[0] = pm->ORegion.size[1] * pm->ORegion.strides[1];
        pm->ORegion.total = pm->ORegion.size[0] * pm->ORegion.strides[0];
    }

    /* For each of the two decomposed dimensions, gather the start index of
     * every process along that dimension and build the lookup tables. */
    for(d = 0; d < 2; d ++) {
        MPI_Comm projected;
        int remain_dims[2] = {0, 0};
        remain_dims[d] = 1; 

        pm->Grid.edges_int[d] = 
            malloc(sizeof(pm->Grid.edges_int[0][0]) * (pm->Nproc[d] + 1));
        pm->Grid.edges_float[d] = 
            malloc(sizeof(pm->Grid.edges_float[0][0]) * (pm->Nproc[d] + 1));

        pm->Grid.MeshtoCart[d] = malloc(sizeof(int) * pm->Nmesh[d]);

        if(!pm->Grid.edges_int[d] || !pm->Grid.edges_float[d] || !pm->Grid.MeshtoCart[d]) {
            fastpm_raise(-1, "Failed to allocate the domain lookup tables for dimension %d\n", d);
        }

        /* Sub-communicator that keeps only cartesian dimension d */
        MPI_Cart_sub(pm->Comm2D, remain_dims, &projected);
        MPI_Allgather(&pm->IRegion.start[d], 1, MPI_PTRDIFF, 
            pm->Grid.edges_int[d], 1, MPI_PTRDIFF, projected);

        MPI_Comm_free(&projected);
        int j;
        /* Convert the integer edges to positions in box units */
        for(j = 0; j < pm->Nproc[d]; j ++) {
            pm->Grid.edges_float[d][j] = 1.0 * pm->Grid.edges_int[d][j] / pm->Nmesh[d] * pm->BoxSize[d];
        }
        /* Last edge is at the edge of the box (j == pm->Nproc[d] here) */
        pm->Grid.edges_float[d][j] = pm->BoxSize[d];
        pm->Grid.edges_int[d][j] = pm->Nmesh[d];
        /* fill in the look up table: mesh index -> process coordinate */
        for(j = 0; j < pm->Nproc[d]; j ++) {
            int i;
            for(i = pm->Grid.edges_int[d][j]; i < pm->Grid.edges_int[d][j+1]; i ++) {
                pm->Grid.MeshtoCart[d][i] = j;
            }
        }
    }

    /* Temporary buffers used only while planning; freed right after */
    FastPMFloat * canvas = pm_alloc(pm);
    FastPMFloat * workspace = pm_alloc(pm);

    if(pm->init.use_fftw) {
        pm->r2c = plan_dft_r2c_fftw(
                3, pm->Nmesh, (void*) workspace, (void*) canvas, 
                pm->Comm2D, 
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_OUT:0)
                | FFTW_ESTIMATE 
                | FFTW_DESTROY_INPUT
                );
        pm->c2r = plan_dft_c2r_fftw(
                3, pm->Nmesh, (void*) canvas, (void*) canvas, 
                pm->Comm2D, 
                (pm->init.transposed?FFTW_MPI_TRANSPOSED_IN:0)
                | FFTW_ESTIMATE 
                | FFTW_DESTROY_INPUT
                );
    } else {
        pm->r2c = plan_dft_r2c(
                3, pm->Nmesh, (void*) workspace, (void*) canvas, 
                pm->Comm2D,
                PFFT_FORWARD, 
                (pm->init.transposed?PFFT_TRANSPOSED_OUT:0)
                | PFFT_PADDED_R2C 
                | PFFT_ESTIMATE 
                | PFFT_TUNE
                //| PFFT_MEASURE
                | PFFT_DESTROY_INPUT
                );
        pm->c2r = plan_dft_c2r(
                3, pm->Nmesh, (void*) workspace, (void*) workspace, 
                pm->Comm2D,
                PFFT_BACKWARD, 
                (pm->init.transposed?PFFT_TRANSPOSED_IN:0)
                | PFFT_PADDED_C2R 
                | PFFT_ESTIMATE 
                //| PFFT_MEASURE
                | PFFT_TUNE
                | PFFT_DESTROY_INPUT
                );
    }

    pm_free(pm, workspace);
    pm_free(pm, canvas);

    /* Mesh index -> wavenumber table: indices at or above Nmesh/2 are wrapped
     * to negative values before scaling by 2*pi/BoxSize. */
    for(d = 0; d < 3; d++) {
        pm->MeshtoK[d] = malloc(pm->Nmesh[d] * sizeof(double));
        if(!pm->MeshtoK[d]) {
            fastpm_raise(-1, "Failed to allocate the wavenumber table for dimension %d\n", d);
        }
        int i;
        for(i = 0; i < pm->Nmesh[d]; i++) {
            int ii = i;
            if(ii >= pm->Nmesh[d] / 2) {
                ii -= pm->Nmesh[d];
            }
            pm->MeshtoK[d][i] = ii * 2 * M_PI / pm->BoxSize[d];
        }
    }
}