コード例 #1
0
ファイル: manual_c2c_3d.c プロジェクト: biotrump/pfft
int main(int argc, char **argv){
  int np[2];
  ptrdiff_t n[3];
  ptrdiff_t alloc_local;
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  pfft_complex *in, *out;
  pfft_plan plan=NULL;
  MPI_Comm comm_cart_2d;
  
  /* Set size of FFT and process mesh */
  n[0] = 2; n[1] = 2; n[2] = 4;
  np[0] = 2; np[1] = 2;
  
  /* Initialize MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Create two-dimensional process grid of size np[0] x np[1] */
  pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1],
      &comm_cart_2d);

  /* Get parameters of data distribution */
  alloc_local = pfft_local_size_dft_3d(
      n, comm_cart_2d, PFFT_TRANSPOSED_NONE,
      local_ni, local_i_start, local_no, local_o_start);

  /* Allocate memory */
  in  = pfft_alloc_complex(alloc_local);
  out = pfft_alloc_complex(alloc_local);

  /* Plan parallel forward FFT */
  plan = pfft_plan_dft_3d(n, in, out, comm_cart_2d,
      PFFT_FORWARD, PFFT_TRANSPOSED_NONE);

  /* Initialize input with random numbers */
  pfft_init_input_complex_3d(n, local_ni, local_i_start,
      in);

  /* Execute parallel forward FFT */
  pfft_execute(plan);

  /* free mem and finalize MPI */
  pfft_destroy_plan(plan);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(in); pfft_free(out);
  MPI_Finalize();
  return 0;
}
コード例 #2
0
ファイル: serial_c2c.c プロジェクト: arnolda/scafacos
/*
 * Serial FFTW reference run: performs `iter` forward/backward 3d
 * complex-to-complex transforms of size n[0] x n[1] x n[2] with plain FFTW,
 * prints the average execution time of each direction and the error of the
 * rescaled round-trip data.
 *
 * Defaults (n = 16^3, iter = 10, out-of-place, FFTW_ESTIMATE) are passed to
 * init_parameters(), which reads the command line.
 *
 * Returns 0 on success and 1 if the input/output arrays cannot be allocated.
 */
int main(int argc, char **argv)
{
  int n[3];
  pfft_complex *in, *out;
  FFTW(plan) plan_forw=NULL, plan_back=NULL;
  double err, time_fftw[2];
  unsigned fftw_flag;

  /* setup default parameters */
  int iter = 10, inplace = 0, patience = 0;  
  
  /* Set size of FFT */
  n[0] = n[1] = n[2] = 16;
 
  /* Initialize MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* read parameters from command line */
  init_parameters(argc, argv, n, &iter, &inplace, &patience);

  /* Total number of elements; computed in ptrdiff_t because the product of
   * three ints overflows int for large transform sizes. */
  const ptrdiff_t n_total = (ptrdiff_t) n[0] * n[1] * n[2];

  /* setup FFTWs planing depth */  
  switch(patience){
    case 1: fftw_flag = FFTW_MEASURE; break;
    case 2: fftw_flag = FFTW_PATIENT; break;
    case 3: fftw_flag = FFTW_EXHAUSTIVE; break;
    default: fftw_flag = FFTW_ESTIMATE;
  }
  
  if(!inplace)
    fftw_flag |= FFTW_DESTROY_INPUT;

  /* Allocate memory; in-place runs share one buffer */
  in = pfft_alloc_complex(n_total);
  out = (inplace) ? in : pfft_alloc_complex(n_total);

  /* We often want to scale large FFTs, which do not fit on few processes. */
  if( (in == NULL) || (out == NULL)){
    fprintf(stderr, "!!! Error: Not enough memory to allocate input/output arrays !!!\n");
    /* MPI_Finalize must be called exactly once per process */
    MPI_Finalize();
    return 1;
  }

  /* The PFFT init/check helpers take ptrdiff_t sizes; the whole array is local */
  ptrdiff_t local_ni[3], local_i_start[3], n_ptr[3];
  for(int t=0; t<3; t++){
    local_i_start[t] = 0;
    n_ptr[t] = local_ni[t] = (ptrdiff_t) n[t];
  }
  
  plan_forw = FFTW(plan_dft_3d)(n[0], n[1], n[2], in, out, FFTW_FORWARD, fftw_flag);
  plan_back = FFTW(plan_dft_3d)(n[0], n[1], n[2], out, in, FFTW_BACKWARD, fftw_flag);
  
  /* Initialize input with random numbers */
  pfft_init_input_complex_3d(n_ptr, local_ni, local_i_start,
      in);

  time_fftw[0] = time_fftw[1] = 0;
  for(int t=0; t<iter; t++){
    /* execute serial forward FFT */
    time_fftw[0] -= MPI_Wtime();
    FFTW(execute)(plan_forw);
    time_fftw[0] += MPI_Wtime();
  
    /* execute serial backward FFT */
    time_fftw[1] -= MPI_Wtime();
    FFTW(execute)(plan_back);
    time_fftw[1] += MPI_Wtime();
  }
  
  /* Scale data: one division by the transform size per forward/backward pair */
  for(int t=0; t<iter; t++)
    for(ptrdiff_t l=0; l < n_total; l++)
      in[l] /= n_total;

  printf("fftw_forw = %.2e, fftw_back = %.2e\n", time_fftw[0]/iter, time_fftw[1]/iter);
 
  err = pfft_check_output_complex_3d(n_ptr, local_ni, local_i_start, in, MPI_COMM_WORLD);
  /* n[] is int, so the matching conversion is %d (not %td) */
  printf("Error after several forward and backward FFTWs of size n=(%d, %d, %d):\n", n[0], n[1], n[2]); 
  printf("maxerror = %6.2e;\n", err);
  
  /* free mem and finalize */
  FFTW(destroy_plan)(plan_forw);
  FFTW(destroy_plan)(plan_back);
  pfft_free(in); if(!inplace) pfft_free(out);
 
  MPI_Finalize();
  return 0;
}
コード例 #3
0
/*
 * Round-trip check of a parallel 3d complex-to-complex FFT of size
 * 29 x 27 x 31 on a 2 x 2 process mesh. The forward plan uses
 * PFFT_TRANSPOSED_OUT and the backward plan PFFT_TRANSPOSED_IN. After one
 * forward and one backward transform the data is rescaled and the error
 * reported by pfft_check_output_complex_3d() is printed.
 *
 * Returns 0 on success and 1 if the process mesh could not be created.
 */
int main(int argc, char **argv)
{
  /* FFT size and process mesh dimensions */
  ptrdiff_t n[3] = {29, 27, 31};
  int np[2] = {2, 2};

  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  MPI_Comm comm_cart_2d;

  /* Bring up MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Build the np[0] x np[1] process grid; bail out if that is not possible */
  const int mesh_failed = pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d);
  if(mesh_failed){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Query the data distribution for the transposed-output layout */
  const ptrdiff_t alloc_local = pfft_local_size_dft_3d(
      n, comm_cart_2d, PFFT_TRANSPOSED_OUT,
      local_ni, local_i_start, local_no, local_o_start);

  /* Input and output buffers */
  pfft_complex *in  = pfft_alloc_complex(alloc_local);
  pfft_complex *out = pfft_alloc_complex(alloc_local);

  /* Flags shared by both plans */
  const unsigned common_flags = PFFT_MEASURE | PFFT_DESTROY_INPUT;

  /* Forward plan: in -> out */
  pfft_plan plan_forw = pfft_plan_dft_3d(
      n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT | common_flags);

  /* Backward plan: out -> in */
  pfft_plan plan_back = pfft_plan_dft_3d(
      n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN | common_flags);

  /* Fill the input only after planning */
  pfft_init_input_complex_3d(n, local_ni, local_i_start, in);

  /* One forward and one backward transform */
  pfft_execute(plan_forw);
  pfft_execute(plan_back);

  /* Rescale the local part of the result by the global transform size */
  const ptrdiff_t n_total = n[0] * n[1] * n[2];
  const ptrdiff_t local_count = local_ni[0] * local_ni[1] * local_ni[2];
  for(ptrdiff_t idx = 0; idx < local_count; idx++)
    in[idx] /= n_total;

  /* Report the error of the back-transformed data */
  MPI_Barrier(MPI_COMM_WORLD);
  const double err = pfft_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); 
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);

  /* Release plans, communicator and buffers, then shut down MPI */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(in);
  pfft_free(out);
  MPI_Finalize();
  return 0;
}
コード例 #4
0
/*
 * Ghost-cell test on a three-dimensional process mesh: initializes a complex
 * 3d array, performs a ghost cell exchange followed by the adjoint reduce,
 * rescales the data and prints the error reported by
 * pfft_check_output_complex_3d().
 *
 * Defaults (n = 2^3, np = 1 x 1 x 3, gc_above[0] = 2, verbose = 1) are passed
 * to init_parameters(), which reads the command line.
 *
 * Returns 0 on success and 1 if the process mesh could not be created.
 */
int main(int argc, char **argv){
  ptrdiff_t n[3], gc_below[3], gc_above[3];
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  ptrdiff_t local_ngc[3], local_gc_start[3];
  ptrdiff_t alloc_local, alloc_local_gc;
  int np[3], rnk_self, size, patience, verbose;
  unsigned pfft_flags=0;
  double err;
  MPI_Comm comm_cart_3d;
  pfft_complex *data;
  pfft_gcplan ths;
  
  MPI_Init(&argc, &argv);
  pfft_init();
  /* NOTE(review): rank and size are queried but not used below */
  MPI_Comm_rank(MPI_COMM_WORLD, &rnk_self);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  
  /* default values */
  n[0] = n[1] = n[2] = 2; /*  n[0] = 3; n[1] = 5; n[2] = 7;*/
  np[0]=1; np[1]=1; np[2] = 3;
  verbose = 1;
  /* Give patience a defined value before it is read in the switch below.
   * 1 selects the `default:` (PFFT_MEASURE) branch.
   * NOTE(review): assumes init_parameters may leave it untouched when no
   * option is given - confirm against its implementation. */
  patience = 1;
  for(int t=0; t<3; t++){
    gc_below[t] = 0;
    gc_above[t] = 0;
  }
  gc_below[0] = 0;
  gc_above[0] = 2;

  /* set values by commandline */
  init_parameters(argc, argv, n, np, gc_below, gc_above, &patience, &verbose);

  /* NOTE(review): pfft_flags is assigned here but not passed to any call below */
  switch(patience){
    case 0: pfft_flags = PFFT_ESTIMATE; break;
    case 2: pfft_flags = PFFT_PATIENT; break;
    case 3: pfft_flags = PFFT_EXHAUSTIVE; break;
    default: pfft_flags = PFFT_MEASURE;
  }

  /* Create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */
  if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]*np[2]);
    MPI_Finalize();
    return 1;
  }

  /* Get parameters of data distribution */
  alloc_local = pfft_local_size_dft_3d(n, comm_cart_3d, PFFT_TRANSPOSED_NONE,
      local_ni, local_i_start, local_no, local_o_start);

  /* Get size and offset of the local block including ghost cells */
  alloc_local_gc = pfft_local_size_gc_3d(
      local_ni, local_i_start, alloc_local, gc_below, gc_above,
      local_ngc, local_gc_start);

  /* Allocate memory */
  data = pfft_alloc_complex(alloc_local_gc);

  /* Plan parallel ghost cell send */
  ths = pfft_plan_cgc_3d(n, gc_below, gc_above,
      data, comm_cart_3d, PFFT_GC_NONTRANSPOSED);

  /* Initialize input with random numbers */
  pfft_init_input_complex_3d(n, local_ni, local_i_start,
      data);

  /* check gcell input */
  if(verbose)
    pfft_apr_complex_3d(data, local_ni, local_i_start, "gcell input", comm_cart_3d);

  /* Execute parallel ghost cell send */
  pfft_exchange(ths);

  /* check output */
  if(verbose)
    pfft_apr_complex_3d(data, local_ngc, local_gc_start, "exchanged gcells", comm_cart_3d);
  
  /* Execute adjoint parallel ghost cell send */
  pfft_reduce(ths);

  /* check input */
  if(verbose)
    pfft_apr_complex_3d(data, local_no, local_o_start, "reduced gcells", comm_cart_3d);

  /* Scale data.
   * NOTE(review): the divisor 4 is hard-coded and does not depend on the
   * ghost cell sizes set above - confirm it matches the chosen configuration. */
  for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++)
    data[l] /= 4;

  /* Print error of back transformed data */
  MPI_Barrier(comm_cart_3d);
  err = pfft_check_output_complex_3d(n, local_ni, local_i_start, data, comm_cart_3d);
  pfft_printf(comm_cart_3d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); 
  pfft_printf(comm_cart_3d, "maxerror = %6.2e;\n", err);


  /* free mem and finalize */
  pfft_destroy_gcplan(ths);
  MPI_Comm_free(&comm_cart_3d);
  pfft_free(data);
  MPI_Finalize();
  return 0;
}
コード例 #5
0
/*
 * OpenMP-enabled round-trip check of a parallel 3d complex-to-complex FFT of
 * size NNN^3 on a 1 x 1 process mesh. Runs `runs` forward/backward pairs
 * (rescaling after each pair), prints the PFFT timers of both plans and the
 * error reported by pfft_check_output_complex_3d().
 *
 * Command line options read here: -pfft_omp_threads, -pfft_runs.
 *
 * Returns 0 on success and 1 if the process mesh could not be created.
 */
int main(int argc, char **argv)
{
  int nthreads=1; /*number of threads to initialize openmp with*/
  int runs=1; /*number of runs for testing*/
  int np[2];
  ptrdiff_t n[3];
  ptrdiff_t alloc_local;
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  double err;
  pfft_complex *in, *out;
  pfft_plan plan_forw=NULL, plan_back=NULL;
  MPI_Comm comm_cart_2d;

  /* Init OpenMP */
  pfft_get_args(argc,argv,"-pfft_omp_threads",1,PFFT_INT,&nthreads);
  pfft_get_args(argc,argv,"-pfft_runs",1,PFFT_INT,&runs);
  /* NOTE(review): this call precedes pfft_init() and is repeated after
   * initialization below - confirm whether the early call is needed. */
  pfft_plan_with_nthreads(nthreads);

  /* Set size of FFT and process mesh */
  n[0] = NNN;n[1] =NNN; n[2] =NNN;
  np[0] = 1; np[1] = 1;
  
  /* Initialize MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  pfft_plan_with_nthreads(nthreads);
  pfft_printf(MPI_COMM_WORLD, "# %4d threads will be used for openmp (default is 1)\n", nthreads);

 /* Create two-dimensional process grid of size np[0] x np[1], if possible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Get parameters of data distribution.
   * The forward plan below is created with PFFT_TRANSPOSED_OUT, so the same
   * layout flag is used here to keep buffer size and local block description
   * consistent with the plans.
   * NOTE(review): the plans also use PFFT_SHIFTED_IN / PFFT_SHIFTED_OUT;
   * confirm whether the shifted flag must be passed here as well. */
  alloc_local = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_OUT,
      local_ni, local_i_start, local_no, local_o_start);

  /* Allocate memory */
  in  = pfft_alloc_complex(alloc_local);
  out = pfft_alloc_complex(alloc_local);

  /* Plan parallel forward FFT */
  plan_forw = pfft_plan_dft_3d(
      n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_TUNE| PFFT_SHIFTED_IN);
  
  /* Plan parallel backward FFT */
  plan_back = pfft_plan_dft_3d(
      n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_TUNE| PFFT_SHIFTED_OUT);

  /* Initialize input with random numbers */
  pfft_init_input_complex_3d(n, local_ni, local_i_start,
      in);

  for(int i=0; i<runs; i++)
  {
    /* execute parallel forward FFT */
    pfft_execute(plan_forw);

    /* execute parallel backward FFT */
    pfft_execute(plan_back);

    /* Scale data after every forward/backward pair so that `in` holds the
     * original values again before the next run */
    ptrdiff_t l;
    for(l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++)
      in[l] /= (n[0]*n[1]*n[2]);
  }

  pfft_print_average_timer_adv(plan_forw, MPI_COMM_WORLD);
  pfft_print_average_timer_adv(plan_back, MPI_COMM_WORLD);

  /* Print error of back transformed data */
  err = pfft_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after %d forward and backward trafos of size n=(%td, %td, %td):\n", runs, n[0], n[1], n[2]); 
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);
  
  /* free mem and finalize */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(in); pfft_free(out);
  MPI_Finalize();
  return 0;
}
コード例 #6
0
/*
 * Ghost-cell test on the complex output layout of a real-to-complex FFT:
 * initializes a complex 3d array distributed on a two-dimensional process
 * mesh, performs a ghost cell exchange followed by the adjoint reduce,
 * rescales the data and prints the error reported by
 * pfft_check_output_complex_3d().
 *
 * Defaults (n = 8^3, np = 2 x 2 x 1, gc_above[0] = 4, verbose = 0) are passed
 * to init_parameters(), which reads the command line.
 *
 * Returns 0 on success and 1 if the process mesh could not be created.
 */
int main(int argc, char **argv){
  ptrdiff_t n[3], gc_below[3], gc_above[3];
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  ptrdiff_t local_ngc[3], local_gc_start[3];
  int np[3], rnk_self, size;
  int verbose = 0;
  MPI_Comm comm_cart_2d;

  MPI_Init(&argc, &argv);
  pfft_init();
  MPI_Comm_rank(MPI_COMM_WORLD, &rnk_self);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  /* Defaults: 8 x 8 x 8 array, ghost cells only above along dimension 0 */
  for(int d = 0; d < 3; d++){
    n[d] = 8;
    gc_below[d] = 0;
    gc_above[d] = 0;
  }
  gc_above[0] = 4;
  np[0] = 2;
  np[1] = 2;
  np[2] = 1;

  /* Command line values are handled by init_parameters */
  init_parameters(argc, argv, n, np, gc_below, gc_above, &verbose);

  /* Build the np[0] x np[1] process grid; bail out if that is not possible */
  const int mesh_failed = pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d);
  if(mesh_failed){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Data distribution of the r2c transform:
   * alloc_local, local_no, local_o_start are given in complex units,
   * local_ni, local_i_start are given in real units */
  const ptrdiff_t alloc_local = pfft_local_size_dft_r2c_3d(
      n, comm_cart_2d, PFFT_TRANSPOSED_NONE,
      local_ni, local_i_start, local_no, local_o_start);

  /* alloc_local_gc, local_ngc, local_gc_start are given in complex units */
  const ptrdiff_t alloc_local_gc = pfft_local_size_gc_3d(
      local_no, local_o_start, gc_below, gc_above,
      local_ngc, local_gc_start);

  /* One buffer large enough for both the FFT layout and the ghost cell layout */
  ptrdiff_t alloc_max = alloc_local;
  if(alloc_local_gc > alloc_local)
    alloc_max = alloc_local_gc;
  pfft_complex *cdata = pfft_alloc_complex(alloc_max);

  /* Plan the parallel ghost cell send */
  pfft_gcplan ths = pfft_plan_cgc_3d(
      n, gc_below, gc_above, cdata, comm_cart_2d,
      PFFT_GC_TRANSPOSED_NONE | PFFT_GC_R2C);

  /* Fill the local block with test input */
  pfft_init_input_complex_3d(n, local_no, local_o_start, cdata);

  if(verbose)
    pfft_apr_complex_3d(cdata, local_no, local_o_start, "gcell input", comm_cart_2d);

  /* Ghost cell send */
  pfft_exchange(ths);

  if(verbose)
    pfft_apr_complex_3d(cdata, local_ngc, local_gc_start, "exchanged gcells", comm_cart_2d);

  /* Adjoint ghost cell send */
  pfft_reduce(ths);

  if(verbose)
    pfft_apr_complex_3d(cdata, local_no, local_o_start, "reduced gcells", comm_cart_2d);

  /* Rescale the local block */
  const ptrdiff_t local_count = local_no[0] * local_no[1] * local_no[2];
  for(ptrdiff_t idx = 0; idx < local_count; idx++)
    cdata[idx] /= 2;

  /* Report the error together with the configuration that was used */
  MPI_Barrier(comm_cart_2d);
  const double err = pfft_check_output_complex_3d(n, local_no, local_o_start, cdata, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one gcell exchange and reduce of logical size n=(%td, %td, %td),\n", n[0], n[1], n[2]); 
  pfft_printf(comm_cart_2d, "physical size pn=(%td, %td, %td),\n", n[0], n[1], n[2]/2+1); 
  pfft_printf(comm_cart_2d, "gc_below = (%td, %td, %td), gc_above = (%td, %td, %td):\n", gc_below[0], gc_below[1], gc_below[2], gc_above[0], gc_above[1], gc_above[2]); 
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);

  /* Release plan, communicator and buffer, then shut down MPI */
  pfft_destroy_gcplan(ths);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(cdata);
  MPI_Finalize();
  return 0;
}
コード例 #7
0
/*
 * Times planning and execution of a parallel 3d complex-to-complex FFT and
 * prints the results.
 *
 * n              - FFT size (three entries are read)
 * np             - process mesh size; np[0] x np[1] processes are required
 * comm           - communicator the process mesh is created from
 * loops          - number of forward/backward pairs to execute
 * inplace        - non-zero: input and output share one buffer
 * pfft_opt_flags - extra planner flags, OR-ed with the transposed flag of
 *                  each plan
 *
 * Prints an error message and returns without measuring if the process mesh
 * cannot be created. All resources acquired here are released before return.
 */
static void measure_pfft(
    const ptrdiff_t *n, int *np, MPI_Comm comm,
    int loops, int inplace, unsigned pfft_opt_flags
    )
{
  ptrdiff_t alloc_local;
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  /* timer[0], timer[1]: planning time forward/backward;
   * timer[2], timer[3]: average execution time forward/backward */
  double err=0.0, timer[4], max_timer[4];
  pfft_complex *in, *out;
  pfft_plan plan_forw=NULL, plan_back=NULL;
  MPI_Comm comm_cart_2d;

  /* Create two-dimensional process grid of size np[0] x np[1], if possible */
  if( pfft_create_procmesh_2d(comm, np[0], np[1], &comm_cart_2d) ){
    pfft_fprintf(comm, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    return;
  }
  
  /* Get parameters of data distribution */
  alloc_local = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_OUT,
      local_ni, local_i_start, local_no, local_o_start);

  /* Allocate memory; in-place runs share one buffer */
  in  = pfft_alloc_complex(alloc_local);
  out = (inplace) ? in : pfft_alloc_complex(alloc_local);

  /* Plan parallel forward FFT */
  timer[0] = -MPI_Wtime();
  plan_forw = pfft_plan_dft_3d(
      n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| pfft_opt_flags);
  timer[0] += MPI_Wtime();
  
  /* Plan parallel backward FFT */
  timer[1] = -MPI_Wtime();
  plan_back = pfft_plan_dft_3d(
      n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| pfft_opt_flags);
  timer[1] += MPI_Wtime();

  /* Initialize input with random numbers */
  pfft_init_input_complex_3d(n, local_ni, local_i_start,
      in);

  pfft_reset_timer(plan_forw);
  pfft_reset_timer(plan_back);

  timer[2] = timer[3] = 0;
  for(int t=0; t<loops; t++){
    /* Synchronize the processes taking part in the transform. The barrier
     * uses the mesh communicator, not MPI_COMM_WORLD, so the function also
     * works when `comm` covers only a subset of all processes. */
    MPI_Barrier(comm_cart_2d);
    /* execute parallel forward FFT */
    timer[2] -= MPI_Wtime();
    pfft_execute(plan_forw);
    timer[2] += MPI_Wtime();
    
    /* execute parallel backward FFT */
    MPI_Barrier(comm_cart_2d);
    timer[3] -= MPI_Wtime();
    pfft_execute(plan_back);
    timer[3] += MPI_Wtime();
    
    /* Scale data */
    for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++)
      in[l] /= (n[0]*n[1]*n[2]);
  }
  timer[2] /= loops;
  timer[3] /= loops;

  /* Print pfft timer */
  pfft_print_average_timer_adv(plan_forw, comm_cart_2d);
  pfft_print_average_timer_adv(plan_back, comm_cart_2d);

  /* Print optimization flags */
  pfft_printf(comm_cart_2d, "\nFlags = ");
  if(pfft_opt_flags & PFFT_TUNE)
    pfft_printf(comm_cart_2d, "PFFT_TUNE");
  else
    pfft_printf(comm_cart_2d, "PFFT_NO_TUNE");

  pfft_printf(comm_cart_2d, " | ");

  /* PFFT_MEASURE is reported when none of the other planning flags is set */
  if(pfft_opt_flags & PFFT_ESTIMATE)
    pfft_printf(comm_cart_2d, "PFFT_ESTIMATE");
  else if(pfft_opt_flags & PFFT_PATIENT)
    pfft_printf(comm_cart_2d, "PFFT_PATIENT");
  else if(pfft_opt_flags & PFFT_EXHAUSTIVE)
    pfft_printf(comm_cart_2d, "PFFT_EXHAUSTIVE");
  else
    pfft_printf(comm_cart_2d, "PFFT_MEASURE");

  pfft_printf(comm_cart_2d, " | ");

  if(pfft_opt_flags & PFFT_DESTROY_INPUT)
    pfft_printf(comm_cart_2d, "PFFT_DESTROY_INPUT");
  else
    pfft_printf(comm_cart_2d, "PFFT_PRESERVE_INPUT");

  pfft_printf(comm_cart_2d, "\n");


  /* Print error of back transformed data */
  err = pfft_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Run %d loops of ", loops);
  if(inplace)
    pfft_printf(comm_cart_2d, "in-place");
  else
    pfft_printf(comm_cart_2d, "out-of-place");
  pfft_printf(comm_cart_2d, " forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); 

  /* Take the maximum of every timer over all processes; the result is only
   * available on rank 0 of comm_cart_2d */
  MPI_Reduce(timer, max_timer, 4, MPI_DOUBLE, MPI_MAX, 0, comm_cart_2d);
  pfft_printf(comm_cart_2d, "tune_forw = %6.2e; tune_back = %6.2e, exec_forw = %6.2e, exec_back = %6.2e, error = %6.2e\n", max_timer[0], max_timer[1], max_timer[2], max_timer[3], err);

  /* free mem and finalize; `out` aliases `in` for in-place runs */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_2d);
  if(in != out) pfft_free(out);
  pfft_free(in);
}