Ejemplo n.º 1
0
/*
 * Print the largest entrywise deviation between two local result vectors.
 *
 * f_pnfft, f_nfft : arrays with local_M complex entries each, compared entrywise
 * local_M         : number of entries held by the calling process
 * f_hat_sum       : divisor that turns the absolute error into the relative one
 * name            : prefix printed in front of both output lines
 * comm            : communicator used for the MPI_MAX reduction and the output
 *
 * Contains an MPI_Reduce, so every process of comm has to call it.
 */
static void compare_f(
    const pnfft_complex *f_pnfft, const pnfft_complex *f_nfft, ptrdiff_t local_M,
    double f_hat_sum, const char *name, MPI_Comm comm
    )
{
  double error = 0;
  /* MPI_Reduce writes the receive buffer on the root only; start from a
   * defined value so that no rank passes an indeterminate double on. */
  double error_max = 0;

  for(ptrdiff_t j=0; j<local_M; j++){
    const double diff = cabs(f_pnfft[j]-f_nfft[j]);
    if(diff > error)
      error = diff;
  }

  MPI_Reduce(&error, &error_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
  pfft_printf(comm, "%s absolute error = %6.2e\n", name, error_max);
  pfft_printf(comm, "%s relative error = %6.2e\n", name, error_max/f_hat_sum);
}
Ejemplo n.º 2
0
/*
 * Maximum of |grad_f1[3*j+component] - grad_f2[3*j+component]| over
 * j = 0, ..., local_M-1, reduced with MPI_MAX onto rank 0 of comm.
 * The return value is the reduced maximum on rank 0 and 0 on all other ranks.
 */
static double grad_component_max_error(
    const pnfft_complex *grad_f1, const pnfft_complex *grad_f2, ptrdiff_t local_M,
    int component, MPI_Comm comm
    )
{
  double local_max = 0;
  /* Defined start value: MPI_Reduce fills the receive buffer on the root only. */
  double global_max = 0;

  for(ptrdiff_t j=0; j<local_M; j++){
    const double diff = cabs(grad_f1[3*j+component]-grad_f2[3*j+component]);
    if(diff > local_max)
      local_max = diff;
  }

  MPI_Reduce(&local_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
  return global_max;
}

/*
 * Print, separately for each of the three interleaved components, the largest
 * entrywise deviation between two local gradient arrays.
 *
 * grad_f1, grad_f2 : arrays with 3*local_M complex entries; the components of
 *                    entry j are stored at indices 3*j, 3*j+1 and 3*j+2
 * local_M          : number of gradient entries held by the calling process
 * f_hat_sum        : divisor that turns the absolute error into the relative one
 * name             : prefix printed in front of the component letter
 * comm             : communicator used for the reductions and the output
 *
 * Contains three MPI_Reduce calls (x, y, z in this order), so every process
 * of comm has to call it.
 */
static void compare_grad_f(
    const pnfft_complex *grad_f1, const pnfft_complex *grad_f2, ptrdiff_t local_M,
    double f_hat_sum, const char *name, MPI_Comm comm
    )
{
  double error_max;

  error_max = grad_component_max_error(grad_f1, grad_f2, local_M, 0, comm);
  pfft_printf(comm, "%sx absolute error = %6.2e\n", name, error_max);
  pfft_printf(comm, "%sx relative error = %6.2e\n", name, error_max/f_hat_sum);

  error_max = grad_component_max_error(grad_f1, grad_f2, local_M, 1, comm);
  pfft_printf(comm, "%sy absolute error = %6.2e\n", name, error_max);
  pfft_printf(comm, "%sy relative error = %6.2e\n", name, error_max/f_hat_sum);

  error_max = grad_component_max_error(grad_f1, grad_f2, local_M, 2, comm);
  pfft_printf(comm, "%sz absolute error = %6.2e\n", name, error_max);
  pfft_printf(comm, "%sz relative error = %6.2e\n", name, error_max/f_hat_sum);
}
Ejemplo n.º 3
0
/*
 * Timing test for a parallel 3d complex-to-complex FFT on a 2d process mesh.
 *
 * Executes `iter` forward/backward transform pairs, prints and writes the
 * averaged PFFT timers, rescales the data and prints the maximum error of
 * the back-transformed data.  The defaults set below (16^3 grid, 2 x 2 mesh,
 * 10 iterations, out-of-place, patience 0) are handed to init_parameters
 * together with the command line.
 *
 * Returns 0 on success and 1 if the process mesh cannot be created or the
 * input/output arrays cannot be allocated.
 */
int main(int argc, char **argv)
{
  int np[2];
  ptrdiff_t n[3];
  ptrdiff_t alloc_local;
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  double err;
  pfft_complex *in, *out;
  pfft_plan plan_forw=NULL, plan_back=NULL;
  MPI_Comm comm_cart_2d;
  double time;
  pfft_timer timer_forw, timer_back;
  unsigned pfft_opt_flag;

  /* setup default parameters */
  int iter = 10, inplace = 0, patience = 0;

  /* Set size of FFT and process mesh */
  n[0] = n[1] = n[2] = 16;
  np[0] = 2; np[1] = 2;

  /* Initialize MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* read parameters from command line */
  init_parameters(argc, argv, np, n, &iter, &inplace, &patience);

  /* setup FFTWs planing depth */
  switch(patience){
    case 1: pfft_opt_flag = PFFT_MEASURE; break;
    case 2: pfft_opt_flag = PFFT_PATIENT; break;
    case 3: pfft_opt_flag = PFFT_EXHAUSTIVE; break;
    default: pfft_opt_flag = PFFT_ESTIMATE;
  }
  pfft_opt_flag |= PFFT_DESTROY_INPUT;

  /* Create two-dimensional process grid of size np[0] x np[1], if possible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: Procmesh %d x %d requires MPI launch with %d processes.\n",
        np[0], np[1], np[0]*np[1]);
    /* MPI_Finalize must be called only once per process */
    MPI_Finalize();
    return 1;
  }

  /* Get parameters of data distribution */
  alloc_local = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_OUT,
      local_ni, local_i_start, local_no, local_o_start);

  /* Allocate memory; the in-place variant shares one array */
  in = pfft_alloc_complex(alloc_local);
  out = (inplace) ? in : pfft_alloc_complex(alloc_local);

  /* We often want to scale large FFTs, which do not fit on few processes. */
  if( (in == NULL) || (out == NULL)){
    fprintf(stderr, "!!! Error: Not enough memory to allocate input/output arrays !!!\n");
    /* MPI_Finalize must be called only once per process */
    MPI_Finalize();
    return 1;
  }

  /* Plan parallel forward FFT */
  time = -MPI_Wtime();
  plan_forw = pfft_plan_dft_3d(
      n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| pfft_opt_flag);
  time += MPI_Wtime();
//  printf("time for forw planing: %.2e\n", time);

  /* Plan parallel backward FFT */
  time = -MPI_Wtime();
  plan_back = pfft_plan_dft_3d(
      n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| pfft_opt_flag);
  time += MPI_Wtime();
//  printf("time for back planing: %.2e\n", time);

  /* Initialize input with random numbers */
  pfft_init_input_c2c_3d(n, local_ni, local_i_start,
      in);

  for(int t=0; t<iter; t++){
    /* execute parallel forward FFT */
    pfft_execute(plan_forw);

    /* execute parallel backward FFT */
    pfft_execute(plan_back);
  }

  /* check individual timers for workbalance */
  timer_forw = pfft_get_timer(plan_forw);
//    printf("timer_forw->whole = %.2e\n", timer_forw->whole);
  pfft_destroy_timer(timer_forw);
  timer_back = pfft_get_timer(plan_back);
//  printf("timer_back->whole = %.2e\n", timer_back->whole);
  pfft_destroy_timer(timer_back);

  /* read out PFFT timers */
  pfft_print_average_timer_adv(plan_forw, comm_cart_2d);
  pfft_print_average_timer_adv(plan_back, comm_cart_2d);
  if(inplace){
    pfft_write_average_timer_adv(plan_forw, "measure_forw_inplace.m", comm_cart_2d);
    pfft_write_average_timer_adv(plan_back, "measure_back_inplace.m", comm_cart_2d);
  } else {
    pfft_write_average_timer_adv(plan_forw, "measure_forw_outofplace.m", comm_cart_2d);
    pfft_write_average_timer_adv(plan_back, "measure_back_outofplace.m", comm_cart_2d);
  }

  /* Scale data: one division per executed forward/backward pair */
  for(int t=0; t<iter; t++)
    for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++)
      in[l] /= (n[0]*n[1]*n[2]);

  /* Print error of back transformed data */
  MPI_Barrier(MPI_COMM_WORLD);
  err = pfft_check_output_c2c_3d(n, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]);
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);

  /* free mem and finalize; `out` aliases `in` in the in-place case */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(in); if(!inplace) pfft_free(out);
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 4
0
/*
 * Round-trip test of a parallel 3d real-to-complex / complex-to-real FFT on a
 * 2 x 2 process mesh, with input and output arrays of size 16^3 embedded in
 * an FFT of size 29 x 27 x 31.
 *
 * Returns 0 on success and 1 if the process mesh cannot be created.
 */
int main(int argc, char **argv)
{
  /* FFT size, input/output array sizes and process mesh */
  ptrdiff_t n[3]  = {29, 27, 31};
  ptrdiff_t ni[3] = {16, 16, 16};
  ptrdiff_t no[3] = {ni[0], ni[1], ni[2]};
  int np[2] = {2, 2};
  const ptrdiff_t howmany = 1;

  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_n[3], local_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  MPI_Comm comm_cart_2d;

  /* Start up MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Build the np[0] x np[1] process grid; bail out if that is impossible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) != 0 ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Query the data distribution of both transforms */
  const ptrdiff_t alloc_forw = pfft_local_size_many_dft_r2c(3, n, ni, n, howmany,
      PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
      comm_cart_2d, PFFT_TRANSPOSED_NONE | PFFT_PADDED_R2C,
      local_ni, local_i_start, local_n, local_start);

  const ptrdiff_t alloc_back = pfft_local_size_many_dft_c2r(3, n, n, no, howmany,
      PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
      comm_cart_2d, PFFT_TRANSPOSED_NONE | PFFT_PADDED_R2C,
      local_n, local_start, local_no, local_o_start);

  /* Both arrays are sized for the larger of the two requirements */
  ptrdiff_t n_alloc = alloc_back;
  if(alloc_forw > alloc_back)
    n_alloc = alloc_forw;
  double *in = pfft_alloc_real(2 * n_alloc);
  pfft_complex *out = pfft_alloc_complex(n_alloc);

  /* Forward plan: real input -> complex output */
  pfft_plan fwd_plan = pfft_plan_many_dft_r2c(
      3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
      in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_PADDED_R2C);

  /* Backward plan: complex output -> real input */
  pfft_plan bwd_plan = pfft_plan_many_dft_c2r(
      3, n, n, no, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
      out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_PADDED_R2C);

  /* Fill the input, transform forward, wipe the input, transform back */
  pfft_init_input_real(3, ni, local_ni, local_i_start, in);
  pfft_execute(fwd_plan);
  pfft_clear_input_real(3, ni, local_ni, local_i_start, in);
  pfft_execute(bwd_plan);

  /* Divide the local data by the FFT size */
  const ptrdiff_t local_total = local_ni[0] * local_ni[1] * local_ni[2];
  const ptrdiff_t fft_total = n[0]*n[1]*n[2];
  for(ptrdiff_t idx=0; idx < local_total; idx++)
    in[idx] /= fft_total;

  /* Report the error of the back-transformed data */
  MPI_Barrier(MPI_COMM_WORLD);
  const double max_error = pfft_check_output_real(3, ni, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]);
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", max_error);

  /* Release plans, communicator and arrays, then shut down MPI */
  pfft_destroy_plan(fwd_plan);
  pfft_destroy_plan(bwd_plan);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(in);
  pfft_free(out);
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 5
0
/*
 * Round-trip test of a parallel 4d real-to-real transform of size
 * 13 x 14 x 19 x 17 on a 2 x 2 x 2 process mesh.
 *
 * Returns 0 on success and 1 if the process mesh cannot be created.
 */
int main(int argc, char **argv)
{
  /* Array size and process mesh */
  ptrdiff_t n[4] = {13, 14, 19, 17};
  int np[3] = {2, 2, 2};

  /* FFTW kinds of the 1d R2R trafos, one forward/backward pair per dimension */
  pfft_r2r_kind kinds_forw[4] = {PFFT_REDFT00, PFFT_REDFT01, PFFT_RODFT00, PFFT_RODFT10};
  pfft_r2r_kind kinds_back[4] = {PFFT_REDFT00, PFFT_REDFT10, PFFT_RODFT00, PFFT_RODFT01};

  /* Logical DFT sizes corresponding to FFTW manual:
   * for REDFT00 N=2*(n-1), for RODFT00 N=2*(n+1), otherwise N=2*n */
  ptrdiff_t N[4] = {2*(n[0]-1), 2*n[1], 2*(n[2]+1), 2*n[3]};

  ptrdiff_t local_ni[4], local_i_start[4];
  ptrdiff_t local_no[4], local_o_start[4];
  MPI_Comm comm_cart_3d;

  /* Start up MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Build the np[0] x np[1] x np[2] process grid; bail out if that is impossible */
  if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) != 0 ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]*np[2]);
    MPI_Finalize();
    return 1;
  }

  /* Query the data distribution */
  const ptrdiff_t n_alloc = pfft_local_size_r2r(4, n, comm_cart_3d, PFFT_TRANSPOSED_NONE,
      local_ni, local_i_start, local_no, local_o_start);

  /* Allocate both arrays */
  double *in = pfft_alloc_real(n_alloc);
  double *out = pfft_alloc_real(n_alloc);

  /* Forward plan: in -> out */
  pfft_plan fwd_plan = pfft_plan_r2r(
      4, n, in, out, comm_cart_3d, kinds_forw, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT);

  /* Backward plan: out -> in */
  pfft_plan bwd_plan = pfft_plan_r2r(
      4, n, out, in, comm_cart_3d, kinds_back, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT);

  /* Fill the input, transform forward, wipe the input, transform back */
  pfft_init_input_real(4, n, local_ni, local_i_start, in);
  pfft_execute(fwd_plan);
  pfft_clear_input_real(4, n, local_ni, local_i_start, in);
  pfft_execute(bwd_plan);

  /* Divide the local data by the product of the logical DFT sizes */
  const ptrdiff_t local_total = local_ni[0] * local_ni[1] * local_ni[2] * local_ni[3];
  const ptrdiff_t logical_total = N[0]*N[1]*N[2]*N[3];
  for(ptrdiff_t idx=0; idx < local_total; idx++)
    in[idx] /= logical_total;

  /* Report the error of the back-transformed data */
  MPI_Barrier(MPI_COMM_WORLD);
  const double max_error = pfft_check_output_real(4, n, local_ni, local_i_start, in, comm_cart_3d);
  pfft_printf(comm_cart_3d, "Error after one forward and backward trafo of size n=(%td, %td, %td, %td):\n", n[0], n[1], n[2], n[3]);
  pfft_printf(comm_cart_3d, "maxerror = %6.2e;\n", max_error);

  /* Release plans, communicator and arrays, then shut down MPI */
  pfft_destroy_plan(fwd_plan);
  pfft_destroy_plan(bwd_plan);
  MPI_Comm_free(&comm_cart_3d);
  pfft_free(in);
  pfft_free(out);
  MPI_Finalize();
  return 0;
}
/*
 * Round-trip test of a parallel 3d real-to-real transform on a 2 x 2 process
 * mesh, with input and output arrays of size 16^3 embedded in a transform of
 * size 29 x 27 x 31 and transposed data layout between the two transforms.
 *
 * Returns 0 on success and 1 if the process mesh cannot be created.
 */
int main(int argc, char **argv)
{
  /* Transform size, input/output array sizes and process mesh */
  ptrdiff_t n[3]  = {29, 27, 31};
  ptrdiff_t ni[3] = {16, 16, 16};
  ptrdiff_t no[3] = {ni[0], ni[1], ni[2]};
  int np[2] = {2, 2};
  const ptrdiff_t howmany = 1;

  /* PFFT kinds of the 1d R2R trafos, one forward/backward pair per dimension */
  fftw_r2r_kind kinds_forw[3] = {PFFT_REDFT00, PFFT_REDFT01, PFFT_RODFT00};
  fftw_r2r_kind kinds_back[3] = {PFFT_REDFT00, PFFT_REDFT10, PFFT_RODFT00};

  /* Logical DFT sizes corresponding to FFTW manual:
   * for REDFT00 N=2*(n-1), for RODFT00 N=2*(n+1), otherwise N=2*n */
  ptrdiff_t N[3] = {2*(n[0]-1), 2*n[1], 2*(n[2]+1)};

  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_n[3], local_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  MPI_Comm comm_cart_2d;

  /* Start up MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Build the np[0] x np[1] process grid; bail out if that is impossible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) != 0 ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Query the data distribution of both transforms */
  const ptrdiff_t alloc_forw = pfft_local_size_many_r2r(3, n, ni, n, howmany,
      PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
      comm_cart_2d, PFFT_TRANSPOSED_OUT,
      local_ni, local_i_start, local_n, local_start);

  const ptrdiff_t alloc_back = pfft_local_size_many_r2r(3, n, n, no, howmany,
      PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
      comm_cart_2d, PFFT_TRANSPOSED_IN,
      local_n, local_start, local_no, local_o_start);

  /* Both arrays are sized for the larger of the two requirements */
  ptrdiff_t n_alloc = alloc_back;
  if(alloc_forw > alloc_back)
    n_alloc = alloc_forw;
  double *in = fftw_alloc_real(n_alloc);
  double *out = fftw_alloc_real(n_alloc);

  /* Forward plan: in -> out */
  pfft_plan fwd_plan = pfft_plan_many_r2r(
      3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
      in, out, comm_cart_2d, kinds_forw, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT);

  /* Backward plan: out -> in */
  pfft_plan bwd_plan = pfft_plan_many_r2r(
      3, n, n, no, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS,
      out, in, comm_cart_2d, kinds_back, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT);

  /* Fill the input, transform forward, wipe the input, transform back */
  pfft_init_input_real_3d(ni, local_ni, local_i_start, in);
  pfft_execute(fwd_plan);
  pfft_clear_input_real_3d(ni, local_ni, local_i_start, in);
  pfft_execute(bwd_plan);

  /* Divide the local data by the product of the logical DFT sizes */
  const ptrdiff_t local_total = local_ni[0] * local_ni[1] * local_ni[2];
  const ptrdiff_t logical_total = N[0]*N[1]*N[2];
  for(ptrdiff_t idx=0; idx < local_total; idx++)
    in[idx] /= logical_total;

  /* Report the error of the back-transformed data */
  MPI_Barrier(MPI_COMM_WORLD);
  const double max_error = pfft_check_output_real_3d(ni, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]);
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", max_error);

  /* Release plans, communicator and arrays, then shut down MPI */
  pfft_destroy_plan(fwd_plan);
  pfft_destroy_plan(bwd_plan);
  MPI_Comm_free(&comm_cart_2d);
  fftw_free(in);
  fftw_free(out);
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 7
0
/*
 * Small parallel 3d complex-to-complex FFT (2 x 2 x 4) on a 2 x 2 process
 * mesh that prints the input, the transformed data and the back-transformed
 * data, followed by the round-trip error.
 *
 * Returns 0 on success and 1 if the process mesh cannot be created.
 */
int main(int argc, char **argv)
{
  /* FFT size and process mesh */
  ptrdiff_t n[3] = {2, 2, 4};
  int np[2] = {2, 2};

  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  MPI_Comm comm_cart_2d;

  /* Start up MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Build the np[0] x np[1] process grid; bail out if that is impossible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) != 0 ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Query the data distribution */
  const ptrdiff_t n_alloc = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_NONE,
      local_ni, local_i_start, local_no, local_o_start);

  /* Allocate both arrays */
  pfft_complex *in = pfft_alloc_complex(n_alloc);
  pfft_complex *out = pfft_alloc_complex(n_alloc);

  /* Forward plan: in -> out */
  pfft_plan fwd_plan = pfft_plan_dft_3d(
      n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT);

  /* Backward plan: out -> in */
  pfft_plan bwd_plan = pfft_plan_dft_3d(
      n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT);

  /* Fill the input and show it */
  pfft_init_input_complex_3d(n, local_ni, local_i_start, in);
  pfft_apr_complex_3d(in, local_ni, local_i_start, "PFFT, g_hat", MPI_COMM_WORLD);

  /* Transform forward and show the result */
  pfft_execute(fwd_plan);
  pfft_apr_complex_3d(out, local_no, local_o_start, "PFFT, g", MPI_COMM_WORLD);

  /* Transform back */
  pfft_execute(bwd_plan);

  /* Divide the local data by the FFT size and show it */
  const ptrdiff_t local_total = local_ni[0] * local_ni[1] * local_ni[2];
  const ptrdiff_t fft_total = n[0]*n[1]*n[2];
  for(ptrdiff_t idx=0; idx < local_total; idx++)
    in[idx] /= fft_total;
  pfft_apr_complex_3d(in, local_ni, local_i_start, "PFFT^H, g_hat", MPI_COMM_WORLD);

  /* Report the error of the back-transformed data */
  MPI_Barrier(MPI_COMM_WORLD);
  const double max_error = pfft_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]);
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", max_error);

  /* Release plans, communicator and arrays, then shut down MPI */
  pfft_destroy_plan(fwd_plan);
  pfft_destroy_plan(bwd_plan);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(in);
  pfft_free(out);
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 8
0
/*
 * Test of the parallel ghost cell exchange and its adjoint (reduce) for a
 * 3d complex array on a 2d process mesh.
 *
 * The defaults set below (8^3 array, 2 x 2 mesh, 8 ghost cells above in the
 * first dimension, quiet output) are handed to init_parameters together with
 * the command line.
 *
 * Returns 0 on success and 1 if the process mesh cannot be created.
 */
int main(int argc, char **argv)
{
  int rnk_self, size;
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  ptrdiff_t local_ngc[3], local_gc_start[3];
  MPI_Comm comm_cart_2d;

  /* Start up MPI and PFFT, look up rank and size of the world communicator */
  MPI_Init(&argc, &argv);
  pfft_init();
  MPI_Comm_rank(MPI_COMM_WORLD, &rnk_self);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  /* default values */
  ptrdiff_t n[3] = {8, 8, 8};
  int np[3] = {2, 2, 1};
  int verbose = 0;
  ptrdiff_t gc_below[3] = {0, 0, 0};
  ptrdiff_t gc_above[3] = {8, 0, 0};

  /* set values by commandline */
  init_parameters(argc, argv, n, np, gc_below, gc_above, &verbose);

  /* Build the np[0] x np[1] process grid; bail out if that is impossible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) != 0 ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Query the data distribution without and with ghost cells */
  const ptrdiff_t n_alloc = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_NONE,
      local_ni, local_i_start, local_no, local_o_start);

  const ptrdiff_t n_alloc_gc = pfft_local_size_gc_3d(
      local_ni, local_i_start, n_alloc, gc_below, gc_above,
      local_ngc, local_gc_start);

  /* Allocate the array including the ghost cells */
  pfft_complex *data = pfft_alloc_complex(n_alloc_gc);

  /* Plan parallel ghost cell send */
  pfft_gcplan gc_plan = pfft_plan_cgc_3d(n, gc_below, gc_above,
      data, comm_cart_2d, PFFT_GC_NONTRANSPOSED);

  /* Fill the array with the test input */
  pfft_init_input_c2c_3d(n, local_ni, local_i_start, data);

  if(verbose != 0)
    pfft_apr_complex_3d(data, local_ni, local_i_start, "gcell input", comm_cart_2d);

  /* Execute parallel ghost cell send */
  pfft_exchange(gc_plan);

  if(verbose != 0)
    pfft_apr_complex_3d(data, local_ngc, local_gc_start, "exchanged gcells", comm_cart_2d);

  /* Execute adjoint parallel ghost cell send */
  pfft_reduce(gc_plan);

  if(verbose != 0)
    pfft_apr_complex_3d(data, local_no, local_o_start, "reduced gcells", comm_cart_2d);

  /* Scale data */
  const ptrdiff_t local_total = local_ni[0] * local_ni[1] * local_ni[2];
  for(ptrdiff_t idx=0; idx < local_total; idx++)
    data[idx] /= 3;

  /* Report the error after exchange and reduce */
  MPI_Barrier(comm_cart_2d);
  const double max_error = pfft_check_output_c2c_3d(n, local_ni, local_i_start, data, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one gcell exchange and reduce of size n=(%td, %td, %td),\n", n[0], n[1], n[2]);
  pfft_printf(comm_cart_2d, "gc_below = (%td, %td, %td), gc_above = (%td, %td, %td):\n", gc_below[0], gc_below[1], gc_below[2], gc_above[0], gc_above[1], gc_above[2]);
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", max_error);

  /* Release plan, communicator and array, then shut down MPI */
  pfft_destroy_gcplan(gc_plan);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(data);
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 9
0
/*
 * Driver of the parallel NFFT test: sets defaults, lets init_parameters
 * override them from the command line, translates the integer options into
 * PNFFT flags, prints a summary of the setup and calls pnfft_perform_guru.
 *
 * Always returns 0.
 */
int main(int argc, char **argv){
  int np[3], m, window, interlacing;
  ptrdiff_t N[3], n[3], local_M;
  double x_max[3];

  MPI_Init(&argc, &argv);
  pnfft_init();

  /* set default values */
  N[0] = N[1] = N[2] = 16;
  n[0] = n[1] = n[2] = 0;
  local_M = 0;
  m = 6;
  window = 4;
  interlacing = 0;
  x_max[0] = x_max[1] = x_max[2] = 0.5;
  np[0]=2; np[1]=2; np[2]=2;

  /* set parameters by command line */
  int intpol = -1;
  init_parameters(argc, argv, N, n, &local_M, &m, &window, &intpol, &interlacing, x_max, np);

  /* if M or n are set to zero, we choose nice values */
  local_M = (local_M==0) ? N[0]*N[1]*N[2]/(np[0]*np[1]*np[2]) : local_M;
  for(int t=0; t<3; t++)
    n[t] = (n[t]==0) ? 2*N[t] : n[t];

  /* unknown window numbers fall back to window 0 */
  unsigned window_flag;
  switch(window){
    case 0: window_flag = PNFFT_WINDOW_GAUSSIAN; break;
    case 1: window_flag = PNFFT_WINDOW_BSPLINE; break;
    case 2: window_flag = PNFFT_WINDOW_SINC_POWER; break;
    case 3: window_flag = PNFFT_WINDOW_BESSEL_I0; break;
    case 4: window_flag = PNFFT_WINDOW_KAISER_BESSEL; break;
    case 5: window_flag = PNFFT_WINDOW_GAUSSIAN_T; break;
    default: window_flag = PNFFT_WINDOW_GAUSSIAN; window = 0;
  }

  /* NOTE(review): PNFFT_FG_PSI is set for window 0 only, whereas the summary
   * printed below reports it for windows 0 and 5 - confirm which is intended. */
  unsigned intpol_flag;
  switch(intpol){
    case 0: intpol_flag = PNFFT_PRE_CONST_PSI; break;
    case 1: intpol_flag = PNFFT_PRE_LIN_PSI; break;
    case 2: intpol_flag = PNFFT_PRE_QUAD_PSI; break;
    case 3: intpol_flag = PNFFT_PRE_CUB_PSI; break;
    default: intpol_flag = (window==0) ? PNFFT_FG_PSI : 0;
  }

  unsigned interlacing_flag = (interlacing) ? PNFFT_INTERLACED : 0;

  pfft_printf(MPI_COMM_WORLD, "******************************************************************************************************\n");
  pfft_printf(MPI_COMM_WORLD, "* Computation of parallel NFFT\n");
  pfft_printf(MPI_COMM_WORLD, "* for  N[0] x N[1] x N[2] = %td x %td x %td Fourier coefficients (change with -pnfft_N * * *)\n", N[0], N[1], N[2]);
  pfft_printf(MPI_COMM_WORLD, "* at   local_M = %td nodes per process (change with -pnfft_local_M *)\n", local_M);
  pfft_printf(MPI_COMM_WORLD, "* with n[0] x n[1] x n[2] = %td x %td x %td FFT grid size (change with -pnfft_n * * *),\n", n[0], n[1], n[2]);
  pfft_printf(MPI_COMM_WORLD, "*      m = %d real space cutoff (change with -pnfft_m *),\n", m);
  pfft_printf(MPI_COMM_WORLD, "*      window = %d window function ", window);
  switch(window){
    case 0: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_GAUSSIAN) "); break;
    case 1: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_BSPLINE) "); break;
    case 2: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_SINC_POWER) "); break;
    case 3: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_BESSEL_I0) "); break;
    case 4: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_KAISER_BESSEL) "); break;
    case 5: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_GAUSSIAN_T) "); break;
  }
  pfft_printf(MPI_COMM_WORLD, "(change with -pnfft_window *),\n");
  pfft_printf(MPI_COMM_WORLD, "*      intpol = %d interpolation order ", intpol);
  switch(intpol){
    case 0: pfft_printf(MPI_COMM_WORLD, "(PNFFT_PRE_CONST_PSI) "); break;
    case 1: pfft_printf(MPI_COMM_WORLD, "(PNFFT_PRE_LIN_PSI) "); break;
    case 2: pfft_printf(MPI_COMM_WORLD, "(PNFFT_PRE_QUAD_PSI) "); break;
    case 3: pfft_printf(MPI_COMM_WORLD, "(PNFFT_PRE_CUB_PSI) "); break;
    default: if(window==0 || window==5)
               pfft_printf(MPI_COMM_WORLD, "(PNFFT_FG_PSI) ");
             else
               pfft_printf(MPI_COMM_WORLD, "(No interpolation enabled) ");
  }
  pfft_printf(MPI_COMM_WORLD, "(change with -pnfft_intpol *),\n");
  /* terminate the interlacing line so that the process mesh line starts on its own line */
  if(interlacing)
    pfft_printf(MPI_COMM_WORLD, "*      interlacing = enabled (disable with -pnfft_interlacing 0)\n");
  else
    pfft_printf(MPI_COMM_WORLD, "*      interlacing = disabled (enable with -pnfft_interlacing 1)\n");
  /* np holds ints, so it is printed with %d (not %td, which expects ptrdiff_t) */
  pfft_printf(MPI_COMM_WORLD, "* on   np[0] x np[1] x np[2] = %d x %d x %d processes (change with -pnfft_np * * *)\n", np[0], np[1], np[2]);
  pfft_printf(MPI_COMM_WORLD, "*******************************************************************************************************\n\n");


  /* calculate parallel NFFT */
  pnfft_perform_guru(N, n, local_M, m,   x_max, window_flag| intpol_flag| interlacing_flag, np, MPI_COMM_WORLD);

  /* free mem and finalize */
  pnfft_cleanup();
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 10
0
/*
 * Simple parallel NFFT example on a 2 x 2 x 2 process mesh with 16^3 Fourier
 * coefficients: runs pnfft_trafo followed by pnfft_adj, rescales the Fourier
 * coefficients and prints 8 values of the input, of the intermediate result
 * and of the final coefficients.
 *
 * Returns 0 on success and 1 if the process mesh cannot be created.
 */
int main(int argc, char **argv){
  int np[3];
  ptrdiff_t N[3], local_M;
  ptrdiff_t local_N[3], local_N_start[3];
  double lower_border[3], upper_border[3];
  MPI_Comm comm_cart_3d;
  pnfft_complex *f_hat, *f;
  double *x;
  pnfft_plan pnfft;

  MPI_Init(&argc, &argv);
  pnfft_init();

  /* Set default values */
  N[0] = N[1] = N[2] = 16;
  np[0]=2; np[1]=2; np[2]=2;
  local_M = N[0]*N[1]*N[2]/(np[0]*np[1]*np[2]);

  /* Print infos */
  pfft_printf(MPI_COMM_WORLD, "******************************************************************************************************\n");
  pfft_printf(MPI_COMM_WORLD, "* Computation of parallel NFFT\n");
  pfft_printf(MPI_COMM_WORLD, "* for  N[0] x N[1] x N[2] = %td x %td x %td Fourier coefficients\n", N[0], N[1], N[2]);
  pfft_printf(MPI_COMM_WORLD, "* at   local_M = %td nodes per process\n", local_M);
  /* np holds ints, so it is printed with %d (not %td, which expects ptrdiff_t) */
  pfft_printf(MPI_COMM_WORLD, "* on   np[0] x np[1] x np[2] = %d x %d x %d processes\n", np[0], np[1], np[2]);
  pfft_printf(MPI_COMM_WORLD, "*******************************************************************************************************\n\n");

  /* create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */
  if( pnfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]);
    pfft_fprintf(MPI_COMM_WORLD, stderr, "       Please allocate %d processes (mpiexec -np %d ...) or change the procmesh (with -pnfft_np * * *).\n", np[0]*np[1]*np[2], np[0]*np[1]*np[2]);
    MPI_Finalize();
    return 1;
  }

  /* Get parameters of data distribution */
  pnfft_local_size_3d(N, comm_cart_3d, PNFFT_TRANSPOSED_NONE,
      local_N, local_N_start, lower_border, upper_border);

  /* Plan parallel NFFT */
  pnfft = pnfft_init_3d(N, local_M, comm_cart_3d);

  /* Get data pointers */
  f_hat = pnfft_get_f_hat(pnfft);
  f     = pnfft_get_f(pnfft);
  x     = pnfft_get_x(pnfft);

  /* Initialize Fourier coefficients */
  pnfft_init_f_hat_3d(N, local_N, local_N_start, PNFFT_TRANSPOSED_NONE,
      f_hat);

  /* Initialize nonequispaced nodes */
  init_random_x(lower_border, upper_border, local_M,
      x);

  /* Print input Fourier coefficents */
  vpr_complex(comm_cart_3d, 8, f_hat,
      "Input Fourier coefficients on process 1:");

  /* Execute parallel NFFT */
  pnfft_trafo(pnfft);

  /* Print NFFT results */
  vpr_complex(comm_cart_3d, 8, f,
      "PNFFT Results on process 1:");

  /* Execute parallel adjoint NFFT */
  pnfft_adj(pnfft);

  /* Scale data */
  for(ptrdiff_t l=0; l < local_N[0] * local_N[1] * local_N[2]; l++)
    f_hat[l] /= (N[0]*N[1]*N[2]);

  /* Print output Fourier coefficents */
  vpr_complex(comm_cart_3d, 8, f_hat,
      "Fourier coefficients after one forward and backward PNFFT on process 1:");

  /* free mem and finalize */
  pnfft_finalize(pnfft, PNFFT_FREE_X | PNFFT_FREE_F_HAT| PNFFT_FREE_F);
  MPI_Comm_free(&comm_cart_3d);
  pnfft_cleanup();
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 11
0
/*
 * Accuracy test of the parallel NFFT: sets defaults, lets init_parameters
 * override them from the command line, prints a summary of the setup, runs
 * pnfft_perform_guru once with the requested window and cutoff m and once
 * with the Kaiser-Bessel window and cutoff m+2, and prints the deviation
 * between the two results via compare_f.
 *
 * Always returns 0.
 */
int main(int argc, char **argv){
  int np[3], m, window;
  unsigned window_flag;
  ptrdiff_t N[3], n[3], local_M;
  double f_hat_sum, x_max[3];
  pnfft_complex *f1, *f2;

  MPI_Init(&argc, &argv);
  pnfft_init();

  /* set default values */
  N[0] = N[1] = N[2] = 16;
  n[0] = n[1] = n[2] = 0;
  local_M = 0;
  m = 6;
  window = 4;
  x_max[0] = x_max[1] = x_max[2] = 0.5;
  np[0]=2; np[1]=2; np[2]=2;

  /* set parameters by command line */
  init_parameters(argc, argv, N, n, &local_M, &m, &window, x_max, np);

  /* if M or n are set to zero, we choose nice values */
  local_M = (local_M==0) ? N[0]*N[1]*N[2]/(np[0]*np[1]*np[2]) : local_M;
  for(int t=0; t<3; t++)
    n[t] = (n[t]==0) ? 2*N[t] : n[t];

  /* every window number other than 0..3 selects the Kaiser-Bessel window */
  switch(window){
    case 0: window_flag = PNFFT_WINDOW_GAUSSIAN; break;
    case 1: window_flag = PNFFT_WINDOW_BSPLINE; break;
    case 2: window_flag = PNFFT_WINDOW_SINC_POWER; break;
    case 3: window_flag = PNFFT_WINDOW_BESSEL_I0; break;
    default: window_flag = PNFFT_WINDOW_KAISER_BESSEL;
  }

  pfft_printf(MPI_COMM_WORLD, "******************************************************************************************************\n");
  pfft_printf(MPI_COMM_WORLD, "* Computation of parallel NFFT\n");
  pfft_printf(MPI_COMM_WORLD, "* for  N[0] x N[1] x N[2] = %td x %td x %td Fourier coefficients (change with -pnfft_N * * *)\n", N[0], N[1], N[2]);
  pfft_printf(MPI_COMM_WORLD, "* at   local_M = %td nodes per process (change with -pnfft_local_M *)\n", local_M);
  pfft_printf(MPI_COMM_WORLD, "* with n[0] x n[1] x n[2] = %td x %td x %td FFT grid size (change with -pnfft_n * * *),\n", n[0], n[1], n[2]);
  pfft_printf(MPI_COMM_WORLD, "*      m = %d real space cutoff (change with -pnfft_m *),\n", m);
  pfft_printf(MPI_COMM_WORLD, "*      window = %d window function ", window);
  switch(window){
    case 0: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_GAUSSIAN) "); break;
    case 1: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_BSPLINE) "); break;
    case 2: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_SINC_POWER) "); break;
    case 3: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_BESSEL_I0) "); break;
    default: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_KAISER_BESSEL) "); break;
  }
  pfft_printf(MPI_COMM_WORLD, "(change with -pnfft_window *),\n");
  /* np holds ints, so it is printed with %d (not %td, which expects ptrdiff_t) */
  pfft_printf(MPI_COMM_WORLD, "* on   np[0] x np[1] x np[2] = %d x %d x %d processes (change with -pnfft_np * * *)\n", np[0], np[1], np[2]);
  pfft_printf(MPI_COMM_WORLD, "*******************************************************************************************************\n\n");


  /* calculate parallel NFFT */
  pnfft_perform_guru(N, n, local_M, m,   x_max, window_flag, np, MPI_COMM_WORLD,
      &f1, &f_hat_sum);

  /* calculate parallel NFFT with higher accuracy */
  pnfft_perform_guru(N, n, local_M, m+2, x_max, PNFFT_WINDOW_KAISER_BESSEL, np, MPI_COMM_WORLD,
      &f2, &f_hat_sum);

  /* calculate error of PNFFT */
  compare_f(f1, f2, local_M, f_hat_sum, "* Results in", MPI_COMM_WORLD);

  /* free mem and finalize */
  pnfft_free(f1); pnfft_free(f2);
  pnfft_cleanup();
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 12
0
/*
 * Plan and execute one parallel NFFT on a three-dimensional process mesh.
 *
 * N, n, x_max, m : forwarded unchanged to pnfft_local_size_guru and
 *                  pnfft_init_guru
 * local_M        : node count handed to the planner and to init_random_x
 * window_flag    : OR-ed into the planner flags
 * np             : three-entry size of the process mesh
 * comm           : communicator the mesh is created from; also used for the
 *                  timing reduction and for printing
 * f              : out - receives the pointer returned by pnfft_get_f; the
 *                  plan is finalized with only PNFFT_FREE_X and
 *                  PNFFT_FREE_F_HAT, so this array is not released here
 * f_hat_sum      : out - sum over all mesh processes of |f_hat[k]|
 *
 * When the mesh cannot be created an error is printed, MPI is finalized and
 * the process exits with status 1.
 */
static void pnfft_perform_guru(
    const ptrdiff_t *N, const ptrdiff_t *n, ptrdiff_t local_M,
    int m, const double *x_max, unsigned window_flag,
    const int *np, MPI_Comm comm,
    pnfft_complex **f, double *f_hat_sum
    )
{
  MPI_Comm mesh;

  /* build the np[0] x np[1] x np[2] process mesh, bail out if that fails */
  if( pnfft_create_procmesh(3, comm, np, &mesh) != 0 ){
    const int wanted = np[0]*np[1]*np[2];
    pfft_fprintf(comm, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]);
    pfft_fprintf(comm, stderr, "       Please allocate %d processes (mpiexec -np %d ...) or change the procmesh (with -pnfft_np * * *).\n", wanted, wanted);
    MPI_Finalize();
    exit(1);
  }

  /* query the local block of Fourier coefficients and the local node box */
  ptrdiff_t extent[3], offset[3];
  double box_lo[3], box_hi[3];
  pnfft_local_size_guru(3, N, n, x_max, m, mesh, PNFFT_TRANSPOSED_NONE,
      extent, offset, box_lo, box_hi);

  /* create the plan; the plan allocates x, f_hat and f itself */
  pnfft_plan plan = pnfft_init_guru(3, N, n, x_max, local_M, m,
      PNFFT_MALLOC_X| PNFFT_MALLOC_F_HAT| PNFFT_MALLOC_F| window_flag, PFFT_ESTIMATE,
      mesh);

  /* fetch the arrays owned by the plan */
  pnfft_complex *coeffs = pnfft_get_f_hat(plan);
  *f = pnfft_get_f(plan);
  double *nodes = pnfft_get_x(plan);

  /* fill the Fourier coefficients */
  pnfft_init_f_hat_3d(N, extent, offset, PNFFT_TRANSPOSED_NONE, coeffs);

  /* fill the nodes; the generator is reseeded with a fixed seed first */
  srand(0);
  init_random_x(box_lo, box_hi, x_max, local_M, nodes);

  /* run the transform and take the wall-clock time around it */
  double t_begin = MPI_Wtime();
  pnfft_trafo(plan);
  double elapsed = MPI_Wtime() - t_begin;

  /* report the slowest process */
  double elapsed_max;
  MPI_Reduce(&elapsed, &elapsed_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
  pfft_printf(comm, "pnfft_trafo needs %6.2e s\n", elapsed_max);

  /* global sum of |f_hat|, used by the caller to form a relative error */
  const ptrdiff_t count = extent[0]*extent[1]*extent[2];
  double abs_sum = 0;
  for(ptrdiff_t k=0; k<count; k++)
    abs_sum += cabs(coeffs[k]);
  MPI_Allreduce(&abs_sum, f_hat_sum, 1, MPI_DOUBLE, MPI_SUM, mesh);

  /* release the plan, its nodes and coefficients - but not the samples in *f */
  pnfft_finalize(plan, PNFFT_FREE_X | PNFFT_FREE_F_HAT);
  MPI_Comm_free(&mesh);
}
Ejemplo n.º 13
0
/*
 * Benchmark a 3d complex-to-complex parallel FFT on a 2d process mesh.
 *
 * Plans a forward (PFFT_TRANSPOSED_OUT) and a backward (PFFT_TRANSPOSED_IN)
 * transform, runs `loops` forward/backward round trips and prints
 *   - the PFFT-internal average timers of both plans,
 *   - the planner flags that were requested,
 *   - planning time and average execution time of both directions
 *     (maximum over all processes of the mesh),
 *   - the value returned by pfft_check_output_complex_3d.
 *
 * n              : three-entry FFT size
 * np             : two-entry size of the process mesh
 * comm           : communicator the mesh is created from
 * loops          : number of round trips to time
 * inplace        : non-zero -> input and output share one buffer
 * pfft_opt_flags : planner flags OR-ed into both plans
 *
 * Returns without measuring anything when the mesh cannot be created.
 */
static void measure_pfft(
    const ptrdiff_t *n, int *np, MPI_Comm comm,
    int loops, int inplace, unsigned pfft_opt_flags
    )
{
  ptrdiff_t alloc_local;
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  double err=0.0, timer[4], max_timer[4];
  pfft_complex *in, *out;
  pfft_plan plan_forw=NULL, plan_back=NULL;
  MPI_Comm comm_cart_2d;

  /* Create two-dimensional process grid of size np[0] x np[1], if possible */
  if( pfft_create_procmesh_2d(comm, np[0], np[1], &comm_cart_2d) ){
    pfft_fprintf(comm, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    return;
  }

  /* Get parameters of data distribution */
  alloc_local = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_OUT,
      local_ni, local_i_start, local_no, local_o_start);

  /* Allocate memory; an in-place run reuses the input buffer as output */
  in  = pfft_alloc_complex(alloc_local);
  out = (inplace) ? in : pfft_alloc_complex(alloc_local);

  /* Plan parallel forward FFT, timer[0] = planning time */
  timer[0] = -MPI_Wtime();
  plan_forw = pfft_plan_dft_3d(
      n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| pfft_opt_flags);
  timer[0] += MPI_Wtime();

  /* Plan parallel backward FFT, timer[1] = planning time */
  timer[1] = -MPI_Wtime();
  plan_back = pfft_plan_dft_3d(
      n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| pfft_opt_flags);
  timer[1] += MPI_Wtime();

  /* Initialize input with random numbers */
  pfft_init_input_complex_3d(n, local_ni, local_i_start,
      in);

  pfft_reset_timer(plan_forw);
  pfft_reset_timer(plan_back);

  /* timer[2] / timer[3] accumulate forward / backward execution time */
  timer[2] = timer[3] = 0;
  for(int t=0; t<loops; t++){
    /* Synchronize only the processes that take part in this measurement:
     * a barrier on MPI_COMM_WORLD would hang if comm is a strict subset. */
    MPI_Barrier(comm_cart_2d);
    timer[2] -= MPI_Wtime();
    pfft_execute(plan_forw);
    timer[2] += MPI_Wtime();

    /* execute parallel backward FFT */
    MPI_Barrier(comm_cart_2d);
    timer[3] -= MPI_Wtime();
    pfft_execute(plan_back);
    timer[3] += MPI_Wtime();

    /* Scale data so that the next round trip starts from the input again */
    for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++)
      in[l] /= (n[0]*n[1]*n[2]);
  }

  /* Average over the round trips; leave the zeros untouched for loops <= 0 */
  if(loops > 0){
    timer[2] /= loops;
    timer[3] /= loops;
  }

  /* Print pfft timer */
  pfft_print_average_timer_adv(plan_forw, comm_cart_2d);
  pfft_print_average_timer_adv(plan_back, comm_cart_2d);

  /* Print optimization flags */
  pfft_printf(comm_cart_2d, "\nFlags = ");
  if(pfft_opt_flags & PFFT_TUNE)
    pfft_printf(comm_cart_2d, "PFFT_TUNE");
  else
    pfft_printf(comm_cart_2d, "PFFT_NO_TUNE");

  pfft_printf(comm_cart_2d, " | ");

  if(pfft_opt_flags & PFFT_ESTIMATE)
    pfft_printf(comm_cart_2d, "PFFT_ESTIMATE");
  else if(pfft_opt_flags & PFFT_PATIENT)
    pfft_printf(comm_cart_2d, "PFFT_PATIENT");
  else if(pfft_opt_flags & PFFT_EXHAUSTIVE)
    pfft_printf(comm_cart_2d, "PFFT_EXHAUSTIVE");
  else
    pfft_printf(comm_cart_2d, "PFFT_MEASURE");

  pfft_printf(comm_cart_2d, " | ");

  if(pfft_opt_flags & PFFT_DESTROY_INPUT)
    pfft_printf(comm_cart_2d, "PFFT_DESTROY_INPUT");
  else
    pfft_printf(comm_cart_2d, "PFFT_PRESERVE_INPUT");

  pfft_printf(comm_cart_2d, "\n");


  /* Print error of back transformed data */
  err = pfft_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Run %d loops of ", loops);
  if(inplace)
    pfft_printf(comm_cart_2d, "in-place");
  else
    pfft_printf(comm_cart_2d, "out-of-place");
  pfft_printf(comm_cart_2d, " forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]);

  /* Slowest process per timer; the arrays themselves are the buffers */
  MPI_Reduce(timer, max_timer, 4, MPI_DOUBLE, MPI_MAX, 0, comm_cart_2d);
  pfft_printf(comm_cart_2d, "tune_forw = %6.2e; tune_back = %6.2e, exec_forw = %6.2e, exec_back = %6.2e, error = %6.2e\n", max_timer[0], max_timer[1], max_timer[2], max_timer[3], err);

  /* free mem and finalize; out aliases in for in-place runs */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_2d);
  if(in != out) pfft_free(out);
  pfft_free(in);
}
int main(int argc, char **argv)
{
  int np[3];
  ptrdiff_t N[3];
  ptrdiff_t local_M;
  double err;
  MPI_Comm comm_cart_3d;

  ptrdiff_t local_N_c2c[3], local_N_start_c2c[3];
  double lower_border_c2c[3], upper_border_c2c[3];
  pnfft_plan plan_c2c;
  pnfft_complex *f_hat_c2c, *f_c2c;
  double *x_c2c;

  ptrdiff_t local_N_c2r[3], local_N_start_c2r[3];
  double lower_border_c2r[3], upper_border_c2r[3];
  pnfft_plan plan_c2r;
  pnfft_complex *f_hat_c2r;
  double *x_c2r, *f_c2r;

  /* Initialize MPI and PFFT */
  MPI_Init(&argc, &argv);
  pnfft_init();

  np[0] = 2; np[1] = 2; np[2] = 4;
  // for bigger transforms the data gets distributed differently, which makes comparing the results harder.
  N[0] = 8; N[1] = 8; N[2] = 16;
  local_M = N[0]*N[1]*N[2]/(np[0]*np[1]*np[2]);

   /* Print infos */
  pfft_printf(MPI_COMM_WORLD, "******************************************************************************************************\n");
  pfft_printf(MPI_COMM_WORLD, "* Computation of parallel NFFT\n");
  pfft_printf(MPI_COMM_WORLD, "* for  N[0] x N[1] x N[2] = %td x %td x %td Fourier coefficients)\n", N[0], N[1], N[2]);
  pfft_printf(MPI_COMM_WORLD, "* at   local_M = %td nodes per process\n", local_M);
  pfft_printf(MPI_COMM_WORLD, "* on   np[0] x np[1] x np[2] = %td x %td x %td processes\n", np[0], np[1], np[2]);
  pfft_printf(MPI_COMM_WORLD, "*******************************************************************************************************\n\n");

  /* create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */
  if( pnfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]);
    pfft_fprintf(MPI_COMM_WORLD, stderr, "       Please allocate %d processes (mpiexec -np %d ...) or change the procmesh (with -pnfft_np * * *).\n", np[0]*np[1]*np[2], np[0]*np[1]*np[2]);
    MPI_Finalize();
    return 1;
  }

  /* Get parameters of data distribution */
  pnfft_local_size_3d(N, comm_cart_3d, PNFFT_TRANSPOSED_NONE,
      local_N_c2c, local_N_start_c2c, lower_border_c2c, upper_border_c2c);
  pnfft_local_size_3d_c2r(N, comm_cart_3d, PNFFT_TRANSPOSED_NONE,
      local_N_c2r, local_N_start_c2r, lower_border_c2r, upper_border_c2r);

  /* Plan parallel NFFT */
  plan_c2c = pnfft_init_adv(3, N, local_M,
      PNFFT_TRANSPOSED_NONE| PNFFT_WINDOW_SINC_POWER| PNFFT_MALLOC_X| PNFFT_MALLOC_F_HAT| PNFFT_MALLOC_F, PFFT_ESTIMATE,
      comm_cart_3d);
  plan_c2r = pnfft_init_adv_c2r(3, N, local_M,
      PNFFT_TRANSPOSED_NONE| PNFFT_WINDOW_SINC_POWER| PNFFT_MALLOC_X| PNFFT_MALLOC_F_HAT| PNFFT_MALLOC_F, PFFT_ESTIMATE,
      comm_cart_3d);


  f_hat_c2c = pnfft_get_f_hat(plan_c2c);
  f_c2c     = pnfft_get_f(plan_c2c);
  x_c2c     = pnfft_get_x(plan_c2c);
  f_hat_c2r = pnfft_get_f_hat(plan_c2r);
  f_c2r     = pnfft_get_f(plan_c2r);
  x_c2r     = pnfft_get_x(plan_c2r);

  /* Initialize Fourier coefficients with random numbers */
  init_input(N, local_N_c2c, local_N_start_c2c, f_hat_c2c);
  init_input(N, local_N_c2r, local_N_start_c2r, f_hat_c2r);

  /* Initialize nodes with random numbers */
//  pnfft_init_x_3d(lower_border_c2c, upper_border_c2c, local_M, x_c2c);
  init_equispaced_x(N, lower_border_c2c, upper_border_c2c, x_c2c);
  for (int k=0; k<local_M*3; k++)
    x_c2r[k] = x_c2c[k];

  /* execute parallel NFFT */
  pnfft_trafo(plan_c2c);
  MPI_Barrier(MPI_COMM_WORLD);
  pnfft_trafo(plan_c2r);
  MPI_Barrier(MPI_COMM_WORLD);

  err = compare_complex_real(local_M, f_c2c, f_c2r, comm_cart_3d);
  pfft_printf(MPI_COMM_WORLD, "max error between c2c and c2r after trafo: %6.2e\n", err);

  /* free mem and finalize */
  pnfft_finalize(plan_c2c, PNFFT_FREE_X | PNFFT_FREE_F_HAT | PNFFT_FREE_F);
  pnfft_finalize(plan_c2r, PNFFT_FREE_X | PNFFT_FREE_F_HAT | PNFFT_FREE_F);
  MPI_Comm_free(&comm_cart_3d);

  /* Finalize MPI */
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 15
0
/*
 * Test driver: forward and backward 3d real-to-real transform on a 2 x 2
 * process mesh, followed by a check of the back-transformed data against the
 * original input.
 *
 * Returns 1 when the process mesh cannot be created, 0 otherwise.
 */
int main(int argc, char **argv)
{
  int np[2];
  ptrdiff_t n[3], N[3];
  ptrdiff_t alloc_local;
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  double err, *in, *out;
  pfft_plan plan_forw=NULL, plan_back=NULL;
  MPI_Comm comm_cart_2d;
  pfft_r2r_kind kinds_forw[3], kinds_back[3];

  /* Set size of FFT and process mesh */
  n[0] = 29; n[1] = 27; n[2] = 31;
  np[0] = 2; np[1] = 2;

  /* Set FFTW kinds of 1d R2R trafos */
  kinds_forw[0] = PFFT_REDFT00; kinds_back[0] = PFFT_REDFT00;
  kinds_forw[1] = PFFT_REDFT01; kinds_back[1] = PFFT_REDFT10;
  kinds_forw[2] = PFFT_RODFT00; kinds_back[2] = PFFT_RODFT00;

  /* Set logical DFT sizes corresponding to FFTW manual:
   * for REDFT00 N=2*(n-1), for RODFT00 N=2*(n+1), otherwise N=2*n */
  N[0] = 2*(n[0]-1);
  N[1] = 2*n[1];
  N[2] = 2*(n[2]+1);

  /* Initialize MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Create two-dimensional process grid of size np[0] x np[1], if possible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Get parameters of data distribution */
  alloc_local = pfft_local_size_r2r_3d(n, comm_cart_2d, PFFT_TRANSPOSED_OUT,
      local_ni, local_i_start, local_no, local_o_start);

  /* Allocate memory */
  in  = pfft_alloc_real(alloc_local);
  out = pfft_alloc_real(alloc_local);

  /* Plan parallel forward FFT */
  plan_forw = pfft_plan_r2r_3d(
      n, in, out, comm_cart_2d, kinds_forw, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT);

  /* Plan parallel backward FFT */
  plan_back = pfft_plan_r2r_3d(
      n, out, in, comm_cart_2d, kinds_back, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT);

  /* Initialize input with random numbers */
  pfft_init_input_real_3d(n, local_ni, local_i_start,
      in);

  /* execute parallel forward FFT */
  pfft_execute(plan_forw);

  /* execute parallel backward FFT */
  pfft_execute(plan_back);

  /* Scale data by the product of the logical DFT sizes, not of n */
  for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++)
    in[l] /= (N[0]*N[1]*N[2]);

  /* Print error of back transformed data */
  MPI_Barrier(MPI_COMM_WORLD);
  err = pfft_check_output_real_3d(n, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]);
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);

  /* free mem and finalize */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(in); pfft_free(out);
  MPI_Finalize();
  return 0;
}
/*
 * Test driver: runs a forward/backward round trip of a 3d complex-to-complex
 * transform and of a complex-to-real / real-to-complex transform pair on a
 * 2 x 2 process mesh, scales both results and prints the value returned by
 * compare_c2c_c2r for the two back-transformed arrays.
 *
 * Returns 1 when the process mesh cannot be created, 0 otherwise.
 */
int main(int argc, char **argv)
{
  int np[2];
  ptrdiff_t n[3], ni[3], howmany;
  double err;

  /* state of the complex-to-complex transform pair */
  ptrdiff_t alloc_local_c;
  ptrdiff_t local_ni_c[3], local_i_start_c[3];
  ptrdiff_t local_no_c[3], local_o_start_c[3];
  pfft_complex *in_c, *out_c;
  pfft_plan plan_forw_c=NULL, plan_back_c=NULL;

  /* state of the c2r (forward) / r2c (backward) transform pair */
  ptrdiff_t alloc_local_r, alloc_local_forw, alloc_local_back;
  ptrdiff_t local_ni_r[3], local_i_start_r[3];
  ptrdiff_t local_no_r[3], local_o_start_r[3];
  pfft_complex *in_r;
  double *out_r;
  pfft_plan plan_forw_r=NULL, plan_back_r=NULL;

  MPI_Comm comm_cart_2d;

  /* Set size of FFT and process mesh */
  /* NOTE(review): ni is presumably the (smaller) pruned input size of the
   * forward transforms and n their full size - confirm against the PFFT API */
  ni[0] = 4; ni[1] = 4; ni[2] = 4;
  n[0] = 6; n[1] = 6; n[2] = 6;
  np[0] = 2; np[1] = 2;
  howmany = 1;

  /* Initialize MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Create two-dimensional process grid of size np[0] x np[1], if possible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Get parameters of data distribution */
  /* The backward query passes the out-arrays in the input slots and the
   * in-arrays in the output slots, so it overwrites everything the forward
   * query just stored; only the two return values are kept separately. */
  alloc_local_forw = pfft_local_size_many_dft(3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_NONE| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT,
      local_ni_c, local_i_start_c, local_no_c, local_o_start_c);
  alloc_local_back = pfft_local_size_many_dft(3, n, n, ni, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_NONE| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT,
      local_no_c, local_o_start_c, local_ni_c, local_i_start_c);

  /* one buffer size that satisfies both directions */
  alloc_local_c = (alloc_local_forw > alloc_local_back) ? alloc_local_forw : alloc_local_back;

  /* same pair of queries for the c2r / r2c plans, reusing the two temporaries */
  alloc_local_forw = pfft_local_size_many_dft_c2r(3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_NONE| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT,
      local_ni_r, local_i_start_r, local_no_r, local_o_start_r);
  alloc_local_back = pfft_local_size_many_dft_r2c(3, n, n, ni, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_NONE| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT,
      local_no_r, local_o_start_r, local_ni_r, local_i_start_r);

  alloc_local_r = (alloc_local_forw > alloc_local_back) ? alloc_local_forw : alloc_local_back;


  /* Allocate memory */
  /* NOTE(review): the real array gets 2*alloc_local_r entries, presumably
   * because alloc_local_r counts complex elements - confirm */
  in_c  = pfft_alloc_complex(alloc_local_c);
  out_c = pfft_alloc_complex(alloc_local_c);
  in_r  = pfft_alloc_complex(alloc_local_r);
  out_r = pfft_alloc_real(2*alloc_local_r);


  /* Plan parallel forward FFT */
  plan_forw_c = pfft_plan_many_dft(3,
      n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, in_c, out_c, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT);
  plan_forw_r = pfft_plan_many_dft_c2r(3,
      n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, in_r, out_r, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT);

  /* Plan parallel backward FFT */
  plan_back_c = pfft_plan_many_dft(3,
      n, n, ni, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, out_c, in_c, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT);
  plan_back_r = pfft_plan_many_dft_r2c(3,
      n, n, ni, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, out_r, in_r, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT);

  /* Initialize both inputs through the same init_input helper */
  init_input(ni, local_ni_c, local_i_start_c, in_c);
  init_input(ni, local_ni_r, local_i_start_r, in_r);

//   pfft_apr_complex_3d(in_c, local_ni_c, local_i_start_c, "c2c input:\n", comm_cart_2d);
//   pfft_apr_complex_3d(in_r, local_ni_r, local_i_start_r, "c2r input:\n", comm_cart_2d);

  /* execute parallel forward FFT */
  pfft_execute(plan_forw_c);

  /* NOTE(review): no input is cleared between the two forward transforms */

  pfft_execute(plan_forw_r);

//   pfft_apr_complex_3d(out_c, local_no_c, local_o_start_c, "c2c output:\n", comm_cart_2d);
//   pfft_apr_real_3d(out_r, local_no_r, local_o_start_r, "c2r output:\n", comm_cart_2d);

  /* execute parallel backward FFT */
  pfft_execute(plan_back_c);
  pfft_execute(plan_back_r);

//   pfft_apr_complex_3d(in_c, local_ni_c, local_i_start_c, "c2c^ output:\n", comm_cart_2d);
//   pfft_apr_complex_3d(in_r, local_ni_r, local_i_start_r, "c2r^ output:\n", comm_cart_2d);

  /* Scale data: divide every local entry by n[0]*n[1]*n[2] */
  for(ptrdiff_t l=0; l < local_ni_c[0] * local_ni_c[1] * local_ni_c[2]; l++)
    in_c[l] /= (n[0]*n[1]*n[2]);
  for(ptrdiff_t l=0; l < local_ni_r[0] * local_ni_r[1] * local_ni_r[2]; l++)
    in_r[l] /= (n[0]*n[1]*n[2]);

  /* Compare the two back-transformed arrays and print the result */
  err = compare_c2c_c2r(local_ni_c, local_ni_r, in_c, in_r, comm_cart_2d);

  pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]);
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);
  
  /* free mem and finalize */
  pfft_destroy_plan(plan_forw_c);
  pfft_destroy_plan(plan_back_c);
  pfft_destroy_plan(plan_forw_r);
  pfft_destroy_plan(plan_back_r);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(in_c); pfft_free(out_c);
  pfft_free(in_r); pfft_free(out_r);
  MPI_Finalize();
  return 0;
}
int main(int argc, char **argv)
{
  int np[2];
  ptrdiff_t n[3];
  ptrdiff_t alloc_local;
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  double err;
  double *planned_in, *executed_in;
  pfft_complex *planned_out, *executed_out;
  pfft_plan plan_forw=NULL, plan_back=NULL;
  MPI_Comm comm_cart_2d;
  
  /* Set size of FFT and process mesh */
  n[0] = 29; n[1] = 27; n[2] = 31;
  np[0] = 2; np[1] = 2;
  
  /* Initialize MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Create two-dimensional process grid of size np[0] x np[1], if possible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }
  
  /* Get parameters of data distribution */
  alloc_local = pfft_local_size_dft_r2c_3d(n, comm_cart_2d, PFFT_TRANSPOSED_OUT| PFFT_PADDED_R2C,
      local_ni, local_i_start, local_no, local_o_start);

  /* Allocate memory for planning */
  planned_in  = pfft_alloc_real (2 * alloc_local);
  planned_out = pfft_alloc_complex(alloc_local);

  /* Plan parallel forward FFT */
  plan_forw = pfft_plan_dft_r2c_3d(
      n, planned_in, planned_out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_PADDED_R2C);
  
  /* Plan parallel backward FFT */
  plan_back = pfft_plan_dft_c2r_3d(
      n, planned_out, planned_in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_PADDED_C2R);

  /* Free planning arrays since we use other arrays for execution */
  pfft_free(planned_in); pfft_free(planned_out);

  /* Allocate memory for execution */
  executed_in  = pfft_alloc_real(2 * alloc_local);
  executed_out = pfft_alloc_complex(alloc_local);

  /* Initialize input with random numbers */
  pfft_init_input_real(3, n, local_ni, local_i_start,
      executed_in);
  
  /* execute parallel forward FFT */
  pfft_execute_dft_r2c(plan_forw, executed_in, executed_out);

  /* clear the old input */
  pfft_clear_input_real(3, n, local_ni, local_i_start,
      executed_in);

  /* execute parallel backward FFT */
  pfft_execute_dft_c2r(plan_back, executed_out, executed_in);
  
  /* Scale data */
  for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++)
    executed_in[l] /= (n[0]*n[1]*n[2]);

  /* Print error of back transformed data */
  err = pfft_check_output_real(3, n, local_ni, local_i_start, executed_in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); 
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);
  
  /* free mem and finalize */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(executed_in); pfft_free(executed_out);
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 18
0
/*
 * Test driver: repeated forward/backward 3d complex-to-complex transform on
 * a 1 x 1 process mesh with a configurable number of OpenMP threads.
 *
 * Command line:
 *   -pfft_omp_threads <int>  number of threads (default 1)
 *   -pfft_runs <int>         number of forward/backward round trips (default 1)
 *
 * Prints the average PFFT timers of both plans and the value returned by
 * pfft_check_output_complex_3d. Returns 1 when the process mesh cannot be
 * created, 0 otherwise.
 */
int main(int argc, char **argv)
{
  int nthreads=1; /*number of threads to initialize openmp with*/
  int runs=1; /*number of runs for testing*/
  int np[2];
  ptrdiff_t n[3];
  ptrdiff_t alloc_local;
  ptrdiff_t local_ni[3], local_i_start[3];
  ptrdiff_t local_no[3], local_o_start[3];
  double err;
  pfft_complex *in, *out;
  pfft_plan plan_forw=NULL, plan_back=NULL;
  MPI_Comm comm_cart_2d;

  /* Set size of FFT and process mesh */
  n[0] = NNN;n[1] =NNN; n[2] =NNN;
  np[0] = 1; np[1] = 1;

  /* Initialize MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Init OpenMP. The arguments are read after MPI_Init, which may rewrite
   * argc/argv, and the thread count is set once, after pfft_init. */
  pfft_get_args(argc,argv,"-pfft_omp_threads",1,PFFT_INT,&nthreads);
  pfft_get_args(argc,argv,"-pfft_runs",1,PFFT_INT,&runs);
  pfft_plan_with_nthreads(nthreads);
  pfft_printf(MPI_COMM_WORLD, "# %4d threads will be used for openmp (default is 1)\n", nthreads);

  /* Create two-dimensional process grid of size np[0] x np[1], if possible */
  if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){
    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
    MPI_Finalize();
    return 1;
  }

  /* Get parameters of data distribution; queried with the same transposition
   * flag the forward plan below is created with */
  alloc_local = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_OUT,
      local_ni, local_i_start, local_no, local_o_start);

  /* Allocate memory */
  in  = pfft_alloc_complex(alloc_local);
  out = pfft_alloc_complex(alloc_local);

  /* Plan parallel forward FFT */
  plan_forw = pfft_plan_dft_3d(
      n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_TUNE| PFFT_SHIFTED_IN);

  /* Plan parallel backward FFT */
  plan_back = pfft_plan_dft_3d(
      n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_TUNE| PFFT_SHIFTED_OUT);

  /* Initialize input with random numbers */
  pfft_init_input_complex_3d(n, local_ni, local_i_start,
      in);

  for(int i=0; i<runs; i++)
  {
    /* execute parallel forward FFT */
    pfft_execute(plan_forw);

    /* execute parallel backward FFT */
    pfft_execute(plan_back);

    /* Scale data so that the next run starts from the original input */
    for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++)
      in[l] /= (n[0]*n[1]*n[2]);
  }

  pfft_print_average_timer_adv(plan_forw, MPI_COMM_WORLD);
  pfft_print_average_timer_adv(plan_back, MPI_COMM_WORLD);

  /* Print error of back transformed data */
  err = pfft_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d);
  pfft_printf(comm_cart_2d, "Error after %d forward and backward trafos of size n=(%td, %td, %td):\n", runs, n[0], n[1], n[2]);
  pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err);

  /* free mem and finalize */
  pfft_destroy_plan(plan_forw);
  pfft_destroy_plan(plan_back);
  MPI_Comm_free(&comm_cart_2d);
  pfft_free(in); pfft_free(out);
  MPI_Finalize();
  return 0;
}