コード例 #1
0
/*
 * Round-trip test for the long-double parallel 3-d complex FFT:
 * plan a forward and a backward transform on a 2 x 2 process mesh,
 * run both, rescale, and print the error reported by
 * pfftl_check_output_complex_3d.
 *
 * Returns 0 on success, 1 if the 2-d process mesh could not be created.
 */
int main(int argc, char **argv)
{
    /* FFT size and process mesh are fixed for this test */
    ptrdiff_t n[3] = {29, 27, 31};
    int np[2] = {2, 2};

    ptrdiff_t local_ni[3], local_i_start[3];
    ptrdiff_t local_no[3], local_o_start[3];
    MPI_Comm comm_cart_2d;

    /* MPI has to be up before PFFT is initialized */
    MPI_Init(&argc, &argv);
    pfftl_init();

    /* Build the np[0] x np[1] process grid; bail out if that is not possible */
    if (pfftl_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) != 0) {
        pfftl_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]);
        MPI_Finalize();
        return 1;
    }

    /* Query the local block layout and the number of elements to allocate */
    const ptrdiff_t n_alloc = pfftl_local_size_dft_3d(
        n, comm_cart_2d, PFFT_TRANSPOSED_NONE,
        local_ni, local_i_start, local_no, local_o_start);

    /* One buffer per side of the transform */
    pfftl_complex *in = pfftl_alloc_complex(n_alloc);
    pfftl_complex *out = pfftl_alloc_complex(n_alloc);

    /* Forward plan: in -> out */
    pfftl_plan plan_forw = pfftl_plan_dft_3d(
        n, in, out, comm_cart_2d, PFFT_FORWARD,
        PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT);

    /* Backward plan: out -> in */
    pfftl_plan plan_back = pfftl_plan_dft_3d(
        n, out, in, comm_cart_2d, PFFT_BACKWARD,
        PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT);

    /* Fill the input only after planning has finished */
    pfftl_init_input_complex_3d(n, local_ni, local_i_start, in);

    /* Forward transform followed by the backward transform */
    pfftl_execute(plan_forw);
    pfftl_execute(plan_back);

    /* Divide every local element by the total number of grid points */
    const ptrdiff_t n_total = n[0]*n[1]*n[2];
    const ptrdiff_t local_count = local_ni[0] * local_ni[1] * local_ni[2];
    for (ptrdiff_t idx = 0; idx < local_count; idx++) {
        in[idx] /= n_total;
    }

    /* Report the error of the back-transformed data */
    const long double max_err =
        pfftl_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d);
    pfftl_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]);
    pfftl_printf(comm_cart_2d, "maxerror = %6.2Le;\n", max_err);

    /* Release plans, communicator and buffers, then shut MPI down */
    pfftl_destroy_plan(plan_forw);
    pfftl_destroy_plan(plan_back);
    MPI_Comm_free(&comm_cart_2d);
    pfftl_free(in);
    pfftl_free(out);
    MPI_Finalize();
    return 0;
}
コード例 #2
0
/*
 * Run one parallel NFFT (pnfftl_trafo) and one direct transform
 * (pnfftl_direct_trafo) on the same 3-d plan, print the maximum wall-clock
 * time of each over all ranks, and hand both results to compare_f.
 *
 * N, n, x_max, m  - forwarded unchanged to pnfftl_local_size_guru and
 *                   pnfftl_init_guru (both called with dimension 3).
 * local_M         - number of nodes on this rank; also the length of the
 *                   result array f that is copied and compared.
 * window_flag     - OR-ed into the PNFFT_MALLOC_* flags given to
 *                   pnfftl_init_guru.
 * np              - three process-mesh extents (np[0] x np[1] x np[2]).
 * comm            - communicator the mesh is created from; it is also used
 *                   for the timing reductions, the printed output and the
 *                   final comparison.
 *
 * If the process mesh cannot be created, an error is printed, MPI is
 * finalized and the process exits with status 1.
 */
static void pnfft_perform_guru(
    const ptrdiff_t *N, const ptrdiff_t *n, ptrdiff_t local_M,
    int m, const long double *x_max, unsigned window_flag,
    const int *np, MPI_Comm comm
    )
{
  int myrank;
  ptrdiff_t local_N[3], local_N_start[3];
  long double lower_border[3], upper_border[3];
  long double local_sum = 0;
  /* MPI_Reduce stores its result only on the root rank, yet time_max is
   * passed to pfftl_printf on every rank; give it a defined value everywhere. */
  double elapsed, time_max = 0;
  MPI_Comm comm_cart_3d;
  pnfftl_complex *f_hat, *f, *f1;
  long double *x, f_hat_sum;
  pnfftl_plan pnfft;

  /* create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */
  if( pnfftl_create_procmesh(3, comm, np, &comm_cart_3d) ){
    pfftl_fprintf(comm, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]);
    pfftl_fprintf(comm, stderr, "       Please allocate %d processes (mpiexec -np %d ...) or change the procmesh (with -pnfft_np * * *).\n", np[0]*np[1]*np[2], np[0]*np[1]*np[2]);
    MPI_Finalize();
    exit(1);
  }

  MPI_Comm_rank(comm_cart_3d, &myrank);

  /* get parameters of data distribution */
  pnfftl_local_size_guru(3, N, n, x_max, m, comm_cart_3d, PNFFT_TRANSPOSED_NONE,
      local_N, local_N_start, lower_border, upper_border);

  /* plan parallel NFFT; the plan allocates x, f_hat and f itself */
  pnfft = pnfftl_init_guru(3, N, n, x_max, local_M, m,
      PNFFT_MALLOC_X| PNFFT_MALLOC_F_HAT| PNFFT_MALLOC_F| window_flag, PFFT_ESTIMATE,
      comm_cart_3d);

  /* get data pointers */
  f_hat = pnfftl_get_f_hat(pnfft);
  f     = pnfftl_get_f(pnfft);
  x     = pnfftl_get_x(pnfft);

  /* initialize Fourier coefficients */
  pnfftl_init_f_hat_3d(N, local_N, local_N_start, PNFFT_TRANSPOSED_NONE,
      f_hat);

  /* initialize nonequispaced nodes; seed with the mesh rank so that each
   * rank gets its own rand() sequence */
  srand(myrank);
  init_random_x(lower_border, upper_border, x_max, local_M,
      x);

  /* execute parallel NFFT */
  elapsed = -MPI_Wtime();
  pnfftl_trafo(pnfft);
  elapsed += MPI_Wtime();

  /* print timing (maximum over all ranks of comm) */
  MPI_Reduce(&elapsed, &time_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
  pfftl_printf(comm, "pnfftl_trafo needs %6.2e s\n", time_max);

  /* calculate norm of Fourier coefficients for calculation of relative error */
  for(ptrdiff_t k=0; k<local_N[0]*local_N[1]*local_N[2]; k++)
    local_sum += cabsl(f_hat[k]);
  MPI_Allreduce(&local_sum, &f_hat_sum, 1, MPI_LONG_DOUBLE, MPI_SUM, comm_cart_3d);

  /* store results of NFFT; f is reused by the direct transform below */
  f1 = pnfftl_alloc_complex(local_M);
  for(ptrdiff_t j=0; j<local_M; j++) f1[j] = f[j];

  /* execute parallel NDFT */
  elapsed = -MPI_Wtime();
  pnfftl_direct_trafo(pnfft);
  elapsed += MPI_Wtime();

  /* print timing (maximum over all ranks of comm) */
  MPI_Reduce(&elapsed, &time_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
  pfftl_printf(comm, "pnfftl_direct_trafo needs %6.2e s\n", time_max);

  /* calculate error of PNFFT; use the caller's communicator rather than
   * MPI_COMM_WORLD so this also works when comm is a sub-communicator */
  compare_f(f1, f, local_M, f_hat_sum, "* Results in", comm);

  /* free mem and finalize */
  pnfftl_free(f1);
  pnfftl_finalize(pnfft, PNFFT_FREE_X | PNFFT_FREE_F | PNFFT_FREE_F_HAT);
  MPI_Comm_free(&comm_cart_3d);
}