int main(int argc, char **argv) { int np[2]; ptrdiff_t n[3]; ptrdiff_t alloc_local; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_no[3], local_o_start[3]; long double err; pfftl_complex *in, *out; pfftl_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_2d; /* Set size of FFT and process mesh */ n[0] = 29; n[1] = 27; n[2] = 31; np[0] = 2; np[1] = 2; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfftl_init(); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfftl_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ) { pfftl_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfftl_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_NONE, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfftl_alloc_complex(alloc_local); out = pfftl_alloc_complex(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfftl_plan_dft_3d( n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Plan parallel backward FFT */ plan_back = pfftl_plan_dft_3d( n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Initialize input with random numbers */ pfftl_init_input_complex_3d(n, local_ni, local_i_start, in); /* execute parallel forward FFT */ pfftl_execute(plan_forw); /* execute parallel backward FFT */ pfftl_execute(plan_back); /* Scale data */ ptrdiff_t l; for(l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++) in[l] /= (n[0]*n[1]*n[2]); /* Print error of back transformed data */ err = pfftl_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d); pfftl_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); pfftl_printf(comm_cart_2d, "maxerror = %6.2Le;\n", err); /* free mem and finalize */ 
pfftl_destroy_plan(plan_forw); pfftl_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_2d); pfftl_free(in); pfftl_free(out); MPI_Finalize(); return 0; }
/*
 * Run one parallel NFFT (pnfftl_trafo) and one direct NDFT
 * (pnfftl_direct_trafo) on the same three-dimensional plan, print the
 * maximum wall-clock time of each, and pass both results to compare_f.
 *
 * N, n, x_max, m  forwarded unchanged to pnfftl_local_size_guru and
 *                 pnfftl_init_guru
 * local_M         node count handed to pnfftl_init_guru; also the number of
 *                 entries of f that are copied and compared
 * window_flag     OR'ed into the PNFFT flags used to create the plan
 * np              three process-mesh extents np[0] x np[1] x np[2]
 * comm            communicator the process mesh is created from; also used
 *                 for timing reductions, printing and the final comparison
 *
 * If the process mesh cannot be created, an error is printed, MPI is
 * finalized and the program exits with status 1.
 */
static void pnfft_perform_guru(
    const ptrdiff_t *N, const ptrdiff_t *n, ptrdiff_t local_M,
    int m, const long double *x_max, unsigned window_flag,
    const int *np, MPI_Comm comm
    )
{
  int myrank;
  ptrdiff_t local_N[3], local_N_start[3];
  long double lower_border[3], upper_border[3];
  long double local_sum = 0;
  /* MPI_Reduce only fills the receive buffer on the root rank; start from 0
   * so the other ranks never pass an indeterminate value to pfftl_printf. */
  double elapsed, elapsed_max = 0;
  MPI_Comm comm_cart_3d;
  pnfftl_complex *f_hat, *f, *f1;
  long double *x, f_hat_sum;
  pnfftl_plan pnfft;

  /* create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */
  if( pnfftl_create_procmesh(3, comm, np, &comm_cart_3d) ){
    pfftl_fprintf(comm, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]);
    pfftl_fprintf(comm, stderr, " Please allocate %d processes (mpiexec -np %d ...) or change the procmesh (with -pnfft_np * * *).\n", np[0]*np[1]*np[2], np[0]*np[1]*np[2]);
    MPI_Finalize();
    exit(1);
  }

  MPI_Comm_rank(comm_cart_3d, &myrank);

  /* get parameters of data distribution */
  pnfftl_local_size_guru(3, N, n, x_max, m, comm_cart_3d, PNFFT_TRANSPOSED_NONE,
      local_N, local_N_start, lower_border, upper_border);

  /* plan parallel NFFT; the plan allocates x, f_hat and f itself */
  pnfft = pnfftl_init_guru(3, N, n, x_max, local_M, m,
      PNFFT_MALLOC_X| PNFFT_MALLOC_F_HAT| PNFFT_MALLOC_F| window_flag, PFFT_ESTIMATE,
      comm_cart_3d);

  /* get data pointers */
  f_hat = pnfftl_get_f_hat(pnfft);
  f     = pnfftl_get_f(pnfft);
  x     = pnfftl_get_x(pnfft);

  /* initialize Fourier coefficients */
  pnfftl_init_f_hat_3d(N, local_N, local_N_start, PNFFT_TRANSPOSED_NONE,
      f_hat);

  /* initialize nonequispaced nodes; every rank seeds rand() with its own rank */
  srand(myrank);
  init_random_x(lower_border, upper_border, x_max, local_M,
      x);

  /* execute parallel NFFT */
  elapsed = -MPI_Wtime();
  pnfftl_trafo(pnfft);
  elapsed += MPI_Wtime();

  /* print timing */
  MPI_Reduce(&elapsed, &elapsed_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
  pfftl_printf(comm, "pnfftl_trafo needs %6.2e s\n", elapsed_max);

  /* calculate norm of Fourier coefficients for calculation of relative error */
  for(ptrdiff_t k=0; k<local_N[0]*local_N[1]*local_N[2]; k++)
    local_sum += cabsl(f_hat[k]);
  MPI_Allreduce(&local_sum, &f_hat_sum, 1, MPI_LONG_DOUBLE, MPI_SUM, comm_cart_3d);

  /* store results of NFFT; the direct transform below overwrites f */
  f1 = pnfftl_alloc_complex(local_M);
  for(ptrdiff_t j=0; j<local_M; j++)
    f1[j] = f[j];

  /* execute parallel NDFT */
  elapsed = -MPI_Wtime();
  pnfftl_direct_trafo(pnfft);
  elapsed += MPI_Wtime();

  /* print timing */
  MPI_Reduce(&elapsed, &elapsed_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
  pfftl_printf(comm, "pnfftl_direct_trafo needs %6.2e s\n", elapsed_max);

  /* calculate error of PNFFT; use the caller's communicator (not
   * MPI_COMM_WORLD) so the function also works on a sub-communicator */
  compare_f(f1, f, local_M, f_hat_sum, "* Results in", comm);

  /* free mem and finalize */
  pnfftl_free(f1);
  pnfftl_finalize(pnfft, PNFFT_FREE_X | PNFFT_FREE_F | PNFFT_FREE_F_HAT);
  MPI_Comm_free(&comm_cart_3d);
}