/*
 * Plan, execute and time one parallel 3d NFFT using the PNFFT guru interface.
 *
 * Steps performed:
 *   1. create a 3d process mesh of size np[0] x np[1] x np[2] from comm,
 *   2. query the local data distribution and create the plan,
 *   3. initialise the Fourier coefficients and the nonequispaced nodes,
 *   4. run pnfft_trafo() once and print the maximum run time over comm,
 *   5. sum cabs() of all Fourier coefficients over the process mesh.
 *
 * Parameters:
 *   N, n, x_max, m - forwarded unchanged to pnfft_local_size_guru() and
 *                    pnfft_init_guru(); N is also used to initialise f_hat
 *   local_M        - number of nodes handed to the plan and to init_random_x()
 *   window_flag    - OR-ed into the PNFFT flags passed to pnfft_init_guru()
 *   np             - requested process mesh size (three entries)
 *   comm           - communicator the process mesh is built from
 *   f              - out: receives the plan's f array (pnfft_get_f()); the
 *                    array is NOT released here because PNFFT_FREE_F is not
 *                    passed to pnfft_finalize()
 *                    NOTE(review): presumably the caller has to free it - confirm
 *   f_hat_sum      - out: sum of cabs(f_hat[k]) over all processes of the mesh
 *
 * If the process mesh cannot be created, an error is printed, MPI is
 * finalized and the program exits with status 1.
 */
static void pnfft_perform_guru(
    const ptrdiff_t *N, const ptrdiff_t *n, ptrdiff_t local_M,
    int m, const double *x_max, unsigned window_flag,
    const int *np, MPI_Comm comm,
    pnfft_complex **f, double *f_hat_sum
    )
{
  ptrdiff_t local_N[3], local_N_start[3];
  double lower_border[3], upper_border[3];
  double local_sum = 0;
  double time;
  /* MPI_Reduce only fills the receive buffer on the root rank, yet the value
   * is handed to pfft_printf on every rank: give it a defined value. */
  double time_max = 0.0;
  MPI_Comm comm_cart_3d;
  pnfft_complex *f_hat;
  double *x;
  pnfft_plan pnfft;

  /* create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */
  if( pnfft_create_procmesh(3, comm, np, &comm_cart_3d) ){
    pfft_fprintf(comm, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]);
    pfft_fprintf(comm, stderr, " Please allocate %d processes (mpiexec -np %d ...) or change the procmesh (with -pnfft_np * * *).\n", np[0]*np[1]*np[2], np[0]*np[1]*np[2]);
    MPI_Finalize();
    exit(1);
  }

  /* get parameters of data distribution */
  pnfft_local_size_guru(3, N, n, x_max, m, comm_cart_3d, PNFFT_TRANSPOSED_NONE,
      local_N, local_N_start, lower_border, upper_border);

  /* plan parallel NFFT; the plan allocates x, f_hat and f itself */
  pnfft = pnfft_init_guru(3, N, n, x_max, local_M, m,
      PNFFT_MALLOC_X| PNFFT_MALLOC_F_HAT| PNFFT_MALLOC_F| window_flag, PFFT_ESTIMATE,
      comm_cart_3d);

  /* get data pointers */
  f_hat = pnfft_get_f_hat(pnfft);
  *f    = pnfft_get_f(pnfft);
  x     = pnfft_get_x(pnfft);

  /* initialize Fourier coefficients */
  pnfft_init_f_hat_3d(N, local_N, local_N_start, PNFFT_TRANSPOSED_NONE,
      f_hat);

  /* initialize nonequispaced nodes */
  srand(0);
  init_random_x(lower_border, upper_border, x_max, local_M,
      x);

  /* execute parallel NFFT and measure its wall-clock time */
  time = -MPI_Wtime();
  pnfft_trafo(pnfft);
  time += MPI_Wtime();

  /* print timing: maximum over all processes of comm, collected on rank 0 */
  MPI_Reduce(&time, &time_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
  pfft_printf(comm, "pnfft_trafo needs %6.2e s\n", time_max);

  /* calculate norm of Fourier coefficients for calculation of relative error */
  const ptrdiff_t local_N_total = local_N[0]*local_N[1]*local_N[2];
  for(ptrdiff_t k=0; k<local_N_total; k++){
    local_sum += cabs(f_hat[k]);
  }
  MPI_Allreduce(&local_sum, f_hat_sum, 1, MPI_DOUBLE, MPI_SUM, comm_cart_3d);

  /* free mem and finalize, do not free f: it is returned through *f */
  pnfft_finalize(pnfft, PNFFT_FREE_X | PNFFT_FREE_F_HAT);
  MPI_Comm_free(&comm_cart_3d);
}
int main(int argc, char **argv){ int np[3]; ptrdiff_t N[3], local_M; ptrdiff_t local_N[3], local_N_start[3]; double lower_border[3], upper_border[3]; MPI_Comm comm_cart_3d; pnfft_complex *f_hat, *f; double *x; pnfft_plan pnfft; MPI_Init(&argc, &argv); pnfft_init(); /* Set default values */ N[0] = N[1] = N[2] = 16; np[0]=2; np[1]=2; np[2]=2; local_M = N[0]*N[1]*N[2]/(np[0]*np[1]*np[2]); /* Print infos */ pfft_printf(MPI_COMM_WORLD, "******************************************************************************************************\n"); pfft_printf(MPI_COMM_WORLD, "* Computation of parallel NFFT\n"); pfft_printf(MPI_COMM_WORLD, "* for N[0] x N[1] x N[2] = %td x %td x %td Fourier coefficients\n", N[0], N[1], N[2]); pfft_printf(MPI_COMM_WORLD, "* at local_M = %td nodes per process\n", local_M); pfft_printf(MPI_COMM_WORLD, "* on np[0] x np[1] x np[2] = %td x %td x %td processes\n", np[0], np[1], np[2]); pfft_printf(MPI_COMM_WORLD, "*******************************************************************************************************\n\n"); /* create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */ if( pnfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]); pfft_fprintf(MPI_COMM_WORLD, stderr, " Please allocate %d processes (mpiexec -np %d ...) 
or change the procmesh (with -pnfft_np * * *).\n", np[0]*np[1]*np[2], np[0]*np[1]*np[2]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ pnfft_local_size_3d(N, comm_cart_3d, PNFFT_TRANSPOSED_NONE, local_N, local_N_start, lower_border, upper_border); /* Plan parallel NFFT */ pnfft = pnfft_init_3d(N, local_M, comm_cart_3d); /* Get data pointers */ f_hat = pnfft_get_f_hat(pnfft); f = pnfft_get_f(pnfft); x = pnfft_get_x(pnfft); /* Initialize Fourier coefficients */ pnfft_init_f_hat_3d(N, local_N, local_N_start, PNFFT_TRANSPOSED_NONE, f_hat); /* Initialize nonequispaced nodes */ init_random_x(lower_border, upper_border, local_M, x); /* Print input Fourier coefficents */ vpr_complex(comm_cart_3d, 8, f_hat, "Input Fourier coefficients on process 1:"); /* Execute parallel NFFT */ pnfft_trafo(pnfft); /* Print NFFT results */ vpr_complex(comm_cart_3d, 8, f, "PNFFT Results on process 1:"); /* Execute parallel adjoint NFFT */ pnfft_adj(pnfft); /* Scale data */ for(ptrdiff_t l=0; l < local_N[0] * local_N[1] * local_N[2]; l++) f_hat[l] /= (N[0]*N[1]*N[2]); /* Print output Fourier coefficents */ vpr_complex(comm_cart_3d, 8, f_hat, "Fourier coefficients after one forward and backward PNFFT on process 1:"); /* free mem and finalize */ pnfft_finalize(pnfft, PNFFT_FREE_X | PNFFT_FREE_F_HAT| PNFFT_FREE_F); MPI_Comm_free(&comm_cart_3d); pnfft_cleanup(); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int np[3]; ptrdiff_t N[3]; ptrdiff_t local_M; double err; MPI_Comm comm_cart_3d; ptrdiff_t local_N_c2c[3], local_N_start_c2c[3]; double lower_border_c2c[3], upper_border_c2c[3]; pnfft_plan plan_c2c; pnfft_complex *f_hat_c2c, *f_c2c; double *x_c2c; ptrdiff_t local_N_c2r[3], local_N_start_c2r[3]; double lower_border_c2r[3], upper_border_c2r[3]; pnfft_plan plan_c2r; pnfft_complex *f_hat_c2r; double *x_c2r, *f_c2r; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pnfft_init(); np[0] = 2; np[1] = 2; np[2] = 4; // for bigger transforms the data gets distributed differently, which makes comparing the results harder. N[0] = 8; N[1] = 8; N[2] = 16; local_M = N[0]*N[1]*N[2]/(np[0]*np[1]*np[2]); /* Print infos */ pfft_printf(MPI_COMM_WORLD, "******************************************************************************************************\n"); pfft_printf(MPI_COMM_WORLD, "* Computation of parallel NFFT\n"); pfft_printf(MPI_COMM_WORLD, "* for N[0] x N[1] x N[2] = %td x %td x %td Fourier coefficients)\n", N[0], N[1], N[2]); pfft_printf(MPI_COMM_WORLD, "* at local_M = %td nodes per process\n", local_M); pfft_printf(MPI_COMM_WORLD, "* on np[0] x np[1] x np[2] = %td x %td x %td processes\n", np[0], np[1], np[2]); pfft_printf(MPI_COMM_WORLD, "*******************************************************************************************************\n\n"); /* create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */ if( pnfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]); pfft_fprintf(MPI_COMM_WORLD, stderr, " Please allocate %d processes (mpiexec -np %d ...) 
or change the procmesh (with -pnfft_np * * *).\n", np[0]*np[1]*np[2], np[0]*np[1]*np[2]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ pnfft_local_size_3d(N, comm_cart_3d, PNFFT_TRANSPOSED_NONE, local_N_c2c, local_N_start_c2c, lower_border_c2c, upper_border_c2c); pnfft_local_size_3d_c2r(N, comm_cart_3d, PNFFT_TRANSPOSED_NONE, local_N_c2r, local_N_start_c2r, lower_border_c2r, upper_border_c2r); /* Plan parallel NFFT */ plan_c2c = pnfft_init_adv(3, N, local_M, PNFFT_TRANSPOSED_NONE| PNFFT_WINDOW_SINC_POWER| PNFFT_MALLOC_X| PNFFT_MALLOC_F_HAT| PNFFT_MALLOC_F, PFFT_ESTIMATE, comm_cart_3d); plan_c2r = pnfft_init_adv_c2r(3, N, local_M, PNFFT_TRANSPOSED_NONE| PNFFT_WINDOW_SINC_POWER| PNFFT_MALLOC_X| PNFFT_MALLOC_F_HAT| PNFFT_MALLOC_F, PFFT_ESTIMATE, comm_cart_3d); f_hat_c2c = pnfft_get_f_hat(plan_c2c); f_c2c = pnfft_get_f(plan_c2c); x_c2c = pnfft_get_x(plan_c2c); f_hat_c2r = pnfft_get_f_hat(plan_c2r); f_c2r = pnfft_get_f(plan_c2r); x_c2r = pnfft_get_x(plan_c2r); /* Initialize Fourier coefficients with random numbers */ init_input(N, local_N_c2c, local_N_start_c2c, f_hat_c2c); init_input(N, local_N_c2r, local_N_start_c2r, f_hat_c2r); /* Initialize nodes with random numbers */ // pnfft_init_x_3d(lower_border_c2c, upper_border_c2c, local_M, x_c2c); init_equispaced_x(N, lower_border_c2c, upper_border_c2c, x_c2c); for (int k=0; k<local_M*3; k++) x_c2r[k] = x_c2c[k]; /* execute parallel NFFT */ pnfft_trafo(plan_c2c); MPI_Barrier(MPI_COMM_WORLD); pnfft_trafo(plan_c2r); MPI_Barrier(MPI_COMM_WORLD); err = compare_complex_real(local_M, f_c2c, f_c2r, comm_cart_3d); pfft_printf(MPI_COMM_WORLD, "max error between c2c and c2r after trafo: %6.2e\n", err); /* free mem and finalize */ pnfft_finalize(plan_c2c, PNFFT_FREE_X | PNFFT_FREE_F_HAT | PNFFT_FREE_F); pnfft_finalize(plan_c2r, PNFFT_FREE_X | PNFFT_FREE_F_HAT | PNFFT_FREE_F); MPI_Comm_free(&comm_cart_3d); /* Finalize MPI */ MPI_Finalize(); return 0; }