int main(int argc, char **argv){ int np[3], m, window; unsigned window_flag; ptrdiff_t N[3], n[3], local_M; double f_hat_sum, x_max[3]; pnfft_complex *f1, *f2; MPI_Init(&argc, &argv); pnfft_init(); /* set default values */ N[0] = N[1] = N[2] = 16; n[0] = n[1] = n[2] = 0; local_M = 0; m = 6; window = 4; x_max[0] = x_max[1] = x_max[2] = 0.5; np[0]=2; np[1]=2; np[2]=2; /* set parameters by command line */ init_parameters(argc, argv, N, n, &local_M, &m, &window, x_max, np); /* if M or n are set to zero, we choose nice values */ local_M = (local_M==0) ? N[0]*N[1]*N[2]/(np[0]*np[1]*np[2]) : local_M; for(int t=0; t<3; t++) n[t] = (n[t]==0) ? 2*N[t] : n[t]; switch(window){ case 0: window_flag = PNFFT_WINDOW_GAUSSIAN; break; case 1: window_flag = PNFFT_WINDOW_BSPLINE; break; case 2: window_flag = PNFFT_WINDOW_SINC_POWER; break; case 3: window_flag = PNFFT_WINDOW_BESSEL_I0; break; default: window_flag = PNFFT_WINDOW_KAISER_BESSEL; } pfft_printf(MPI_COMM_WORLD, "******************************************************************************************************\n"); pfft_printf(MPI_COMM_WORLD, "* Computation of parallel NFFT\n"); pfft_printf(MPI_COMM_WORLD, "* for N[0] x N[1] x N[2] = %td x %td x %td Fourier coefficients (change with -pnfft_N * * *)\n", N[0], N[1], N[2]); pfft_printf(MPI_COMM_WORLD, "* at local_M = %td nodes per process (change with -pnfft_local_M *)\n", local_M); pfft_printf(MPI_COMM_WORLD, "* with n[0] x n[1] x n[2] = %td x %td x %td FFT grid size (change with -pnfft_n * * *),\n", n[0], n[1], n[2]); pfft_printf(MPI_COMM_WORLD, "* m = %d real space cutoff (change with -pnfft_m *),\n", m); pfft_printf(MPI_COMM_WORLD, "* window = %d window function ", window); switch(window){ case 0: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_GAUSSIAN) "); break; case 1: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_BSPLINE) "); break; case 2: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_SINC_POWER) "); break; case 3: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_BESSEL_I0) "); 
break; default: pfft_printf(MPI_COMM_WORLD, "(PNFFT_WINDOW_KAISER_BESSEL) "); break; } pfft_printf(MPI_COMM_WORLD, "(change with -pnfft_window *),\n"); pfft_printf(MPI_COMM_WORLD, "* on np[0] x np[1] x np[2] = %td x %td x %td processes (change with -pnfft_np * * *)\n", np[0], np[1], np[2]); pfft_printf(MPI_COMM_WORLD, "*******************************************************************************************************\n\n"); /* calculate parallel NFFT */ pnfft_perform_guru(N, n, local_M, m, x_max, window_flag, np, MPI_COMM_WORLD, &f1, &f_hat_sum); /* calculate parallel NFFT with higher accuracy */ pnfft_perform_guru(N, n, local_M, m+2, x_max, PNFFT_WINDOW_KAISER_BESSEL, np, MPI_COMM_WORLD, &f2, &f_hat_sum); /* calculate error of PNFFT */ compare_f(f1, f2, local_M, f_hat_sum, "* Results in", MPI_COMM_WORLD); /* free mem and finalize */ pnfft_free(f1); pnfft_free(f2); pnfft_cleanup(); MPI_Finalize(); return 0; }
/**
 * Plan and execute one long-double parallel NFFT on a 3d process mesh, time
 * it, then execute pnfftl_direct_trafo on the same plan and pass both result
 * vectors to compare_f().
 *
 * @param N            number of Fourier coefficients per dimension (3 entries)
 * @param n            FFT grid size per dimension (3 entries)
 * @param local_M      number of nodes on this process
 * @param m            real space cutoff forwarded to the PNFFT planner
 * @param x_max        per-dimension bound forwarded to the PNFFT planner and
 *                     to init_random_x (3 entries)
 * @param window_flag  PNFFT window flag, OR-ed into the planner flags
 * @param np           process mesh size per dimension (3 entries)
 * @param comm         communicator the process mesh is created from; also used
 *                     for timing reduction, output and the final comparison
 *
 * If the process mesh cannot be created, an error is printed, MPI is
 * finalized and the program exits with status 1.
 */
static void pnfft_perform_guru(
    const ptrdiff_t *N, const ptrdiff_t *n, ptrdiff_t local_M,
    int m, const long double *x_max, unsigned window_flag,
    const int *np, MPI_Comm comm
    )
{
  int myrank;
  ptrdiff_t local_N[3], local_N_start[3];
  ptrdiff_t local_N_total;
  long double lower_border[3], upper_border[3];
  long double local_sum = 0;
  /* elapsed_max is only written by MPI_Reduce on the root rank but is passed
   * to pfftl_printf on every rank, so give it a defined value everywhere */
  double elapsed, elapsed_max = 0.0;
  MPI_Comm comm_cart_3d;
  pnfftl_complex *f_hat, *f, *f1;
  long double *x, f_hat_sum;
  pnfftl_plan pnfft;

  /* create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */
  if( pnfftl_create_procmesh(3, comm, np, &comm_cart_3d) ){
    pfftl_fprintf(comm, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]);
    pfftl_fprintf(comm, stderr, " Please allocate %d processes (mpiexec -np %d ...) or change the procmesh (with -pnfft_np * * *).\n", np[0]*np[1]*np[2], np[0]*np[1]*np[2]);
    MPI_Finalize();
    exit(1);
  }

  MPI_Comm_rank(comm_cart_3d, &myrank);

  /* get parameters of data distribution */
  pnfftl_local_size_guru(3, N, n, x_max, m, comm_cart_3d, PNFFT_TRANSPOSED_NONE,
      local_N, local_N_start, lower_border, upper_border);

  /* plan parallel NFFT */
  pnfft = pnfftl_init_guru(3, N, n, x_max, local_M, m,
      PNFFT_MALLOC_X| PNFFT_MALLOC_F_HAT| PNFFT_MALLOC_F| window_flag, PFFT_ESTIMATE,
      comm_cart_3d);

  /* get data pointers */
  f_hat = pnfftl_get_f_hat(pnfft);
  f     = pnfftl_get_f(pnfft);
  x     = pnfftl_get_x(pnfft);

  /* initialize Fourier coefficients */
  pnfftl_init_f_hat_3d(N, local_N, local_N_start, PNFFT_TRANSPOSED_NONE,
      f_hat);

  /* initialize nonequispaced nodes; seeding with the rank gives every
   * process its own rand() sequence */
  srand(myrank);
  init_random_x(lower_border, upper_border, x_max, local_M,
      x);

  /* execute parallel NFFT */
  elapsed = -MPI_Wtime();
  pnfftl_trafo(pnfft);
  elapsed += MPI_Wtime();

  /* print timing */
  MPI_Reduce(&elapsed, &elapsed_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
  pfftl_printf(comm, "pnfftl_trafo needs %6.2e s\n", elapsed_max);

  /* calculate norm of Fourier coefficients for calculation of relative error */
  local_N_total = local_N[0]*local_N[1]*local_N[2];
  for(ptrdiff_t k=0; k<local_N_total; k++)
    local_sum += cabsl(f_hat[k]);
  MPI_Allreduce(&local_sum, &f_hat_sum, 1, MPI_LONG_DOUBLE, MPI_SUM, comm_cart_3d);

  /* store results of NFFT; f is overwritten by the direct transform below */
  f1 = pnfftl_alloc_complex(local_M);
  if(f1 == NULL && local_M > 0){
    /* a NULL result for local_M == 0 is harmless, the copy loop is skipped */
    fprintf(stderr, "Error: Could not allocate %td complex values for the NFFT result copy.\n", local_M);
    MPI_Abort(comm, 1);
  }
  for(ptrdiff_t j=0; j<local_M; j++)
    f1[j] = f[j];

  /* execute parallel NDFT */
  elapsed = -MPI_Wtime();
  pnfftl_direct_trafo(pnfft);
  elapsed += MPI_Wtime();

  /* print timing */
  MPI_Reduce(&elapsed, &elapsed_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
  pfftl_printf(comm, "pnfftl_direct_trafo needs %6.2e s\n", elapsed_max);

  /* calculate error of PNFFT; use the caller's communicator (not a hard-coded
   * MPI_COMM_WORLD) so the comparison involves exactly the ranks that ran
   * this function */
  compare_f(f1, f, local_M, f_hat_sum, "* Results in", comm);

  /* free mem and finalize */
  pnfftl_free(f1);
  pnfftl_finalize(pnfft, PNFFT_FREE_X | PNFFT_FREE_F | PNFFT_FREE_F_HAT);
  MPI_Comm_free(&comm_cart_3d);
}