/*
 * Driver: pick default FFT size, process mesh and planner flags, let the
 * command line override them, then pass everything to measure_pfft().
 */
int main(int argc, char **argv)
{
  /* Defaults; init_parameters() receives all of them and may overwrite them */
  ptrdiff_t n[3] = {29, 27, 31};
  int np[2] = {2, 2};
  int loops = 1;
  int inplace = 0;
  unsigned opt = PFFT_ESTIMATE;
  unsigned tune = PFFT_NO_TUNE;
  unsigned destroy_input = PFFT_PRESERVE_INPUT;

  /* Bring up MPI and PFFT */
  MPI_Init(&argc, &argv);
  pfft_init();

  /* Apply command line settings */
  init_parameters(argc, argv, n, np, &loops, &inplace, &opt, &tune, &destroy_input);

  /* The three flag groups are combined into one PFFT flag word */
  const unsigned pfft_flags = opt | tune | destroy_input;
  measure_pfft(n, np, MPI_COMM_WORLD, loops, inplace, pfft_flags);

  MPI_Finalize();
  return 0;
}
int main(int argc, char **argv){ int np[2]; ptrdiff_t n[3]; ptrdiff_t alloc_local; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_no[3], local_o_start[3]; pfft_complex *in, *out; pfft_plan plan=NULL; MPI_Comm comm_cart_2d; /* Set size of FFT and process mesh */ n[0] = 2; n[1] = 2; n[2] = 4; np[0] = 2; np[1] = 2; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create two-dimensional process grid of size np[0] x np[1] */ pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d); /* Get parameters of data distribution */ alloc_local = pfft_local_size_dft_3d( n, comm_cart_2d, PFFT_TRANSPOSED_NONE, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfft_alloc_complex(alloc_local); out = pfft_alloc_complex(alloc_local); /* Plan parallel forward FFT */ plan = pfft_plan_dft_3d(n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE); /* Initialize input with random numbers */ pfft_init_input_complex_3d(n, local_ni, local_i_start, in); /* Execute parallel forward FFT */ pfft_execute(plan); /* free mem and finalize MPI */ pfft_destroy_plan(plan); MPI_Comm_free(&comm_cart_2d); pfft_free(in); pfft_free(out); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int np[2]; ptrdiff_t n[3]; ptrdiff_t alloc_local; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_no[3], local_o_start[3]; double err; double *planned_in, *executed_in; pfft_complex *planned_out, *executed_out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_2d; /* Set size of FFT and process mesh */ n[0] = 29; n[1] = 27; n[2] = 31; np[0] = 2; np[1] = 2; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_dft_r2c_3d(n, comm_cart_2d, PFFT_TRANSPOSED_OUT| PFFT_PADDED_R2C, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory for planning */ planned_in = pfft_alloc_real (2 * alloc_local); planned_out = pfft_alloc_complex(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_dft_r2c_3d( n, planned_in, planned_out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_PADDED_R2C); /* Plan parallel backward FFT */ plan_back = pfft_plan_dft_c2r_3d( n, planned_out, planned_in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_PADDED_C2R); /* Free planning arrays since we use other arrays for execution */ pfft_free(planned_in); pfft_free(planned_out); /* Allocate memory for execution */ executed_in = pfft_alloc_real(2 * alloc_local); executed_out = pfft_alloc_complex(alloc_local); /* Initialize input with random numbers */ pfft_init_input_real(3, n, local_ni, local_i_start, executed_in); /* execute parallel forward FFT */ pfft_execute_dft_r2c(plan_forw, executed_in, executed_out); /* clear the old input */ pfft_clear_input_real(3, n, local_ni, 
local_i_start, executed_in); /* execute parallel backward FFT */ pfft_execute_dft_c2r(plan_back, executed_out, executed_in); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++) executed_in[l] /= (n[0]*n[1]*n[2]); /* Print error of back transformed data */ err = pfft_check_output_real(3, n, local_ni, local_i_start, executed_in, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_2d); pfft_free(executed_in); pfft_free(executed_out); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int np[2]; ptrdiff_t n[3], ni[3], howmany; double err; ptrdiff_t alloc_local_c; ptrdiff_t local_ni_c[3], local_i_start_c[3]; ptrdiff_t local_no_c[3], local_o_start_c[3]; pfft_complex *in_c, *out_c; pfft_plan plan_forw_c=NULL, plan_back_c=NULL; ptrdiff_t alloc_local_r, alloc_local_forw, alloc_local_back; ptrdiff_t local_ni_r[3], local_i_start_r[3]; ptrdiff_t local_no_r[3], local_o_start_r[3]; pfft_complex *in_r; double *out_r; pfft_plan plan_forw_r=NULL, plan_back_r=NULL; MPI_Comm comm_cart_2d; /* Set size of FFT and process mesh */ ni[0] = 4; ni[1] = 4; ni[2] = 4; n[0] = 6; n[1] = 6; n[2] = 6; np[0] = 2; np[1] = 2; howmany = 1; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local_forw = pfft_local_size_many_dft(3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_NONE| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT, local_ni_c, local_i_start_c, local_no_c, local_o_start_c); alloc_local_back = pfft_local_size_many_dft(3, n, n, ni, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_NONE| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT, local_no_c, local_o_start_c, local_ni_c, local_i_start_c); alloc_local_c = (alloc_local_forw > alloc_local_back) ? 
alloc_local_forw : alloc_local_back; alloc_local_forw = pfft_local_size_many_dft_c2r(3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_NONE| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT, local_ni_r, local_i_start_r, local_no_r, local_o_start_r); alloc_local_back = pfft_local_size_many_dft_r2c(3, n, n, ni, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_NONE| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT, local_no_r, local_o_start_r, local_ni_r, local_i_start_r); alloc_local_r = (alloc_local_forw > alloc_local_back) ? alloc_local_forw : alloc_local_back; /* Allocate memory */ in_c = pfft_alloc_complex(alloc_local_c); out_c = pfft_alloc_complex(alloc_local_c); in_r = pfft_alloc_complex(alloc_local_r); out_r = pfft_alloc_real(2*alloc_local_r); /* Plan parallel forward FFT */ plan_forw_c = pfft_plan_many_dft(3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, in_c, out_c, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT); plan_forw_r = pfft_plan_many_dft_c2r(3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, in_r, out_r, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT); /* Plan parallel backward FFT */ plan_back_c = pfft_plan_many_dft(3, n, n, ni, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, out_c, in_c, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT); plan_back_r = pfft_plan_many_dft_r2c(3, n, n, ni, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, out_r, in_r, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_SHIFTED_IN | PFFT_SHIFTED_OUT); /* Initialize input with random numbers */ init_input(ni, local_ni_c, local_i_start_c, in_c); init_input(ni, local_ni_r, local_i_start_r, in_r); // pfft_apr_complex_3d(in_c, local_ni_c, 
local_i_start_c, "c2c input:\n", comm_cart_2d); // pfft_apr_complex_3d(in_r, local_ni_r, local_i_start_r, "c2r input:\n", comm_cart_2d); /* execute parallel forward FFT */ pfft_execute(plan_forw_c); /* clear the old input */ pfft_execute(plan_forw_r); // pfft_apr_complex_3d(out_c, local_no_c, local_o_start_c, "c2c output:\n", comm_cart_2d); // pfft_apr_real_3d(out_r, local_no_r, local_o_start_r, "c2r output:\n", comm_cart_2d); /* execute parallel backward FFT */ pfft_execute(plan_back_c); pfft_execute(plan_back_r); // pfft_apr_complex_3d(in_c, local_ni_c, local_i_start_c, "c2c^ output:\n", comm_cart_2d); // pfft_apr_complex_3d(in_r, local_ni_r, local_i_start_r, "c2r^ output:\n", comm_cart_2d); /* Scale data */ for(ptrdiff_t l=0; l < local_ni_c[0] * local_ni_c[1] * local_ni_c[2]; l++) in_c[l] /= (n[0]*n[1]*n[2]); for(ptrdiff_t l=0; l < local_ni_r[0] * local_ni_r[1] * local_ni_r[2]; l++) in_r[l] /= (n[0]*n[1]*n[2]); /* Print error of back transformed data */ err = compare_c2c_c2r(local_ni_c, local_ni_r, in_c, in_r, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_plan(plan_forw_c); pfft_destroy_plan(plan_back_c); pfft_destroy_plan(plan_forw_r); pfft_destroy_plan(plan_back_r); MPI_Comm_free(&comm_cart_2d); pfft_free(in_c); pfft_free(out_c); pfft_free(in_r); pfft_free(out_r); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int np[2]; ptrdiff_t n[3]; ptrdiff_t alloc_local; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_no[3], local_o_start[3]; double err; pfft_complex *in, *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_2d; double time; pfft_timer timer_forw, timer_back; unsigned pfft_opt_flag; /* setup default parameters */ int iter = 10, inplace = 0, patience = 0; /* Set size of FFT and process mesh */ n[0] = n[1] = n[2] = 16; np[0] = 2; np[1] = 2; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* read parameters from command line */ init_parameters(argc, argv, np, n, &iter, &inplace, &patience); /* setup FFTWs planing depth */ switch(patience){ case 1: pfft_opt_flag = PFFT_MEASURE; break; case 2: pfft_opt_flag = PFFT_PATIENT; break; case 3: pfft_opt_flag = PFFT_EXHAUSTIVE; break; default: pfft_opt_flag = PFFT_ESTIMATE; } pfft_opt_flag |= PFFT_DESTROY_INPUT; /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: Procmesh %d x %d requires MPI launch with %d processes.\n", np[0], np[1], np[0]*np[1]); MPI_Finalize(); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_OUT, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfft_alloc_complex(alloc_local); out = (inplace) ? in : pfft_alloc_complex(alloc_local); /* We often want to scale large FFTs, which do not fit on few processes. */ if( (in == NULL) || (out == NULL)){ fprintf(stderr, "!!! 
Error: Not enough memory to allocate input/output arrays !!!\n"); MPI_Finalize(); MPI_Finalize(); return 1; } /* Plan parallel forward FFT */ time = -MPI_Wtime(); plan_forw = pfft_plan_dft_3d( n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| pfft_opt_flag); time += MPI_Wtime(); // printf("time for forw planing: %.2e\n", time); /* Plan parallel backward FFT */ time = -MPI_Wtime(); plan_back = pfft_plan_dft_3d( n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| pfft_opt_flag); time += MPI_Wtime(); // printf("time for back planing: %.2e\n", time); /* Initialize input with random numbers */ pfft_init_input_c2c_3d(n, local_ni, local_i_start, in); for(int t=0; t<iter; t++){ /* execute parallel forward FFT */ pfft_execute(plan_forw); /* execute parallel backward FFT */ pfft_execute(plan_back); } /* check individual timers for workbalance */ timer_forw = pfft_get_timer(plan_forw); // printf("timer_forw->whole = %.2e\n", timer_forw->whole); pfft_destroy_timer(timer_forw); timer_back = pfft_get_timer(plan_back); // printf("timer_back->whole = %.2e\n", timer_back->whole); pfft_destroy_timer(timer_back); /* read out PFFT timers */ pfft_print_average_timer_adv(plan_forw, comm_cart_2d); pfft_print_average_timer_adv(plan_back, comm_cart_2d); if(inplace){ pfft_write_average_timer_adv(plan_forw, "measure_forw_inplace.m", comm_cart_2d); pfft_write_average_timer_adv(plan_back, "measure_back_inplace.m", comm_cart_2d); } else { pfft_write_average_timer_adv(plan_forw, "measure_forw_outofplace.m", comm_cart_2d); pfft_write_average_timer_adv(plan_back, "measure_back_outofplace.m", comm_cart_2d); } /* Scale data */ for(int t=0; t<iter; t++) for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++) in[l] /= (n[0]*n[1]*n[2]); /* Print error of back transformed data */ MPI_Barrier(MPI_COMM_WORLD); err = pfft_check_output_c2c_3d(n, local_ni, local_i_start, in, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, 
%td, %td):\n", n[0], n[1], n[2]); pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_2d); pfft_free(in); if(!inplace) pfft_free(out); MPI_Finalize(); return 0; }
int main(int argc, char **argv){ int np[2]; ptrdiff_t n[3], ni[3], no[3]; ptrdiff_t alloc_local_forw, alloc_local_back, alloc_local, howmany; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_n[3], local_start[3]; ptrdiff_t local_no[3], local_o_start[3]; double err, *in; pfft_complex *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_2d; /* Set size of FFT and process mesh */ ni[0] = ni[1] = ni[2] = 16; n[0] = 29; n[1] = 27; n[2] = 31; for(int t=0; t<3; t++) no[t] = ni[t]; np[0] = 2; np[1] = 2; howmany = 1; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local_forw = pfft_local_size_many_dft_r2c(3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_NONE | PFFT_PADDED_R2C, local_ni, local_i_start, local_n, local_start); alloc_local_back = pfft_local_size_many_dft_c2r(3, n, n, no, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_NONE | PFFT_PADDED_R2C, local_n, local_start, local_no, local_o_start); /* Allocate enough memory for both trafos */ alloc_local = (alloc_local_forw > alloc_local_back) ? 
alloc_local_forw : alloc_local_back; in = pfft_alloc_real(2 * alloc_local); out = pfft_alloc_complex(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_many_dft_r2c( 3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_PADDED_R2C); /* Plan parallel backward FFT */ plan_back = pfft_plan_many_dft_c2r( 3, n, n, no, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_PADDED_R2C); /* Initialize input with random numbers */ pfft_init_input_real(3, ni, local_ni, local_i_start, in); /* Execute parallel forward FFT */ pfft_execute(plan_forw); /* clear the old input */ pfft_clear_input_real(3, ni, local_ni, local_i_start, in); /* execute parallel backward FFT */ pfft_execute(plan_back); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++) in[l] /= (n[0]*n[1]*n[2]); /* Print error of back transformed data */ MPI_Barrier(MPI_COMM_WORLD); err = pfft_check_output_real(3, ni, local_ni, local_i_start, in, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err); /* free mem and finalize MPI */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_2d); pfft_free(in); pfft_free(out); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int n[3]; pfft_complex *in, *out; FFTW(plan) plan_forw=NULL, plan_back=NULL; double err, time, time_fftw[2], max_time_fftw[2]; unsigned fftw_flag; /* setup default parameters */ int iter = 10, inplace = 0, patience = 0; /* Set size of FFT and process mesh */ n[0] = n[1] = n[2] = 16; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* read parameters from command line */ init_parameters(argc, argv, n, &iter, &inplace, &patience); /* setup FFTWs planing depth */ switch(patience){ case 1: fftw_flag = FFTW_MEASURE; break; case 2: fftw_flag = FFTW_PATIENT; break; case 3: fftw_flag = FFTW_EXHAUSTIVE; break; default: fftw_flag = FFTW_ESTIMATE; } if(!inplace) fftw_flag |= FFTW_DESTROY_INPUT; /* Allocate memory */ in = pfft_alloc_complex(n[0]*n[1]*n[2]); out = (inplace) ? in : pfft_alloc_complex(n[0]*n[1]*n[2]); /* We often want to scale large FFTs, which do not fit on few processes. */ if( (in == NULL) || (out == NULL)){ fprintf(stderr, "!!! Error: Not enough memory to allocate input/output arrays !!!\n"); MPI_Finalize(); MPI_Finalize(); return 1; } ptrdiff_t local_ni[3], local_i_start[3], n_ptr[3]; for(int t=0; t<3; t++){ local_i_start[t] = 0; n_ptr[t] = local_ni[t] = (ptrdiff_t) n[t]; } plan_forw = FFTW(plan_dft_3d)(n[0], n[1], n[2], in, out, FFTW_FORWARD, fftw_flag); plan_back = FFTW(plan_dft_3d)(n[0], n[1], n[2], out, in, FFTW_BACKWARD, fftw_flag); /* Initialize input with random numbers */ pfft_init_input_complex_3d(n_ptr, local_ni, local_i_start, in); time_fftw[0] = time_fftw[1] = 0; for(int t=0; t<iter; t++){ /* execute parallel forward FFT */ time_fftw[0] -= MPI_Wtime(); FFTW(execute)(plan_forw); time_fftw[0] += MPI_Wtime(); /* execute parallel backward FFT */ time_fftw[1] -= MPI_Wtime(); FFTW(execute)(plan_back); time_fftw[1] += MPI_Wtime(); } /* Scale data */ for(int t=0; t<iter; t++) for(ptrdiff_t l=0; l < n[0] * n[1] * n[2]; l++) in[l] /= (n[0]*n[1]*n[2]); printf("fftw_forw = %.2e, fftw_back = %.2e\n", 
time_fftw[0]/iter, time_fftw[1]/iter); err = pfft_check_output_complex_3d(n_ptr, local_ni, local_i_start, in, MPI_COMM_WORLD); printf("Error after several forward and backward FFTWs of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); printf("maxerror = %6.2e;\n", err); /* free mem and finalize */ FFTW(destroy_plan)(plan_forw); FFTW(destroy_plan)(plan_back); pfft_free(in); if(!inplace) pfft_free(out); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int np[3]; ptrdiff_t n[4], N[4]; ptrdiff_t alloc_local; ptrdiff_t local_ni[4], local_i_start[4]; ptrdiff_t local_no[4], local_o_start[4]; double err, *in, *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_3d; pfft_r2r_kind kinds_forw[4], kinds_back[4]; /* Set size of FFT and process mesh */ n[0] = 13; n[1] = 14; n[2] = 19; n[3] = 17; np[0] = 2; np[1] = 2; np[2] = 2; /* Set FFTW kinds of 1d R2R trafos */ kinds_forw[0] = PFFT_REDFT00; kinds_back[0] = PFFT_REDFT00; kinds_forw[1] = PFFT_REDFT01; kinds_back[1] = PFFT_REDFT10; kinds_forw[2] = PFFT_RODFT00; kinds_back[2] = PFFT_RODFT00; kinds_forw[3] = PFFT_RODFT10; kinds_back[3] = PFFT_RODFT01; /* Set logical DFT sizes corresponding to FFTW manual: * for REDFT00 N=2*(n-1), for RODFT00 N=2*(n+1), otherwise N=2*n */ N[0] = 2*(n[0]-1); N[1] = 2*n[1]; N[2] = 2*(n[2]+1); N[3] = 2*n[3]; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */ if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]*np[2]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_r2r(4, n, comm_cart_3d, PFFT_TRANSPOSED_NONE, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfft_alloc_real(alloc_local); out = pfft_alloc_real(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_r2r( 4, n, in, out, comm_cart_3d, kinds_forw, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Plan parallel backward FFT */ plan_back = pfft_plan_r2r( 4, n, out, in, comm_cart_3d, kinds_back, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Initialize input with random numbers */ pfft_init_input_real(4, n, local_ni, local_i_start, in); /* execute parallel forward FFT */ pfft_execute(plan_forw); /* clear the 
old input */ pfft_clear_input_real(4, n, local_ni, local_i_start, in); /* execute parallel backward FFT */ pfft_execute(plan_back); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2] * local_ni[3]; l++) in[l] /= (N[0]*N[1]*N[2]*N[3]); /* Print error of back transformed data */ MPI_Barrier(MPI_COMM_WORLD); err = pfft_check_output_real(4, n, local_ni, local_i_start, in, comm_cart_3d); pfft_printf(comm_cart_3d, "Error after one forward and backward trafo of size n=(%td, %td, %td, %td):\n", n[0], n[1], n[2], n[3]); pfft_printf(comm_cart_3d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_3d); pfft_free(in); pfft_free(out); MPI_Finalize(); return 0; }
int main(int argc, char **argv){ int np[2]; ptrdiff_t n[3], ni[3], no[3], N[3]; ptrdiff_t alloc_local_forw, alloc_local_back, alloc_local, howmany; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_n[3], local_start[3]; ptrdiff_t local_no[3], local_o_start[3]; double err, *in, *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_2d; fftw_r2r_kind kinds_forw[3], kinds_back[3]; /* Set size of FFT and process mesh */ ni[0] = ni[1] = ni[2] = 16; n[0] = 29; n[1] = 27; n[2] = 31; for(int t=0; t<3; t++) no[t] = ni[t]; np[0] = 2; np[1] = 2; howmany = 1; /* Set PFFT kinds of 1d R2R trafos */ kinds_forw[0] = PFFT_REDFT00; kinds_back[0] = PFFT_REDFT00; kinds_forw[1] = PFFT_REDFT01; kinds_back[1] = PFFT_REDFT10; kinds_forw[2] = PFFT_RODFT00; kinds_back[2] = PFFT_RODFT00; /* Set logical DFT sizes corresponding to FFTW manual: * for REDFT00 N=2*(n-1), for RODFT00 N=2*(n+1), otherwise N=2*n */ N[0] = 2*(n[0]-1); N[1] = 2*n[1]; N[2] = 2*(n[2]+1); /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local_forw = pfft_local_size_many_r2r(3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_OUT, local_ni, local_i_start, local_n, local_start); alloc_local_back = pfft_local_size_many_r2r(3, n, n, no, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, comm_cart_2d, PFFT_TRANSPOSED_IN, local_n, local_start, local_no, local_o_start); /* Allocate enough memory for both trafos */ alloc_local = (alloc_local_forw > alloc_local_back) ? 
alloc_local_forw : alloc_local_back; in = fftw_alloc_real(alloc_local); out = fftw_alloc_real(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_many_r2r( 3, n, ni, n, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, in, out, comm_cart_2d, kinds_forw, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Plan parallel backward FFT */ plan_back = pfft_plan_many_r2r( 3, n, n, no, howmany, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, out, in, comm_cart_2d, kinds_back, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Initialize input with random numbers */ pfft_init_input_real_3d(ni, local_ni, local_i_start, in); /* Execute parallel forward FFT */ pfft_execute(plan_forw); /* clear the old input */ pfft_clear_input_real_3d(ni, local_ni, local_i_start, in); /* execute parallel backward FFT */ pfft_execute(plan_back); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++) in[l] /= (N[0]*N[1]*N[2]); /* Print error of back transformed data */ MPI_Barrier(MPI_COMM_WORLD); err = pfft_check_output_real_3d(ni, local_ni, local_i_start, in, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err); /* free mem and finalize MPI */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_2d); fftw_free(in); fftw_free(out); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int np[2]; ptrdiff_t n[3]; ptrdiff_t alloc_local; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_no[3], local_o_start[3]; double err; pfft_complex *in, *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_2d; /* Set size of FFT and process mesh */ n[0] = 2; n[1] = 2; n[2] = 4; np[0] = 2; np[1] = 2; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_NONE, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfft_alloc_complex(alloc_local); out = pfft_alloc_complex(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_dft_3d( n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Plan parallel backward FFT */ plan_back = pfft_plan_dft_3d( n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_NONE| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Initialize input with random numbers */ pfft_init_input_complex_3d(n, local_ni, local_i_start, in); /* Print input data */ pfft_apr_complex_3d( in, local_ni, local_i_start, "PFFT, g_hat", MPI_COMM_WORLD); /* execute parallel forward FFT */ pfft_execute(plan_forw); /* Print transformed data */ pfft_apr_complex_3d( out, local_no, local_o_start, "PFFT, g", MPI_COMM_WORLD); /* execute parallel backward FFT */ pfft_execute(plan_back); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++) in[l] /= (n[0]*n[1]*n[2]); /* Print back transformed data */ pfft_apr_complex_3d( in, local_ni, local_i_start, "PFFT^H, g_hat", MPI_COMM_WORLD); /* Print error of back 
transformed data */ MPI_Barrier(MPI_COMM_WORLD); err = pfft_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err); /* free mem and finalize MPI */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_2d); pfft_free(in); pfft_free(out); MPI_Finalize(); return 0; }
int main(int argc, char **argv){ ptrdiff_t n[3], gc_below[3], gc_above[3]; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_no[3], local_o_start[3]; ptrdiff_t local_ngc[3], local_gc_start[3]; ptrdiff_t alloc_local, alloc_local_gc; int np[3], rnk_self, size, verbose; double err; MPI_Comm comm_cart_2d; pfft_complex *data; pfft_gcplan ths; MPI_Init(&argc, &argv); pfft_init(); MPI_Comm_rank(MPI_COMM_WORLD, &rnk_self); MPI_Comm_size(MPI_COMM_WORLD, &size); /* default values */ n[0] = n[1] = n[2] = 8; /* n[0] = 3; n[1] = 5; n[2] = 7;*/ np[0]=2; np[1]=2; np[2] = 1; verbose = 0; for(int t=0; t<3; t++){ gc_below[t] = 0; gc_above[t] = 0; } gc_below[0] = 0; gc_above[0] = 8; /* set values by commandline */ init_parameters(argc, argv, n, np, gc_below, gc_above, &verbose); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_NONE, local_ni, local_i_start, local_no, local_o_start); alloc_local_gc = pfft_local_size_gc_3d( local_ni, local_i_start, alloc_local, gc_below, gc_above, local_ngc, local_gc_start); /* Allocate memory */ data = pfft_alloc_complex(alloc_local_gc); /* Plan parallel ghost cell send */ ths = pfft_plan_cgc_3d(n, gc_below, gc_above, data, comm_cart_2d, PFFT_GC_NONTRANSPOSED); /* Initialize input with random numbers */ pfft_init_input_c2c_3d(n, local_ni, local_i_start, data); /* check gcell input */ if(verbose) pfft_apr_complex_3d(data, local_ni, local_i_start, "gcell input", comm_cart_2d); /* Execute parallel ghost cell send */ pfft_exchange(ths); /* Check gcell output */ if(verbose) pfft_apr_complex_3d(data, local_ngc, local_gc_start, "exchanged gcells", comm_cart_2d); /* Execute adjoint parallel ghost cell send 
*/ pfft_reduce(ths); /* check input */ if(verbose) pfft_apr_complex_3d(data, local_no, local_o_start, "reduced gcells", comm_cart_2d); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++) data[l] /= 3; /* Print error of back transformed data */ MPI_Barrier(comm_cart_2d); err = pfft_check_output_c2c_3d(n, local_ni, local_i_start, data, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after one gcell exchange and reduce of size n=(%td, %td, %td),\n", n[0], n[1], n[2]); pfft_printf(comm_cart_2d, "gc_below = (%td, %td, %td), gc_above = (%td, %td, %td):\n", gc_below[0], gc_below[1], gc_below[2], gc_above[0], gc_above[1], gc_above[2]); pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_gcplan(ths); MPI_Comm_free(&comm_cart_2d); pfft_free(data); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int nthreads=1; /*number of threads to initialize openmp with*/ int runs=1; /*number of runs for testing*/ int np[2]; ptrdiff_t n[3]; ptrdiff_t alloc_local; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_no[3], local_o_start[3]; double err; pfft_complex *in, *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_2d; /* Init OpenMP */ pfft_get_args(argc,argv,"-pfft_omp_threads",1,PFFT_INT,&nthreads); pfft_get_args(argc,argv,"-pfft_runs",1,PFFT_INT,&runs); pfft_plan_with_nthreads(nthreads); /* Set size of FFT and process mesh */ n[0] = NNN;n[1] =NNN; n[2] =NNN; np[0] = 1; np[1] = 1; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); pfft_plan_with_nthreads(nthreads); pfft_printf(MPI_COMM_WORLD, "# %4d threads will be used for openmp (default is 1)\n", nthreads); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_dft_3d(n, comm_cart_2d, PFFT_TRANSPOSED_NONE, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfft_alloc_complex(alloc_local); out = pfft_alloc_complex(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_dft_3d( n, in, out, comm_cart_2d, PFFT_FORWARD, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_TUNE| PFFT_SHIFTED_IN); /* Plan parallel backward FFT */ plan_back = pfft_plan_dft_3d( n, out, in, comm_cart_2d, PFFT_BACKWARD, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT| PFFT_TUNE| PFFT_SHIFTED_OUT); /* Initialize input with random numbers */ pfft_init_input_complex_3d(n, local_ni, local_i_start, in); for(int i=0; i<runs; i++) { /* execute parallel forward FFT */ pfft_execute(plan_forw); /* clear the old input */ /* 
pfft_clear_input_complex_3d(n, local_ni, local_i_start, in); */ /* execute parallel backward FFT */ pfft_execute(plan_back); /* Scale data */ ptrdiff_t l; for(l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++) in[l] /= (n[0]*n[1]*n[2]); } pfft_print_average_timer_adv(plan_forw, MPI_COMM_WORLD); pfft_print_average_timer_adv(plan_back, MPI_COMM_WORLD); /* Print error of back transformed data */ err = pfft_check_output_complex_3d(n, local_ni, local_i_start, in, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after %d forward and backward trafos of size n=(%td, %td, %td):\n", runs, n[0], n[1], n[2]); pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_2d); pfft_free(in); pfft_free(out); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int np[2]; ptrdiff_t n[3], N[3]; ptrdiff_t alloc_local; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_no[3], local_o_start[3]; double err, *in, *out; pfft_plan plan_forw=NULL, plan_back=NULL; MPI_Comm comm_cart_2d; pfft_r2r_kind kinds_forw[3], kinds_back[3]; /* Set size of FFT and process mesh */ n[0] = 29; n[1] = 27; n[2] = 31; np[0] = 2; np[1] = 2; /* Set FFTW kinds of 1d R2R trafos */ kinds_forw[0] = PFFT_REDFT00; kinds_back[0] = PFFT_REDFT00; kinds_forw[1] = PFFT_REDFT01; kinds_back[1] = PFFT_REDFT10; kinds_forw[2] = PFFT_RODFT00; kinds_back[2] = PFFT_RODFT00; /* Set logical DFT sizes corresponding to FFTW manual: * for REDFT00 N=2*(n-1), for RODFT00 N=2*(n+1), otherwise N=2*n */ N[0] = 2*(n[0]-1); N[1] = 2*n[1]; N[2] = 2*(n[2]+1); /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* Create two-dimensional process grid of size np[0] x np[1], if possible */ if( pfft_create_procmesh_2d(MPI_COMM_WORLD, np[0], np[1], &comm_cart_2d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: This test file only works with %d processes.\n", np[0]*np[1]); MPI_Finalize(); return 1; } /* Get parameters of data distribution */ alloc_local = pfft_local_size_r2r_3d(n, comm_cart_2d, PFFT_TRANSPOSED_OUT, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfft_alloc_real(alloc_local); out = pfft_alloc_real(alloc_local); /* Plan parallel forward FFT */ plan_forw = pfft_plan_r2r_3d( n, in, out, comm_cart_2d, kinds_forw, PFFT_TRANSPOSED_OUT| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Plan parallel backward FFT */ plan_back = pfft_plan_r2r_3d( n, out, in, comm_cart_2d, kinds_back, PFFT_TRANSPOSED_IN| PFFT_MEASURE| PFFT_DESTROY_INPUT); /* Initialize input with random numbers */ pfft_init_input_real_3d(n, local_ni, local_i_start, in); int myrank, size; MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_size(MPI_COMM_WORLD, &size); // for(int t=0; t<size; t++){ // if(t == myrank){ // int m=0; // for(int k0=0; 
k0<local_ni[0]; k0++) // for(int k1=0; k1<local_ni[1]; k1++){ // for(int k2=0; k2<local_ni[2]; k2++, m++) // printf("in[%d, %d, %d] = %.2f\t", k0+local_i_start[0], k1+local_i_start[1], k2+local_i_start[2], in[m]); // printf("\n"); // } // } // fflush(stderr); // MPI_Barrier(MPI_COMM_WORLD); // } /* execute parallel forward FFT */ pfft_execute(plan_forw); // for(int t=0; t<size; t++){ // if(t == myrank){ // int m=0; // for(int k1=0; k1<local_no[1]; k1++) // for(int k2=0; k2<local_no[2]; k2++){ // for(int k0=0; k0<local_no[0]; k0++, m++) // printf("out[%d, %d, %d] = %.2f\t", k0+local_o_start[0], k1+local_o_start[1], k2+local_o_start[2], out[m]); // printf("\n"); // } // } // fflush(stderr); // MPI_Barrier(MPI_COMM_WORLD); // } /* execute parallel backward FFT */ pfft_execute(plan_back); /* Scale data */ for(ptrdiff_t l=0; l < local_ni[0] * local_ni[1] * local_ni[2]; l++) in[l] /= (N[0]*N[1]*N[2]); /* Print error of back transformed data */ MPI_Barrier(MPI_COMM_WORLD); err = pfft_check_output_real_3d(n, local_ni, local_i_start, in, comm_cart_2d); pfft_printf(comm_cart_2d, "Error after one forward and backward trafo of size n=(%td, %td, %td):\n", n[0], n[1], n[2]); pfft_printf(comm_cart_2d, "maxerror = %6.2e;\n", err); /* free mem and finalize */ pfft_destroy_plan(plan_forw); pfft_destroy_plan(plan_back); MPI_Comm_free(&comm_cart_2d); pfft_free(in); pfft_free(out); MPI_Finalize(); return 0; }