/**
 * Discrete mpolar FFT: evaluates the two-dimensional transform on the nodes
 * produced by mpolar_grid() with the direct (non-fast) algorithm
 * nfft_trafo_direct().
 *
 * The time spent inside nfft_trafo_direct() is stored in
 * GLOBAL_elapsed_time.
 *
 * \arg f_hat Fourier coefficients of the image; my_nfft_plan.N_total
 *            entries are read (plan initialised with N = {NN, NN})
 * \arg NN    bandwidth used in both dimensions
 * \arg f     output; receives my_nfft_plan.M_total function values
 * \arg T     first grid parameter forwarded to mpolar_grid()
 * \arg R     second grid parameter forwarded to mpolar_grid()
 * \arg m     cut-off parameter forwarded to nfft_init_guru()
 *
 * \return EXIT_SUCCESS on success, -1 if a node/weight buffer could not be
 *         allocated
 */
static int mpolar_dft(fftw_complex *f_hat, int NN, fftw_complex *f, int T, int R, int m)
{
  ticks t0, t1;
  int j, k;                   /**< index for nodes and frequencies      */
  nfft_plan my_nfft_plan;     /**< plan for the nfft-2D                 */
  double *x, *w;              /**< knots and associated weights         */
  int N[2], n[2];
  int M;                      /**< number of knots                      */

  N[0] = NN; n[0] = 2*N[0];   /**< oversampling factor sigma=2          */
  N[1] = NN; n[1] = 2*N[1];   /**< oversampling factor sigma=2          */

  x = (double *)nfft_malloc(5*(T/2)*R*(sizeof(double)));
  if (x == NULL)
    return -1;

  w = (double *)nfft_malloc(5*(T*R)/4*(sizeof(double)));
  if (w == NULL)
  {
    /* Do not leak the node buffer when the weight buffer is unavailable. */
    nfft_free(x);
    return -1;
  }

  /** init two dimensional NFFT plan */
  M = mpolar_grid(T, R, x, w);
  nfft_init_guru(&my_nfft_plan, 2, N, M, n, m,
                 PRE_PHI_HUT | PRE_PSI | MALLOC_X | MALLOC_F_HAT | MALLOC_F |
                 FFTW_INIT | FFT_OUT_OF_PLACE,
                 FFTW_MEASURE | FFTW_DESTROY_INPUT);

  /** init nodes from mpolar grid */
  for (j = 0; j < my_nfft_plan.M_total; j++)
  {
    my_nfft_plan.x[2*j+0] = x[2*j+0];
    my_nfft_plan.x[2*j+1] = x[2*j+1];
  }

  /** init Fourier coefficients from given image */
  for (k = 0; k < my_nfft_plan.N_total; k++)
    my_nfft_plan.f_hat[k] = f_hat[k];

  /** NDFT-2D; only the transform itself is timed */
  t0 = getticks();
  nfft_trafo_direct(&my_nfft_plan);
  t1 = getticks();
  GLOBAL_elapsed_time = nfft_elapsed_seconds(t1, t0);

  /** copy result */
  for (j = 0; j < my_nfft_plan.M_total; j++)
    f[j] = my_nfft_plan.f[j];

  /** finalise the plan and free the variables */
  nfft_finalize(&my_nfft_plan);
  nfft_free(x);
  nfft_free(w);

  return EXIT_SUCCESS;
}
static void simple_test_nfft_1d(void) { nfft_plan p; double t; int N=14; int M=19; ticks t0, t1; /** init an one dimensional plan */ nfft_init_1d(&p,N,M); /** init pseudo random nodes */ nfft_vrand_shifted_unit_double(p.x,p.M_total); /** precompute psi, the entries of the matrix B */ if(p.nfft_flags & PRE_ONE_PSI) nfft_precompute_one_psi(&p); /** init pseudo random Fourier coefficients and show them */ nfft_vrand_unit_complex(p.f_hat,p.N_total); nfft_vpr_complex(p.f_hat,p.N_total,"given Fourier coefficients, vector f_hat"); /** direct trafo and show the result */ t0 = getticks(); nfft_trafo_direct(&p); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f,p.M_total,"ndft, vector f"); printf(" took %e seconds.\n",t); /** approx. trafo and show the result */ nfft_trafo(&p); nfft_vpr_complex(p.f,p.M_total,"nfft, vector f"); /** approx. adjoint and show the result */ nfft_adjoint_direct(&p); nfft_vpr_complex(p.f_hat,p.N_total,"adjoint ndft, vector f_hat"); /** approx. adjoint and show the result */ nfft_adjoint(&p); nfft_vpr_complex(p.f_hat,p.N_total,"adjoint nfft, vector f_hat"); /** finalise the one dimensional plan */ nfft_finalize(&p); }
/**
 * Measures the average execution time of one Gauss transform.
 *
 * The selected transform is run repeatedly until the accumulated run time
 * reaches 0.01 seconds (at least one run is always performed); the
 * accumulated time divided by the number of runs is returned.
 *
 * \arg ths The pointer to the fgt plan
 * \arg dgt If nonzero, \ref dgt_trafo is timed; otherwise \ref fgt_trafo
 *
 * \return average time per transform in seconds
 *
 * \author Stefan Kunis
 */
double fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
{
  const double min_total_seconds = 0.01;
  double total_seconds = 0;
  int runs = 0;

  do
  {
    ticks start, stop;

    start = getticks();
    if (dgt)
    {
      dgt_trafo(ths);
    }
    else
    {
      fgt_trafo(ths);
    }
    stop = getticks();

    total_seconds += nfft_elapsed_seconds(stop, start);
    runs++;
  }
  while (total_seconds < min_total_seconds);

  return total_seconds / runs;
}
int main(int argc, char **argv) { int j,k,t; /**< indices */ int d; /**< number of dimensions */ int N; /**< number of source nodes */ int M; /**< number of target nodes */ int n; /**< expansion degree */ int m; /**< cut-off parameter */ int p; /**< degree of smoothness */ char *s; /**< name of kernel */ double _Complex (*kernel)(double , int , const double *); /**< kernel function */ double c; /**< parameter for kernel */ fastsum_plan my_fastsum_plan; /**< plan for fast summation */ double _Complex *direct; /**< array for direct computation */ ticks t0, t1; /**< for time measurement */ double time; /**< for time measurement */ double error=0.0; /**< for error computation */ double eps_I; /**< inner boundary */ double eps_B; /**< outer boundary */ if (argc!=11) { printf("\nfastsum_test d N M n m p kernel c eps_I eps_B\n\n"); printf(" d dimension \n"); printf(" N number of source nodes \n"); printf(" M number of target nodes \n"); printf(" n expansion degree \n"); printf(" m cut-off parameter \n"); printf(" p degree of smoothness \n"); printf(" kernel kernel function (e.g., gaussian)\n"); printf(" c kernel parameter \n"); printf(" eps_I inner boundary \n"); printf(" eps_B outer boundary \n\n"); exit(-1); } else { d=atoi(argv[1]); N=atoi(argv[2]); c=1.0/pow((double)N,1.0/(double)d); M=atoi(argv[3]); n=atoi(argv[4]); m=atoi(argv[5]); p=atoi(argv[6]); s=argv[7]; c=atof(argv[8]); eps_I=atof(argv[9]); eps_B=atof(argv[10]); if (strcmp(s,"gaussian")==0) kernel = gaussian; else if (strcmp(s,"multiquadric")==0) kernel = multiquadric; else if (strcmp(s,"inverse_multiquadric")==0) kernel = inverse_multiquadric; else if (strcmp(s,"logarithm")==0) kernel = logarithm; else if (strcmp(s,"thinplate_spline")==0) kernel = thinplate_spline; else if (strcmp(s,"one_over_square")==0) kernel = one_over_square; else if (strcmp(s,"one_over_modulus")==0) kernel = one_over_modulus; else if (strcmp(s,"one_over_x")==0) kernel = one_over_x; else if (strcmp(s,"inverse_multiquadric3")==0) kernel = 
inverse_multiquadric3; else if (strcmp(s,"sinc_kernel")==0) kernel = sinc_kernel; else if (strcmp(s,"cosc")==0) kernel = cosc; else if (strcmp(s,"cot")==0) kernel = kcot; else { s="multiquadric"; kernel = multiquadric; } } printf("d=%d, N=%d, M=%d, n=%d, m=%d, p=%d, kernel=%s, c=%g, eps_I=%g, eps_B=%g \n",d,N,M,n,m,p,s,c,eps_I,eps_B); #ifdef NF_KUB printf("nearfield correction using piecewise cubic Lagrange interpolation\n"); #elif defined(NF_QUADR) printf("nearfield correction using piecewise quadratic Lagrange interpolation\n"); #elif defined(NF_LIN) printf("nearfield correction using piecewise linear Lagrange interpolation\n"); #endif #ifdef _OPENMP #pragma omp parallel { #pragma omp single { printf("nthreads=%d\n", omp_get_max_threads()); } } fftw_init_threads(); #endif /** init d-dimensional fastsum plan */ fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, 0, n, m, p, eps_I, eps_B); //fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, NEARFIELD_BOXES, n, m, p, eps_I, eps_B); if (my_fastsum_plan.flags & NEARFIELD_BOXES) printf("determination of nearfield candidates based on partitioning into boxes\n"); else printf("determination of nearfield candidates based on search tree\n"); /** init source knots in a d-ball with radius 0.25-eps_b/2 */ k = 0; while (k < N) { double r_max = 0.25 - my_fastsum_plan.eps_B/2.0; double r2 = 0.0; for (j=0; j<d; j++) my_fastsum_plan.x[k*d+j] = 2.0 * r_max * (double)rand()/(double)RAND_MAX - r_max; for (j=0; j<d; j++) r2 += my_fastsum_plan.x[k*d+j] * my_fastsum_plan.x[k*d+j]; if (r2 >= r_max * r_max) continue; k++; } for (k=0; k<N; k++) { /* double r=(0.25-my_fastsum_plan.eps_B/2.0)*pow((double)rand()/(double)RAND_MAX,1.0/d); my_fastsum_plan.x[k*d+0] = r; for (j=1; j<d; j++) { double phi=2.0*KPI*(double)rand()/(double)RAND_MAX; my_fastsum_plan.x[k*d+j] = r; for (t=0; t<j; t++) { my_fastsum_plan.x[k*d+t] *= cos(phi); } my_fastsum_plan.x[k*d+j] *= sin(phi); } */ my_fastsum_plan.alpha[k] = (double)rand()/(double)RAND_MAX + 
_Complex_I*(double)rand()/(double)RAND_MAX; } /** init target knots in a d-ball with radius 0.25-eps_b/2 */ k = 0; while (k < M) { double r_max = 0.25 - my_fastsum_plan.eps_B/2.0; double r2 = 0.0; for (j=0; j<d; j++) my_fastsum_plan.y[k*d+j] = 2.0 * r_max * (double)rand()/(double)RAND_MAX - r_max; for (j=0; j<d; j++) r2 += my_fastsum_plan.y[k*d+j] * my_fastsum_plan.y[k*d+j]; if (r2 >= r_max * r_max) continue; k++; } /* for (k=0; k<M; k++) { double r=(0.25-my_fastsum_plan.eps_B/2.0)*pow((double)rand()/(double)RAND_MAX,1.0/d); my_fastsum_plan.y[k*d+0] = r; for (j=1; j<d; j++) { double phi=2.0*KPI*(double)rand()/(double)RAND_MAX; my_fastsum_plan.y[k*d+j] = r; for (t=0; t<j; t++) { my_fastsum_plan.y[k*d+t] *= cos(phi); } my_fastsum_plan.y[k*d+j] *= sin(phi); } } */ /** direct computation */ printf("direct computation: "); fflush(NULL); t0 = getticks(); fastsum_exact(&my_fastsum_plan); t1 = getticks(); time=nfft_elapsed_seconds(t1,t0); printf("%fsec\n",time); /** copy result */ direct = (double _Complex *)nfft_malloc(my_fastsum_plan.M_total*(sizeof(double _Complex))); for (j=0; j<my_fastsum_plan.M_total; j++) direct[j]=my_fastsum_plan.f[j]; /** precomputation */ printf("pre-computation: "); fflush(NULL); t0 = getticks(); fastsum_precompute(&my_fastsum_plan); t1 = getticks(); time=nfft_elapsed_seconds(t1,t0); printf("%fsec\n",time); /** fast computation */ printf("fast computation: "); fflush(NULL); t0 = getticks(); fastsum_trafo(&my_fastsum_plan); t1 = getticks(); time=nfft_elapsed_seconds(t1,t0); printf("%fsec\n",time); /** compute max error */ error=0.0; for (j=0; j<my_fastsum_plan.M_total; j++) { if (cabs(direct[j]-my_fastsum_plan.f[j])/cabs(direct[j])>error) error=cabs(direct[j]-my_fastsum_plan.f[j])/cabs(direct[j]); } printf("max relative error: %e\n",error); /** finalise the plan */ fastsum_finalize(&my_fastsum_plan); return 0; }
/**
 * Runs one NFSFT (or adjoint NFSFT) on the given data and prints timings.
 *
 * A plan is created with nfsft_init_guru(), the nodes and the input vector
 * are copied into it, nfsft_precompute_x() and the transform are executed,
 * and one line with six numbers is written to stdout: precomputation time,
 * plan.MEASURE_TIME_t[0..2], the remaining time, and the total time. The
 * total time includes the node-dependent precomputation. Entries of
 * MEASURE_TIME_t are reset to zero when the corresponding MEASURE_TIME /
 * MEASURE_TIME_FFTW macro is not defined.
 *
 * \arg trafo_adjoint 0 runs nfsft_trafo(), anything else nfsft_adjoint()
 * \arg N             bandwidth passed to nfsft_init_guru()
 * \arg M             number of nodes passed to nfsft_init_guru()
 * \arg x             nodes, two doubles per node
 * \arg f_hat         spherical Fourier coefficients (read if trafo_adjoint==0)
 * \arg f             function values (read if trafo_adjoint!=0)
 * \arg m             NFFT cut-off parameter
 * \arg nfsft_flags   additional NFSFT flags
 * \arg psi_flags     additional NFFT precomputation flags
 */
void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
{
  nfsft_plan plan;
  ticks tic, toc;
  double seconds_pre, seconds_total;
  int degree, order;
  int idx;

  /* All input and output arrays are allocated by the plan itself; the
   * coefficients use L^2-normalized spherical harmonics and are preserved
   * during transformations. */
  nfsft_init_guru(&plan, N, M,
                  nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F |
                  NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
                  PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE,
                  m);

  /* Copy the nodes: two coordinates per node. */
  for (idx = 0; idx < 2*plan.M_total; idx++)
  {
    plan.x[idx] = x[idx];
  }

  /* The coefficient vector starts out zeroed in both directions. */
  memset(plan.f_hat, 0U, plan.N_total*sizeof(double _Complex));

  if (trafo_adjoint == 0)
  {
    /* Forward transform: take over the coefficients for all (k,n), |n|<=k. */
    for (degree = 0; degree <= plan.N; degree++)
    {
      for (order = -degree; order <= degree; order++)
      {
        plan.f_hat[NFSFT_INDEX(degree,order,&plan)] =
          f_hat[NFSFT_INDEX(degree,order,&plan)];
      }
    }
  }
  else
  {
    /* Adjoint transform: take over the function values. */
    for (idx = 0; idx < plan.M_total; idx++)
    {
      plan.f[idx] = f[idx];
    }
  }

  /* Node-dependent precomputation for the NFFT. */
  tic = getticks();
  nfsft_precompute_x(&plan);
  toc = getticks();
  seconds_pre = nfft_elapsed_seconds(toc, tic);

  if (trafo_adjoint == 0)
  {
    nfsft_trafo(&plan);
  }
  else
  {
    nfsft_adjoint(&plan);
  }

  /* tic is not reset, so the total covers precomputation plus transform. */
  toc = getticks();
  seconds_total = nfft_elapsed_seconds(toc, tic);

#ifndef MEASURE_TIME
  plan.MEASURE_TIME_t[0] = 0.0;
  plan.MEASURE_TIME_t[2] = 0.0;
#endif

#ifndef MEASURE_TIME_FFTW
  plan.MEASURE_TIME_t[1] = 0.0;
#endif

  /* "%6e" prints with the default precision of 6, like the other fields. */
  printf("%.6e %.6e %6e %.6e %.6e %.6e\n",
         seconds_pre,
         plan.MEASURE_TIME_t[0],
         plan.MEASURE_TIME_t[1],
         plan.MEASURE_TIME_t[2],
         seconds_total-seconds_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2],
         seconds_total);

  /* Release the NFSFT plan. */
  nfsft_finalize(&plan);
}
/** * The main program. * * \param argc The number of arguments * \param argv An array containing the arguments as C-strings * * \return Exit code */ int main (int argc, char **argv) { int tc; /**< The index variable for testcases */ int tc_max; /**< The number of testcases */ int *NQ; /**< The array containing the cut-off degrees * \f$N\f$ */ int NQ_max; /**< The maximum cut-off degree \f$N\f$ for the* current testcase */ int *SQ; /**< The array containing the grid size parameters */ int SQ_max; /**< The maximum grid size parameter */ int *RQ; /**< The array containing the grid size parameters */ int iNQ; /**< Index variable for cut-off degrees */ int iNQ_max; /**< The maximum number of cut-off degrees */ int testfunction; /**< The testfunction */ int N; /**< The test function's bandwidth */ int use_nfsft; /**< Whether to use the NFSFT algorithm or not */ int use_nfft; /**< Whether to use the NFFT algorithm or not */ int use_fpt; /**< Whether to use the FPT algorithm or not */ int cutoff; /**< The current NFFT cut-off parameter */ double threshold; /**< The current NFSFT threshold parameter */ int gridtype; /**< The type of quadrature grid to be used */ int repetitions; /**< The number of repetitions to be performed */ int mode; /**< The number of repetitions to be performed */ double *w; /**< The quadrature weights */ double *x_grid; /**< The quadrature nodes */ double *x_compare; /**< The quadrature nodes */ double _Complex *f_grid; /**< The reference function values */ double _Complex *f_compare; /**< The function values */ double _Complex *f; /**< The function values */ double _Complex *f_hat_gen; /**< The reference spherical Fourier * coefficients */ double _Complex *f_hat; /**< The spherical Fourier coefficients */ nfsft_plan plan_adjoint; /**< The NFSFT plan */ nfsft_plan plan; /**< The NFSFT plan */ nfsft_plan plan_gen; /**< The NFSFT plan */ double t_avg; /**< The average computation time needed */ double err_infty_avg; /**< The average error 
\f$E_\infty\f$ */ double err_2_avg; /**< The average error \f$E_2\f$ */ int i; /**< A loop variable */ int k; /**< A loop variable */ int n; /**< A loop variable */ int d; /**< A loop variable */ int m_theta; /**< The current number of different * colatitudinal angles (for grids) */ int m_phi; /**< The current number of different * longitudinal angles (for grids). */ int m_total; /**< The total number nodes. */ double *theta; /**< An array for saving the angles theta of a * grid */ double *phi; /**< An array for saving the angles phi of a * grid */ fftw_plan fplan; /**< An FFTW plan for computing Clenshaw-Curtis quadrature weights */ //int nside; /**< The size parameter for the HEALPix grid */ int d2; int M; double theta_s; double x1,x2,x3,temp; int m_compare; nfsft_plan *plan_adjoint_ptr; nfsft_plan *plan_ptr; double *w_temp; int testmode; ticks t0, t1; /* Read the number of testcases. */ fscanf(stdin,"testcases=%d\n",&tc_max); fprintf(stdout,"%d\n",tc_max); /* Process each testcase. */ for (tc = 0; tc < tc_max; tc++) { /* Check if the fast transform shall be used. */ fscanf(stdin,"nfsft=%d\n",&use_nfsft); fprintf(stdout,"%d\n",use_nfsft); if (use_nfsft != NO) { /* Check if the NFFT shall be used. */ fscanf(stdin,"nfft=%d\n",&use_nfft); fprintf(stdout,"%d\n",use_nfsft); if (use_nfft != NO) { /* Read the cut-off parameter. */ fscanf(stdin,"cutoff=%d\n",&cutoff); fprintf(stdout,"%d\n",cutoff); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ cutoff = 1; } /* Check if the fast polynomial transform shall be used. */ fscanf(stdin,"fpt=%d\n",&use_fpt); fprintf(stdout,"%d\n",use_fpt); if (use_fpt != NO) { /* Read the NFSFT threshold parameter. */ fscanf(stdin,"threshold=%lf\n",&threshold); fprintf(stdout,"%lf\n",threshold); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ threshold = 1000.0; } } else { /* TODO remove this */ /* Set dummy values. 
*/ use_nfft = NO; use_fpt = NO; cutoff = 3; threshold = 1000.0; } /* Read the testmode type. */ fscanf(stdin,"testmode=%d\n",&testmode); fprintf(stdout,"%d\n",testmode); if (testmode == ERROR) { /* Read the quadrature grid type. */ fscanf(stdin,"gridtype=%d\n",&gridtype); fprintf(stdout,"%d\n",gridtype); /* Read the test function. */ fscanf(stdin,"testfunction=%d\n",&testfunction); fprintf(stdout,"%d\n",testfunction); /* Check if random bandlimited function has been chosen. */ if (testfunction == FUNCTION_RANDOM_BANDLIMITED) { /* Read the bandwidht. */ fscanf(stdin,"bandlimit=%d\n",&N); fprintf(stdout,"%d\n",N); } else { N = 1; } /* Read the number of repetitions. */ fscanf(stdin,"repetitions=%d\n",&repetitions); fprintf(stdout,"%d\n",repetitions); fscanf(stdin,"mode=%d\n",&mode); fprintf(stdout,"%d\n",mode); if (mode == RANDOM) { /* Read the bandwidht. */ fscanf(stdin,"points=%d\n",&m_compare); fprintf(stdout,"%d\n",m_compare); x_compare = (double*) nfft_malloc(2*m_compare*sizeof(double)); d = 0; while (d < m_compare) { x1 = 2.0*(((double)rand())/RAND_MAX) - 1.0; x2 = 2.0*(((double)rand())/RAND_MAX) - 1.0; x3 = 2.0*(((double)rand())/RAND_MAX) - 1.0; temp = sqrt(x1*x1+x2*x2+x3*x3); if (temp <= 1) { x_compare[2*d+1] = acos(x3); if (x_compare[2*d+1] == 0 || x_compare[2*d+1] == KPI) { x_compare[2*d] = 0.0; } else { x_compare[2*d] = atan2(x2/sin(x_compare[2*d+1]),x1/sin(x_compare[2*d+1])); } x_compare[2*d] *= 1.0/(2.0*KPI); x_compare[2*d+1] *= 1.0/(2.0*KPI); d++; } } f_compare = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex)); f = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex)); } } /* Initialize maximum cut-off degree and grid size parameter. */ NQ_max = 0; SQ_max = 0; /* Read the number of cut-off degrees. */ fscanf(stdin,"bandwidths=%d\n",&iNQ_max); fprintf(stdout,"%d\n",iNQ_max); /* Allocate memory for the cut-off degrees and grid size parameters. 
*/ NQ = (int*) nfft_malloc(iNQ_max*sizeof(int)); SQ = (int*) nfft_malloc(iNQ_max*sizeof(int)); if (testmode == TIMING) { RQ = (int*) nfft_malloc(iNQ_max*sizeof(int)); } /* Read the cut-off degrees and grid size parameters. */ for (iNQ = 0; iNQ < iNQ_max; iNQ++) { if (testmode == TIMING) { /* Read cut-off degree and grid size parameter. */ fscanf(stdin,"%d %d %d\n",&NQ[iNQ],&SQ[iNQ],&RQ[iNQ]); fprintf(stdout,"%d %d %d\n",NQ[iNQ],SQ[iNQ],RQ[iNQ]); NQ_max = MAX(NQ_max,NQ[iNQ]); SQ_max = MAX(SQ_max,SQ[iNQ]); } else { /* Read cut-off degree and grid size parameter. */ fscanf(stdin,"%d %d\n",&NQ[iNQ],&SQ[iNQ]); fprintf(stdout,"%d %d\n",NQ[iNQ],SQ[iNQ]); NQ_max = MAX(NQ_max,NQ[iNQ]); SQ_max = MAX(SQ_max,SQ[iNQ]); } } /* Do precomputation. */ //fprintf(stderr,"NFSFT Precomputation\n"); //fflush(stderr); nfsft_precompute(NQ_max, threshold, ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U)), 0U); if (testmode == TIMING) { /* Allocate data structures. */ f_hat = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(NQ_max)*sizeof(double _Complex)); f = (double _Complex*) nfft_malloc(SQ_max*sizeof(double _Complex)); x_grid = (double*) nfft_malloc(2*SQ_max*sizeof(double)); for (d = 0; d < SQ_max; d++) { f[d] = (((double)rand())/RAND_MAX)-0.5 + _Complex_I*((((double)rand())/RAND_MAX)-0.5); x_grid[2*d] = (((double)rand())/RAND_MAX) - 0.5; x_grid[2*d+1] = (((double)rand())/RAND_MAX) * 0.5; } } //fprintf(stderr,"Entering loop\n"); //fflush(stderr); /* Process all cut-off bandwidths. */ for (iNQ = 0; iNQ < iNQ_max; iNQ++) { if (testmode == TIMING) { nfsft_init_guru(&plan,NQ[iNQ],SQ[iNQ], NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFTW_MEASURE | FFT_OUT_OF_PLACE, cutoff); plan.f_hat = f_hat; plan.x = x_grid; plan.f = f; nfsft_precompute_x(&plan); t_avg = 0.0; for (i = 0; i < RQ[iNQ]; i++) { t0 = getticks(); if (use_nfsft != NO) { /* Execute the adjoint NFSFT transformation. 
*/ nfsft_adjoint(&plan); } else { /* Execute the adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(&plan); } t1 = getticks(); t_avg += nfft_elapsed_seconds(t1,t0); } t_avg = t_avg/((double)RQ[iNQ]); nfsft_finalize(&plan); fprintf(stdout,"%+le\n", t_avg); fprintf(stderr,"%d: %4d %4d %+le\n", tc, NQ[iNQ], SQ[iNQ], t_avg); } else { /* Determine the maximum number of nodes. */ switch (gridtype) { case GRID_GAUSS_LEGENDRE: /* Calculate grid dimensions. */ m_theta = SQ[iNQ] + 1; m_phi = 2*SQ[iNQ] + 2; m_total = m_theta*m_phi; break; case GRID_CLENSHAW_CURTIS: /* Calculate grid dimensions. */ m_theta = 2*SQ[iNQ] + 1; m_phi = 2*SQ[iNQ] + 2; m_total = m_theta*m_phi; break; case GRID_HEALPIX: m_theta = 1; m_phi = 12*SQ[iNQ]*SQ[iNQ]; m_total = m_theta * m_phi; //fprintf("HEALPix: SQ = %d, m_theta = %d, m_phi= %d, m"); break; case GRID_EQUIDISTRIBUTION: case GRID_EQUIDISTRIBUTION_UNIFORM: m_theta = 2; //fprintf(stderr,"ed: m_theta = %d\n",m_theta); for (k = 1; k < SQ[iNQ]; k++) { m_theta += (int)floor((2*KPI)/acos((cos(KPI/(double)SQ[iNQ])- cos(k*KPI/(double)SQ[iNQ])*cos(k*KPI/(double)SQ[iNQ]))/ (sin(k*KPI/(double)SQ[iNQ])*sin(k*KPI/(double)SQ[iNQ])))); //fprintf(stderr,"ed: m_theta = %d\n",m_theta); } //fprintf(stderr,"ed: m_theta final = %d\n",m_theta); m_phi = 1; m_total = m_theta * m_phi; break; } /* Allocate memory for data structures. */ w = (double*) nfft_malloc(m_theta*sizeof(double)); x_grid = (double*) nfft_malloc(2*m_total*sizeof(double)); //fprintf(stderr,"NQ = %d\n",NQ[iNQ]); //fflush(stderr); switch (gridtype) { case GRID_GAUSS_LEGENDRE: //fprintf(stderr,"Generating grid for NQ = %d, SQ = %d\n",NQ[iNQ],SQ[iNQ]); //fflush(stderr); /* Read quadrature weights. */ for (k = 0; k < m_theta; k++) { fscanf(stdin,"%le\n",&w[k]); w[k] *= (2.0*KPI)/((double)m_phi); } //fprintf(stderr,"Allocating theta and phi\n"); //fflush(stderr); /* Allocate memory to store the grid's angles. 
*/ theta = (double*) nfft_malloc(m_theta*sizeof(double)); phi = (double*) nfft_malloc(m_phi*sizeof(double)); //if (theta == NULL || phi == NULL) //{ //fprintf(stderr,"Couldn't allocate theta and phi\n"); //fflush(stderr); //} /* Read angles theta. */ for (k = 0; k < m_theta; k++) { fscanf(stdin,"%le\n",&theta[k]); } /* Generate the grid angles phi. */ for (n = 0; n < m_phi; n++) { phi[n] = n/((double)m_phi); phi[n] -= ((phi[n]>=0.5)?(1.0):(0.0)); } //fprintf(stderr,"Generating grid nodes\n"); //fflush(stderr); /* Generate the grid's nodes. */ d = 0; for (k = 0; k < m_theta; k++) { for (n = 0; n < m_phi; n++) { x_grid[2*d] = phi[n]; x_grid[2*d+1] = theta[k]; d++; } } //fprintf(stderr,"Freeing theta and phi\n"); //fflush(stderr); /* Free the arrays for the grid's angles. */ nfft_free(theta); nfft_free(phi); break; case GRID_CLENSHAW_CURTIS: /* Allocate memory to store the grid's angles. */ theta = (double*) nfft_malloc(m_theta*sizeof(double)); phi = (double*) nfft_malloc(m_phi*sizeof(double)); /* Generate the grid angles theta. */ for (k = 0; k < m_theta; k++) { theta[k] = k/((double)2*(m_theta-1)); } /* Generate the grid angles phi. */ for (n = 0; n < m_phi; n++) { phi[n] = n/((double)m_phi); phi[n] -= ((phi[n]>=0.5)?(1.0):(0.0)); } /* Generate quadrature weights. */ fplan = fftw_plan_r2r_1d(SQ[iNQ]+1, w, w, FFTW_REDFT00, 0U); for (k = 0; k < SQ[iNQ]+1; k++) { w[k] = -2.0/(4*k*k-1); } fftw_execute(fplan); w[0] *= 0.5; for (k = 0; k < SQ[iNQ]+1; k++) { w[k] *= (2.0*KPI)/((double)(m_theta-1)*m_phi); w[m_theta-1-k] = w[k]; } fftw_destroy_plan(fplan); /* Generate the grid's nodes. */ d = 0; for (k = 0; k < m_theta; k++) { for (n = 0; n < m_phi; n++) { x_grid[2*d] = phi[n]; x_grid[2*d+1] = theta[k]; d++; } } /* Free the arrays for the grid's angles. 
*/ nfft_free(theta); nfft_free(phi); break; case GRID_HEALPIX: d = 0; for (k = 1; k <= SQ[iNQ]-1; k++) { for (n = 0; n <= 4*k-1; n++) { x_grid[2*d+1] = 1 - (k*k)/((double)(3.0*SQ[iNQ]*SQ[iNQ])); x_grid[2*d] = ((n+0.5)/(4*k)); x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0); d++; } } d2 = d-1; for (k = SQ[iNQ]; k <= 3*SQ[iNQ]; k++) { for (n = 0; n <= 4*SQ[iNQ]-1; n++) { x_grid[2*d+1] = 2.0/(3*SQ[iNQ])*(2*SQ[iNQ]-k); x_grid[2*d] = (n+((k%2==0)?(0.5):(0.0)))/(4*SQ[iNQ]); x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0); d++; } } for (k = 1; k <= SQ[iNQ]-1; k++) { for (n = 0; n <= 4*k-1; n++) { x_grid[2*d+1] = -x_grid[2*d2+1]; x_grid[2*d] = x_grid[2*d2]; d++; d2--; } } for (d = 0; d < m_total; d++) { x_grid[2*d+1] = acos(x_grid[2*d+1])/(2.0*KPI); } w[0] = (4.0*KPI)/(m_total); break; case GRID_EQUIDISTRIBUTION: case GRID_EQUIDISTRIBUTION_UNIFORM: /* TODO Compute the weights. */ if (gridtype == GRID_EQUIDISTRIBUTION) { w_temp = (double*) nfft_malloc((SQ[iNQ]+1)*sizeof(double)); fplan = fftw_plan_r2r_1d(SQ[iNQ]/2+1, w_temp, w_temp, FFTW_REDFT00, 0U); for (k = 0; k < SQ[iNQ]/2+1; k++) { w_temp[k] = -2.0/(4*k*k-1); } fftw_execute(fplan); w_temp[0] *= 0.5; for (k = 0; k < SQ[iNQ]/2+1; k++) { w_temp[k] *= (2.0*KPI)/((double)(SQ[iNQ])); w_temp[SQ[iNQ]-k] = w_temp[k]; } fftw_destroy_plan(fplan); } d = 0; x_grid[2*d] = -0.5; x_grid[2*d+1] = 0.0; if (gridtype == GRID_EQUIDISTRIBUTION) { w[d] = w_temp[0]; } else { w[d] = (4.0*KPI)/(m_total); } d = 1; x_grid[2*d] = -0.5; x_grid[2*d+1] = 0.5; if (gridtype == GRID_EQUIDISTRIBUTION) { w[d] = w_temp[SQ[iNQ]]; } else { w[d] = (4.0*KPI)/(m_total); } d = 2; for (k = 1; k < SQ[iNQ]; k++) { theta_s = (double)k*KPI/(double)SQ[iNQ]; M = (int)floor((2.0*KPI)/acos((cos(KPI/(double)SQ[iNQ])- cos(theta_s)*cos(theta_s))/(sin(theta_s)*sin(theta_s)))); for (n = 0; n < M; n++) { x_grid[2*d] = (n + 0.5)/M; x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0); x_grid[2*d+1] = theta_s/(2.0*KPI); if (gridtype == GRID_EQUIDISTRIBUTION) { w[d] = 
w_temp[k]/((double)(M)); } else { w[d] = (4.0*KPI)/(m_total); } d++; } } if (gridtype == GRID_EQUIDISTRIBUTION) { nfft_free(w_temp); } break; default: break; } /* Allocate memory for grid values. */ f_grid = (double _Complex*) nfft_malloc(m_total*sizeof(double _Complex)); if (mode == RANDOM) { } else { m_compare = m_total; f_compare = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex)); x_compare = x_grid; f = f_grid; } //fprintf(stderr,"Generating test function\n"); //fflush(stderr); switch (testfunction) { case FUNCTION_RANDOM_BANDLIMITED: f_hat_gen = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(N)*sizeof(double _Complex)); //fprintf(stderr,"Generating random test function\n"); //fflush(stderr); /* Generate random function samples by sampling a bandlimited * function. */ nfsft_init_guru(&plan_gen,N,m_total, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((N>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_gen.f_hat = f_hat_gen; plan_gen.x = x_grid; plan_gen.f = f_grid; nfsft_precompute_x(&plan_gen); for (k = 0; k < plan_gen.N_total; k++) { f_hat_gen[k] = 0.0; } for (k = 0; k <= N; k++) { for (n = -k; n <= k; n++) { f_hat_gen[NFSFT_INDEX(k,n,&plan_gen)] = (((double)rand())/RAND_MAX)-0.5 + _Complex_I*((((double)rand())/RAND_MAX)-0.5); } } if (use_nfsft != NO) { /* Execute the NFSFT transformation. */ nfsft_trafo(&plan_gen); } else { /* Execute the direct NDSFT transformation. */ nfsft_trafo_direct(&plan_gen); } nfsft_finalize(&plan_gen); if (mode == RANDOM) { nfsft_init_guru(&plan_gen,N,m_compare, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((N>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_gen.f_hat = f_hat_gen; plan_gen.x = x_compare; plan_gen.f = f_compare; nfsft_precompute_x(&plan_gen); if (use_nfsft != NO) { /* Execute the NFSFT transformation. 
*/ nfsft_trafo(&plan_gen); } else { /* Execute the direct NDSFT transformation. */ nfsft_trafo_direct(&plan_gen); } nfsft_finalize(&plan_gen); } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } nfft_free(f_hat_gen); break; case FUNCTION_F1: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); f_grid[d] = x1*x2*x3; } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); f_compare[d] = x1*x2*x3; } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F2: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); f_grid[d] = 0.1*exp(x1+x2+x3); } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); f_compare[d] = 0.1*exp(x1+x2+x3); } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F3: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_grid[d] = 0.1*temp; } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_compare[d] = 0.1*temp; } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case 
FUNCTION_F4: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_grid[d] = 1.0/(temp); } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_compare[d] = 1.0/(temp); } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F5: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_grid[d] = 0.1*sin(1+temp)*sin(1+temp); } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_compare[d] = 0.1*sin(1+temp)*sin(1+temp); } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F6: for (d = 0; d < m_total; d++) { if (x_grid[2*d+1] <= 0.25) { f_grid[d] = 1.0; } else { f_grid[d] = 1.0/(sqrt(1+3*cos(2.0*KPI*x_grid[2*d+1])*cos(2.0*KPI*x_grid[2*d+1]))); } } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { if (x_compare[2*d+1] <= 0.25) { f_compare[d] = 1.0; } else { f_compare[d] = 1.0/(sqrt(1+3*cos(2.0*KPI*x_compare[2*d+1])*cos(2.0*KPI*x_compare[2*d+1]))); } } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; default: //fprintf(stderr,"Generating one function\n"); //fflush(stderr); for (d = 0; d < m_total; d++) { f_grid[d] = 1.0; } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { f_compare[d] = 1.0; } } else { 
memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; } //fprintf(stderr,"Initializing trafo\n"); //fflush(stderr); /* Init transform plan. */ nfsft_init_guru(&plan_adjoint,NQ[iNQ],m_total, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((NQ[iNQ]>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_adjoint_ptr = &plan_adjoint; if (mode == RANDOM) { nfsft_init_guru(&plan,NQ[iNQ],m_compare, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((NQ[iNQ]>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_ptr = &plan; } else { plan_ptr = &plan_adjoint; } f_hat = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(NQ[iNQ])*sizeof(double _Complex)); plan_adjoint_ptr->f_hat = f_hat; plan_adjoint_ptr->x = x_grid; plan_adjoint_ptr->f = f_grid; plan_ptr->f_hat = f_hat; plan_ptr->x = x_compare; plan_ptr->f = f; //fprintf(stderr,"Precomputing for x\n"); //fflush(stderr); nfsft_precompute_x(plan_adjoint_ptr); if (plan_adjoint_ptr != plan_ptr) { nfsft_precompute_x(plan_ptr); } /* Initialize cumulative time variable. */ t_avg = 0.0; err_infty_avg = 0.0; err_2_avg = 0.0; /* Cycle through all runs. */ for (i = 0; i < 1/*repetitions*/; i++) { //fprintf(stderr,"Copying original values\n"); //fflush(stderr); /* Copy exact funtion values to working array. */ //memcpy(f,f_grid,m_total*sizeof(double _Complex)); /* Initialize time measurement. */ t0 = getticks(); //fprintf(stderr,"Multiplying with quadrature weights\n"); //fflush(stderr); /* Multiplication with the quadrature weights. 
*/ /*fprintf(stderr,"\n");*/ d = 0; for (k = 0; k < m_theta; k++) { for (n = 0; n < m_phi; n++) { /*fprintf(stderr,"f_ref[%d] = %le + I*%le,\t f[%d] = %le + I*%le, \t w[%d] = %le\n", d,creal(f_ref[d]),cimag(f_ref[d]),d,creal(f[d]),cimag(f[d]),k, w[k]);*/ f_grid[d] *= w[k]; d++; } } t1 = getticks(); t_avg += nfft_elapsed_seconds(t1,t0); nfft_free(w); t0 = getticks(); /*fprintf(stderr,"\n"); d = 0; for (d = 0; d < grid_total; d++) { fprintf(stderr,"f[%d] = %le + I*%le, theta[%d] = %le, phi[%d] = %le\n", d,creal(f[d]),cimag(f[d]),d,x[2*d+1],d,x[2*d]); }*/ //fprintf(stderr,"Executing adjoint\n"); //fflush(stderr); /* Check if the fast NFSFT algorithm shall be tested. */ if (use_nfsft != NO) { /* Execute the adjoint NFSFT transformation. */ nfsft_adjoint(plan_adjoint_ptr); } else { /* Execute the adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(plan_adjoint_ptr); } /* Multiplication with the Fourier-Legendre coefficients. */ /*for (k = 0; k <= m[im]; k++) { for (n = -k; n <= k; n++) { fprintf(stderr,"f_hat[%d,%d] = %le\t + I*%le\n",k,n, creal(f_hat[NFSFT_INDEX(k,n,&plan_adjoint)]), cimag(f_hat[NFSFT_INDEX(k,n,&plan_adjoint)])); } }*/ //fprintf(stderr,"Executing trafo\n"); //fflush(stderr); if (use_nfsft != NO) { /* Execute the NFSFT transformation. */ nfsft_trafo(plan_ptr); } else { /* Execute the direct NDSFT transformation. */ nfsft_trafo_direct(plan_ptr); } t1 = getticks(); t_avg += nfft_elapsed_seconds(t1,t0); //fprintf(stderr,"Finalizing\n"); //fflush(stderr); /* Finalize the NFSFT plans */ nfsft_finalize(plan_adjoint_ptr); if (plan_ptr != plan_adjoint_ptr) { nfsft_finalize(plan_ptr); } /* Free data arrays. 
*/ nfft_free(f_hat); nfft_free(x_grid); err_infty_avg += X(error_l_infty_complex)(f, f_compare, m_compare); err_2_avg += X(error_l_2_complex)(f, f_compare, m_compare); nfft_free(f_grid); if (mode == RANDOM) { } else { nfft_free(f_compare); } /*for (d = 0; d < m_total; d++) { fprintf(stderr,"f_ref[%d] = %le + I*%le,\t f[%d] = %le + I*%le\n", d,creal(f_ref[d]),cimag(f_ref[d]),d,creal(f[d]),cimag(f[d])); }*/ } //fprintf(stderr,"Calculating the error\n"); //fflush(stderr); /* Calculate average time needed. */ t_avg = t_avg/((double)repetitions); /* Calculate the average error. */ err_infty_avg = err_infty_avg/((double)repetitions); /* Calculate the average error. */ err_2_avg = err_2_avg/((double)repetitions); /* Print out the error measurements. */ fprintf(stdout,"%+le %+le %+le\n", t_avg, err_infty_avg, err_2_avg); fprintf(stderr,"%d: %4d %4d %+le %+le %+le\n", tc, NQ[iNQ], SQ[iNQ], t_avg, err_infty_avg, err_2_avg); } } /* for (im = 0; im < im_max; im++) - Process all cut-off * bandwidths.*/ fprintf(stderr,"\n"); /* Delete precomputed data. */ nfsft_forget(); /* Free memory for cut-off bandwidths and grid size parameters. */ nfft_free(NQ); nfft_free(SQ); if (testmode == TIMING) { nfft_free(RQ); } if (mode == RANDOM) { nfft_free(x_compare); nfft_free(f_compare); nfft_free(f); } if (testmode == TIMING) { /* Allocate data structures. */ nfft_free(f_hat); nfft_free(f); nfft_free(x_grid); } } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */ /* Return exit code for successful run. */ return EXIT_SUCCESS; }
/**
 * Fast NFFT-based summation.
 *
 * Performs, in this order: the adjoint NFFT on ths->mv1, the pointwise
 * product of ths->b with the resulting coefficients (stored in
 * ths->mv2.f_hat), the NFFT on ths->mv2, and finally the near-field
 * correction that is added to ths->mv2.f and written to ths->f.
 *
 * ths->MEASURE_TIME_t[4..7] are reset to zero on entry; when MEASURE_TIME is
 * defined they receive the elapsed seconds of the four stages.
 *
 * \param ths  the fastsum plan, read and updated in place
 */
void fastsum_trafo(fastsum_plan *ths)
{
  int node, freq, dim;
  int stage;
  ticks t0, t1;

  /* Reset the timers of the four stages. */
  for (stage = 4; stage <= 7; stage++)
    ths->MEASURE_TIME_t[stage] = 0.0;

#ifdef MEASURE_TIME
  t0 = getticks();
#endif

  /* Stage 1: adjoint transform at the source nodes. */
  nfft_adjoint(&(ths->mv1));

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[4] += nfft_elapsed_seconds(t1,t0);
  t0 = getticks();
#endif

  /* Stage 2: multiply with the precomputed coefficients b. */
  #pragma omp parallel for default(shared) private(freq)
  for (freq = 0; freq < ths->mv2.N_total; freq++)
  {
    ths->mv2.f_hat[freq] = ths->b[freq] * ths->mv1.f_hat[freq];
  }

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[5] += nfft_elapsed_seconds(t1,t0);
  t0 = getticks();
#endif

  /* Stage 3: transform at the target nodes. */
  nfft_trafo(&(ths->mv2));

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[6] += nfft_elapsed_seconds(t1,t0);
  t0 = getticks();
#endif

  /* Stage 4: add the near-field contribution for every target node. */
  #pragma omp parallel for default(shared) private(node,freq,dim)
  for (node = 0; node < ths->M_total; node++)
  {
    if (ths->flags & NEARFIELD_BOXES)
    {
      ths->f[node] = ths->mv2.f[node] + SearchBox(ths->y + ths->d*node, ths);
    }
    else
    {
      /* Corners of the d-dimensional box of half-width eps_I around y_node. */
      double box_lo[ths->d], box_hi[ths->d];

      for (dim = 0; dim < ths->d; dim++)
      {
        box_lo[dim] = ths->y[ths->d*node+dim] - ths->eps_I;
        box_hi[dim] = ths->y[ths->d*node+dim] + ths->eps_I;
      }

      ths->f[node] = ths->mv2.f[node]
        + SearchTree(ths->d,0, ths->x, ths->alpha, box_lo, box_hi,
                     ths->N_total, ths->k, ths->kernel_param, ths->Ad,
                     ths->Add, ths->p, ths->flags);
    }
  }

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[7] += nfft_elapsed_seconds(t1,t0);
#endif
}
/**
 * Precomputation for fastsum.
 *
 * Builds the near-field search structure (boxes or sorted tree), tabulates
 * the regularised kernel in ths->Add unless EXACT_NEARFIELD is set, copies
 * the negated source and target nodes into the two NFFT plans and runs the
 * psi precomputations selected by their flags, copies ths->alpha into
 * ths->mv1.f, and finally fills ths->b with samples of the regularised
 * kernel that are shifted, passed through ths->fft_plan and shifted back.
 *
 * ths->MEASURE_TIME_t[0..3] are reset to zero on entry; when MEASURE_TIME is
 * defined they accumulate the elapsed seconds of the individual stages.
 *
 * \param ths  the fastsum plan, read and updated in place
 */
void fastsum_precompute(fastsum_plan *ths)
{
  int node, idx, dim;
  int grid_total;
  int stage;
  ticks t0, t1;

  for (stage = 0; stage <= 3; stage++)
    ths->MEASURE_TIME_t[stage] = 0.0;

#ifdef MEASURE_TIME
  t0 = getticks();
#endif

  /* Near-field search structure: boxes, or a sort of the source knots. */
  if (ths->flags & NEARFIELD_BOXES)
    BuildBox(ths);
  else
    BuildTree(ths->d,0,ths->x,ths->alpha,ths->N_total);

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[3] += nfft_elapsed_seconds(t1,t0);
  t0 = getticks();
#endif

  /* Table of spline values for the near field. */
  if (!(ths->flags & EXACT_NEARFIELD))
  {
    if (ths->d == 1)
    {
      #pragma omp parallel for default(shared) private(idx)
      for (idx = -ths->Ad/2-2; idx <= ths->Ad/2+2; idx++)
        ths->Add[idx+ths->Ad/2+2] =
          regkern1(ths->k, ths->eps_I*(double)idx/ths->Ad*2, ths->p,
                   ths->kernel_param, ths->eps_I, ths->eps_B);
    }
    else
    {
      #pragma omp parallel for default(shared) private(idx)
      for (idx = 0; idx <= ths->Ad+2; idx++)
        ths->Add[idx] =
          regkern3(ths->k, ths->eps_I*(double)idx/ths->Ad, ths->p,
                   ths->kernel_param, ths->eps_I, ths->eps_B);
    }
  }

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
  t0 = getticks();
#endif

  /* Nodes of the first plan: -x (transposed instead of adjoint transform). */
  for (idx = 0; idx < ths->mv1.d * ths->mv1.M_total; idx++)
    ths->mv1.x[idx] = - ths->x[idx];

  /* Psi precomputation for the first plan, as selected by its flags. */
  if (ths->mv1.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&(ths->mv1));
  if (ths->mv1.nfft_flags & PRE_PSI)
    nfft_precompute_psi(&(ths->mv1));
  if (ths->mv1.nfft_flags & PRE_FULL_PSI)
    nfft_precompute_full_psi(&(ths->mv1));

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[1] += nfft_elapsed_seconds(t1,t0);
#endif

  /* Input samples of the first plan are the source coefficients alpha. */
  for (node = 0; node < ths->mv1.M_total; node++)
    ths->mv1.f[node] = ths->alpha[node];

#ifdef MEASURE_TIME
  t0 = getticks();
#endif

  /* Nodes of the second plan: -y (conjugated instead of standard transform). */
  for (idx = 0; idx < ths->mv2.d * ths->mv2.M_total; idx++)
    ths->mv2.x[idx] = - ths->y[idx];

  /* Psi precomputation for the second plan, as selected by its flags. */
  if (ths->mv2.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&(ths->mv2));
  if (ths->mv2.nfft_flags & PRE_PSI)
    nfft_precompute_psi(&(ths->mv2));
  if (ths->mv2.nfft_flags & PRE_FULL_PSI)
    nfft_precompute_full_psi(&(ths->mv2));

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[2] += nfft_elapsed_seconds(t1,t0);
  t0 = getticks();
#endif

  /* Fourier coefficients of the regularised kernel on an n^d grid. */
  grid_total = 1;
  for (dim = 0; dim < ths->d; dim++)
    grid_total *= ths->n;

  #pragma omp parallel for default(shared) private(node,idx,dim)
  for (node = 0; node < grid_total; node++)
  {
    if (ths->d == 1)
    {
      ths->b[node] = regkern1(ths->k, (double)node / (ths->n) - 0.5, ths->p,
                              ths->kernel_param, ths->eps_I, ths->eps_B)
                     / grid_total;
    }
    else
    {
      /* Decode the multi-index digit by digit (base n) and accumulate the
       * squared distance of the grid point from the origin in b[node]. */
      idx = node;
      ths->b[node] = 0.0;
      for (dim = 0; dim < ths->d; dim++)
      {
        ths->b[node] += ((double)(idx % (ths->n)) / (ths->n) - 0.5)
                      * ((double)(idx % (ths->n)) / (ths->n) - 0.5);
        idx = idx / (ths->n);
      }
      ths->b[node] = regkern3(ths->k, sqrt(ths->b[node]), ths->p,
                              ths->kernel_param, ths->eps_I, ths->eps_B)
                     / grid_total;
    }
  }

  nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
  fftw_execute(ths->fft_plan);
  nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);

#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
#endif
}
static void simple_test_nfft_2d(void) { int K,N[2],n[2],M; double t; ticks t0, t1; nfft_plan p; N[0]=32; n[0]=64; N[1]=14; n[1]=32; M=N[0]*N[1]; K=16; t0 = getticks(); /** init a two dimensional plan */ nfft_init_guru(&p, 2, N, M, n, 7, PRE_PHI_HUT| PRE_FULL_PSI| MALLOC_F_HAT| MALLOC_X| MALLOC_F | FFTW_INIT| FFT_OUT_OF_PLACE, FFTW_ESTIMATE| FFTW_DESTROY_INPUT); /** init pseudo random nodes */ nfft_vrand_shifted_unit_double(p.x,p.d*p.M_total); /** precompute psi, the entries of the matrix B */ if(p.nfft_flags & PRE_ONE_PSI) nfft_precompute_one_psi(&p); /** init pseudo random Fourier coefficients and show them */ nfft_vrand_unit_complex(p.f_hat,p.N_total); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f_hat,K, "given Fourier coefficients, vector f_hat (first few entries)"); printf(" ... initialisation took %e seconds.\n",t); /** direct trafo and show the result */ t0 = getticks(); nfft_trafo_direct(&p); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f,K,"ndft, vector f (first few entries)"); printf(" took %e seconds.\n",t); /** approx. trafo and show the result */ t0 = getticks(); nfft_trafo(&p); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f,K,"nfft, vector f (first few entries)"); printf(" took %e seconds.\n",t); /** direct adjoint and show the result */ t0 = getticks(); nfft_adjoint_direct(&p); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f_hat,K,"adjoint ndft, vector f_hat (first few entries)"); printf(" took %e seconds.\n",t); /** approx. adjoint and show the result */ t0 = getticks(); nfft_adjoint(&p); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f_hat,K,"adjoint nfft, vector f_hat (first few entries)"); printf(" took %e seconds.\n",t); /** finalise the two dimensional plan */ nfft_finalize(&p); }
static void reconstruct(char* filename,int N,int M,int iteration , int weight) { int j,k,l; double time,min_time,max_time,min_inh,max_inh; ticks t0, t1; double t,real,imag; double w,epsilon=0.0000003; /* epsilon is a the break criterium for the iteration */; mri_inh_2d1d_plan my_plan; solver_plan_complex my_iplan; FILE* fp,*fw,*fout_real,*fout_imag,*finh,*ftime; int my_N[3],my_n[3]; int flags = PRE_PHI_HUT| PRE_PSI |MALLOC_X| MALLOC_F_HAT| MALLOC_F| FFTW_INIT| FFT_OUT_OF_PLACE; unsigned infft_flags = CGNR | PRECOMPUTE_DAMP; double Ts; double W,T; int N3; int m=2; double sigma = 1.25; ftime=fopen("readout_time.dat","r"); finh=fopen("inh.dat","r"); min_time=INT_MAX; max_time=INT_MIN; for(j=0;j<M;j++) { fscanf(ftime,"%le ",&time); if(time<min_time) min_time = time; if(time>max_time) max_time = time; } fclose(ftime); Ts=(min_time+max_time)/2.0; min_inh=INT_MAX; max_inh=INT_MIN; for(j=0;j<N*N;j++) { fscanf(finh,"%le ",&w); if(w<min_inh) min_inh = w; if(w>max_inh) max_inh = w; } fclose(finh); N3=ceil((MAX(fabs(min_inh),fabs(max_inh))*(max_time-min_time)/2.0+(m)/(2*sigma))*4*sigma); /* N3 has to be even */ if(N3%2!=0) N3++; T=((max_time-min_time)/2.0)/(0.5-((double) (m))/N3); W=N3/T; my_N[0]=N; my_n[0]=ceil(N*sigma); my_N[1]=N; my_n[1]=ceil(N*sigma); my_N[2]=N3; my_n[2]=N3; /* initialise nfft */ mri_inh_2d1d_init_guru(&my_plan, my_N, M, my_n, m, sigma, flags, FFTW_MEASURE| FFTW_DESTROY_INPUT); /* precompute lin psi if set */ if(my_plan.plan.nfft_flags & PRE_LIN_PSI) nfft_precompute_lin_psi(&my_plan.plan); if (weight) infft_flags = infft_flags | PRECOMPUTE_WEIGHT; /* initialise my_iplan, advanced */ solver_init_advanced_complex(&my_iplan,(nfft_mv_plan_complex*)(&my_plan), infft_flags ); /* get the weights */ if(my_iplan.flags & PRECOMPUTE_WEIGHT) { fw=fopen("weights.dat","r"); for(j=0;j<my_plan.M_total;j++) { fscanf(fw,"%le ",&my_iplan.w[j]); } fclose(fw); } /* get the damping factors */ if(my_iplan.flags & PRECOMPUTE_DAMP) { for(j=0;j<N;j++){ for(k=0;k<N;k++) { int j2= 
j-N/2; int k2= k-N/2; double r=sqrt(j2*j2+k2*k2); if(r>(double) N/2) my_iplan.w_hat[j*N+k]=0.0; else my_iplan.w_hat[j*N+k]=1.0; } } } fp=fopen(filename,"r"); ftime=fopen("readout_time.dat","r"); for(j=0;j<my_plan.M_total;j++) { fscanf(fp,"%le %le %le %le",&my_plan.plan.x[2*j+0],&my_plan.plan.x[2*j+1],&real,&imag); my_iplan.y[j]=real+ _Complex_I*imag; fscanf(ftime,"%le ",&my_plan.t[j]); my_plan.t[j] = (my_plan.t[j]-Ts)/T; } fclose(fp); fclose(ftime); finh=fopen("inh.dat","r"); for(j=0;j<N*N;j++) { fscanf(finh,"%le ",&my_plan.w[j]); my_plan.w[j]/=W; } fclose(finh); if(my_plan.plan.nfft_flags & PRE_PSI) { nfft_precompute_psi(&my_plan.plan); } if(my_plan.plan.nfft_flags & PRE_FULL_PSI) { nfft_precompute_full_psi(&my_plan.plan); } /* init some guess */ for(j=0;j<my_plan.N_total;j++) { my_iplan.f_hat_iter[j]=0.0; } t0 = getticks(); /* inverse trafo */ solver_before_loop_complex(&my_iplan); for(l=0;l<iteration;l++) { /* break if dot_r_iter is smaller than epsilon*/ if(my_iplan.dot_r_iter<epsilon) break; fprintf(stderr,"%e, %i of %i\n",sqrt(my_iplan.dot_r_iter), l+1,iteration); solver_loop_one_step_complex(&my_iplan); } t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); fout_real=fopen("output_real.dat","w"); fout_imag=fopen("output_imag.dat","w"); for (j=0;j<N*N;j++) { /* Verschiebung wieder herausrechnen */ my_iplan.f_hat_iter[j]*=cexp(-2.0*_Complex_I*KPI*Ts*my_plan.w[j]*W); fprintf(fout_real,"%le ",creal(my_iplan.f_hat_iter[j])); fprintf(fout_imag,"%le ",cimag(my_iplan.f_hat_iter[j])); } fclose(fout_real); fclose(fout_imag); solver_finalize_complex(&my_iplan); mri_inh_2d1d_finalize(&my_plan); }
/** Comparison of the FFTW, mpolar FFT, and inverse mpolar FFT */ static int comparison_fft(FILE *fp, int N, int T, int R) { ticks t0, t1; fftw_plan my_fftw_plan; fftw_complex *f_hat,*f; int m,k; double t_fft, t_dft_mpolar; f_hat = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*N*N); f = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*(T*R/4)*5); my_fftw_plan = fftw_plan_dft_2d(N,N,f_hat,f,FFTW_BACKWARD,FFTW_MEASURE); for(k=0; k<N*N; k++) f_hat[k] = (((double)rand())/RAND_MAX) + _Complex_I* (((double)rand())/RAND_MAX); t0 = getticks(); for(m=0;m<65536/N;m++) { fftw_execute(my_fftw_plan); /* touch */ f_hat[2]=2*f_hat[0]; } t1 = getticks(); GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0); t_fft=N*GLOBAL_elapsed_time/65536; if(N<256) { mpolar_dft(f_hat,N,f,T,R,1); t_dft_mpolar=GLOBAL_elapsed_time; } for (m=3; m<=9; m+=3) { if((m==3)&&(N<256)) fprintf(fp,"%d\t&\t&\t%1.1e&\t%1.1e&\t%d\t",N,t_fft,t_dft_mpolar,m); else if(m==3) fprintf(fp,"%d\t&\t&\t%1.1e&\t &\t%d\t",N,t_fft,m); else fprintf(fp," \t&\t&\t &\t &\t%d\t",m); printf("N=%d\tt_fft=%1.1e\tt_dft_mpolar=%1.1e\tm=%d\t",N,t_fft,t_dft_mpolar,m); mpolar_fft(f_hat,N,f,T,R,m); fprintf(fp,"%1.1e&\t",GLOBAL_elapsed_time); printf("t_mpolar=%1.1e\t",GLOBAL_elapsed_time); inverse_mpolar_fft(f,T,R,f_hat,N,2*m,m); if(m==9) fprintf(fp,"%1.1e\\\\\\hline\n",GLOBAL_elapsed_time); else fprintf(fp,"%1.1e\\\\\n",GLOBAL_elapsed_time); printf("t_impolar=%1.1e\n",GLOBAL_elapsed_time); } fflush(fp); nfft_free(f); nfft_free(f_hat); return EXIT_SUCCESS; }
/**
 * Inverse NFFT-based mpolar FFT.
 *
 * Builds the mpolar grid for (T,R), sets up a two-dimensional NFFT plan with
 * oversampling factor 2 and cut-off \p m together with a CGNR solver plan
 * using the grid weights, runs \p max_i solver iterations and copies the
 * iterate into \p f_hat.  The time spent in the solver is stored in
 * GLOBAL_elapsed_time.
 *
 * \param f      samples on the mpolar grid (input)
 * \param T      first grid parameter passed to mpolar_grid
 * \param R      second grid parameter passed to mpolar_grid
 * \param f_hat  receives the NN x NN reconstructed Fourier coefficients
 * \param NN     bandwidth per dimension
 * \param max_i  number of solver iterations; if smaller than 1 the vector
 *               p_hat_iter left by solver_before_loop_complex is returned
 * \param m      NFFT cut-off parameter
 *
 * \return EXIT_SUCCESS, or -1 if the node or weight array could not be
 *         allocated
 */
static int inverse_mpolar_fft(fftw_complex *f, int T, int R, fftw_complex *f_hat, int NN, int max_i, int m)
{
  ticks t0, t1;
  int j,k;                              /**< index for nodes and frequencies */
  nfft_plan my_nfft_plan;               /**< plan for the nfft-2D            */
  solver_plan_complex my_infft_plan;    /**< plan for the inverse nfft       */
  double *x, *w;                        /**< knots and associated weights    */
  int l;                                /**< index for iterations            */

  int N[2],n[2];
  int M;                                /**< number of knots                 */

  N[0]=NN; n[0]=2*N[0];                 /**< oversampling factor sigma=2     */
  N[1]=NN; n[1]=2*N[1];                 /**< oversampling factor sigma=2     */

  x = (double *)nfft_malloc(5*T*R/2*(sizeof(double)));
  if (x==NULL)
    return -1;

  w = (double *)nfft_malloc(5*T*R/4*(sizeof(double)));
  if (w==NULL)
  {
    /* Do not leak the node array when the weight array is unavailable. */
    nfft_free(x);
    return -1;
  }

  /** init two dimensional NFFT plan */
  M=mpolar_grid(T,R,x,w);
  nfft_init_guru(&my_nfft_plan, 2, N, M, n, m,
                  PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
                  FFTW_MEASURE| FFTW_DESTROY_INPUT);

  /** init two dimensional infft plan */
  solver_init_advanced_complex(&my_infft_plan,(nfft_mv_plan_complex*)(&my_nfft_plan), CGNR | PRECOMPUTE_WEIGHT );

  /** init nodes, given samples and weights */
  for(j=0;j<my_nfft_plan.M_total;j++)
  {
    my_nfft_plan.x[2*j+0] = x[2*j+0];
    my_nfft_plan.x[2*j+1] = x[2*j+1];
    my_infft_plan.y[j]    = f[j];
    my_infft_plan.w[j]    = w[j];
  }

  /** precompute psi, the entries of the matrix B */
  if(my_nfft_plan.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_PSI)
    nfft_precompute_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_FULL_PSI)
    nfft_precompute_full_psi(&my_nfft_plan);

  /** initialise damping factors (only if the solver flags request them;
   *  the flags passed above do not include PRECOMPUTE_DAMP) */
  if(my_infft_plan.flags & PRECOMPUTE_DAMP)
    for(j=0;j<my_nfft_plan.N[0];j++)
      for(k=0;k<my_nfft_plan.N[1];k++)
      {
        my_infft_plan.w_hat[j*my_nfft_plan.N[1]+k]=
          (sqrt(pow(j-my_nfft_plan.N[0]/2,2)+pow(k-my_nfft_plan.N[1]/2,2))>(my_nfft_plan.N[0]/2)?0:1);
      }

  /** initialise some guess f_hat_0 */
  for(k=0;k<my_nfft_plan.N_total;k++)
    my_infft_plan.f_hat_iter[k] = 0.0 + _Complex_I*0.0;

  t0 = getticks();

  /** solve the system */
  solver_before_loop_complex(&my_infft_plan);

  if (max_i<1)
  {
    /* No iteration requested: return the vector p_hat_iter as result. */
    for(k=0;k<my_nfft_plan.N_total;k++)
      my_infft_plan.f_hat_iter[k] = my_infft_plan.p_hat_iter[k];
  }
  else
  {
    for(l=1;l<=max_i;l++)
    {
      solver_loop_one_step_complex(&my_infft_plan);
    }
  }

  t1 = getticks();
  GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0);

  /** copy result */
  for(k=0;k<my_nfft_plan.N_total;k++)
    f_hat[k] = my_infft_plan.f_hat_iter[k];

  /** finalise the plans and free the variables */
  solver_finalize_complex(&my_infft_plan);
  nfft_finalize(&my_nfft_plan);
  nfft_free(x);
  nfft_free(w);

  return EXIT_SUCCESS;
}
/** * The main program. * * \param argc The number of arguments * \param argv An array containing the arguments as C-strings * * \return Exit code * * \author Jens Keiner */ int main (int argc, char **argv) { double **p; /* The array containing the parameter sets * * for the kernel functions */ int *m; /* The array containing the cut-off degrees M */ int **ld; /* The array containing the numbers of source * * and target nodes, L and D */ int ip; /* Index variable for p */ int im; /* Index variable for m */ int ild; /* Index variable for l */ int ipp; /* Index for kernel parameters */ int ip_max; /* The maximum index for p */ int im_max; /* The maximum index for m */ int ild_max; /* The maximum index for l */ int ipp_max; /* The maximum index for ip */ int tc_max; /* The number of testcases */ int m_max; /* The maximum cut-off degree M for the * * current dataset */ int l_max; /* The maximum number of source nodes L for * * the current dataset */ int d_max; /* The maximum number of target nodes D for * * the current dataset */ long ld_max_prec; /* The maximum number of source and target * * nodes for precomputation multiplied */ long l_max_prec; /* The maximum number of source nodes for * * precomputation */ int tc; /* Index variable for testcases */ int kt; /* The kernel function */ int cutoff; /* The current NFFT cut-off parameter */ double threshold; /* The current NFSFT threshold parameter */ double t_d; /* Time for direct algorithm in seconds */ double t_dp; /* Time for direct algorithm with * precomputation in seconds */ double t_fd; /* Time for fast direct algorithm in seconds */ double t_f; /* Time for fast algorithm in seconds */ double temp; /* */ double err_f; /* Error E_infty for fast algorithm */ double err_fd; /* Error E_\infty for fast direct algorithm */ ticks t0, t1; /* */ int precompute = NO; /* */ fftw_complex *ptr; /* */ double* steed; /* */ fftw_complex *b; /* The weights (b_l)_{l=0}^{L-1} */ fftw_complex *f_hat; /* The spherical Fourier 
coefficients */ fftw_complex *a; /* The Fourier-Legendre coefficients */ double *xi; /* Target nodes */ double *eta; /* Source nodes */ fftw_complex *f_m; /* Approximate function values */ fftw_complex *f; /* Exact function values */ fftw_complex *prec = NULL; /* */ nfsft_plan plan; /* NFSFT plan */ nfsft_plan plan_adjoint; /* adjoint NFSFT plan */ int i; /* */ int k; /* */ int n; /* */ int d; /* */ int l; /* */ int use_nfsft; /* */ int use_nfft; /* */ int use_fpt; /* */ int rinc; /* */ double constant; /* */ /* Read the number of testcases. */ fscanf(stdin,"testcases=%d\n",&tc_max); fprintf(stdout,"%d\n",tc_max); /* Process each testcase. */ for (tc = 0; tc < tc_max; tc++) { /* Check if the fast transform shall be used. */ fscanf(stdin,"nfsft=%d\n",&use_nfsft); fprintf(stdout,"%d\n",use_nfsft); if (use_nfsft != NO) { /* Check if the NFFT shall be used. */ fscanf(stdin,"nfft=%d\n",&use_nfft); fprintf(stdout,"%d\n",use_nfft); if (use_nfft != NO) { /* Read the cut-off parameter. */ fscanf(stdin,"cutoff=%d\n",&cutoff); fprintf(stdout,"%d\n",cutoff); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ cutoff = 1; } /* Check if the fast polynomial transform shall be used. */ fscanf(stdin,"fpt=%d\n",&use_fpt); fprintf(stdout,"%d\n",use_fpt); /* Read the NFSFT threshold parameter. */ fscanf(stdin,"threshold=%lf\n",&threshold); fprintf(stdout,"%lf\n",threshold); } else { /* TODO remove this */ /* Set dummy values. */ cutoff = 3; threshold = 1000000000000.0; } /* Initialize bandwidth bound. */ m_max = 0; /* Initialize source nodes bound. */ l_max = 0; /* Initialize target nodes bound. */ d_max = 0; /* Initialize source nodes bound for precomputation. */ l_max_prec = 0; /* Initialize source and target nodes bound for precomputation. */ ld_max_prec = 0; /* Read the kernel type. This is one of KT_ABEL_POISSON, KT_SINGULARITY, * KT_LOC_SUPP and KT_GAUSSIAN. 
*/ fscanf(stdin,"kernel=%d\n",&kt); fprintf(stdout,"%d\n",kt); /* Read the number of parameter sets. */ fscanf(stdin,"parameter_sets=%d\n",&ip_max); fprintf(stdout,"%d\n",ip_max); /* Allocate memory for pointers to parameter sets. */ p = (double**) nfft_malloc(ip_max*sizeof(double*)); /* We now read in the parameter sets. */ /* Read number of parameters. */ fscanf(stdin,"parameters=%d\n",&ipp_max); fprintf(stdout,"%d\n",ipp_max); for (ip = 0; ip < ip_max; ip++) { /* Allocate memory for the parameters. */ p[ip] = (double*) nfft_malloc(ipp_max*sizeof(double)); /* Read the parameters. */ for (ipp = 0; ipp < ipp_max; ipp++) { /* Read the next parameter. */ fscanf(stdin,"%lf\n",&p[ip][ipp]); fprintf(stdout,"%lf\n",p[ip][ipp]); } } /* Read the number of cut-off degrees. */ fscanf(stdin,"bandwidths=%d\n",&im_max); fprintf(stdout,"%d\n",im_max); m = (int*) nfft_malloc(im_max*sizeof(int)); /* Read the cut-off degrees. */ for (im = 0; im < im_max; im++) { /* Read cut-off degree. */ fscanf(stdin,"%d\n",&m[im]); fprintf(stdout,"%d\n",m[im]); m_max = MAX(m_max,m[im]); } /* Read number of node specifications. */ fscanf(stdin,"node_sets=%d\n",&ild_max); fprintf(stdout,"%d\n",ild_max); ld = (int**) nfft_malloc(ild_max*sizeof(int*)); /* Read the run specification. */ for (ild = 0; ild < ild_max; ild++) { /* Allocate memory for the run parameters. */ ld[ild] = (int*) nfft_malloc(5*sizeof(int)); /* Read number of source nodes. */ fscanf(stdin,"L=%d ",&ld[ild][0]); fprintf(stdout,"%d\n",ld[ild][0]); l_max = MAX(l_max,ld[ild][0]); /* Read number of target nodes. */ fscanf(stdin,"D=%d ",&ld[ild][1]); fprintf(stdout,"%d\n",ld[ild][1]); d_max = MAX(d_max,ld[ild][1]); /* Determine whether direct and fast algorithm shall be compared. */ fscanf(stdin,"compare=%d ",&ld[ild][2]); fprintf(stdout,"%d\n",ld[ild][2]); /* Check if precomputation for the direct algorithm is used. */ if (ld[ild][2] == YES) { /* Read whether the precomputed version shall also be used. 
*/ fscanf(stdin,"precomputed=%d\n",&ld[ild][3]); fprintf(stdout,"%d\n",ld[ild][3]); /* Read the number of repetitions over which measurements are * averaged. */ fscanf(stdin,"repetitions=%d\n",&ld[ild][4]); fprintf(stdout,"%d\n",ld[ild][4]); /* Update ld_max_prec and l_max_prec. */ if (ld[ild][3] == YES) { /* Update ld_max_prec. */ ld_max_prec = MAX(ld_max_prec,ld[ild][0]*ld[ild][1]); /* Update l_max_prec. */ l_max_prec = MAX(l_max_prec,ld[ild][0]); /* Turn on the precomputation for the direct algorithm. */ precompute = YES; } } else { /* Set default value for the number of repetitions. */ ld[ild][4] = 1; } } /* Allocate memory for data structures. */ b = (fftw_complex*) nfft_malloc(l_max*sizeof(fftw_complex)); eta = (double*) nfft_malloc(2*l_max*sizeof(double)); f_hat = (fftw_complex*) nfft_malloc(NFSFT_F_HAT_SIZE(m_max)*sizeof(fftw_complex)); a = (fftw_complex*) nfft_malloc((m_max+1)*sizeof(fftw_complex)); xi = (double*) nfft_malloc(2*d_max*sizeof(double)); f_m = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex)); f = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex)); /* Allocate memory for precomputed data. */ if (precompute == YES) { prec = (fftw_complex*) nfft_malloc(ld_max_prec*sizeof(fftw_complex)); } /* Generate random source nodes and weights. */ for (l = 0; l < l_max; l++) { b[l] = (((double)rand())/RAND_MAX) - 0.5; eta[2*l] = (((double)rand())/RAND_MAX) - 0.5; eta[2*l+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI); } /* Generate random target nodes. */ for (d = 0; d < d_max; d++) { xi[2*d] = (((double)rand())/RAND_MAX) - 0.5; xi[2*d+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI); } /* Do precomputation. */ nfsft_precompute(m_max,threshold, ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U); /* Process all parameter sets. */ for (ip = 0; ip < ip_max; ip++) { /* Compute kernel coeffcients up to the maximum cut-off degree m_max. 
*/ switch (kt) { case KT_ABEL_POISSON: /* Compute Fourier-Legendre coefficients for the Poisson kernel. */ for (k = 0; k <= m_max; k++) a[k] = SYMBOL_ABEL_POISSON(k,p[ip][0]); break; case KT_SINGULARITY: /* Compute Fourier-Legendre coefficients for the singularity * kernel. */ for (k = 0; k <= m_max; k++) a[k] = SYMBOL_SINGULARITY(k,p[ip][0]); break; case KT_LOC_SUPP: /* Compute Fourier-Legendre coefficients for the locally supported * kernel. */ a[0] = 1.0; if (1 <= m_max) a[1] = ((p[ip][1]+1+p[ip][0])/(p[ip][1]+2.0))*a[0]; for (k = 2; k <= m_max; k++) a[k] = (1.0/(k+p[ip][1]+1))*((2*k-1)*p[ip][0]*a[k-1] - (k-p[ip][1]-2)*a[k-2]); break; case KT_GAUSSIAN: /* Fourier-Legendre coefficients */ steed = (double*) nfft_malloc((m_max+1)*sizeof(double)); smbi(2.0*p[ip][0],0.5,m_max+1,2,steed); for (k = 0; k <= m_max; k++) a[k] = K2PI*(sqrt(KPI/p[ip][0]))*steed[k]; nfft_free(steed); break; } /* Normalize Fourier-Legendre coefficients. */ for (k = 0; k <= m_max; k++) a[k] *= (2*k+1)/(K4PI); /* Process all node sets. */ for (ild = 0; ild < ild_max; ild++) { /* Check if the fast algorithm shall be used. */ if (ld[ild][2] != NO) { /* Check if the direct algorithm with precomputation should be * tested. */ if (ld[ild][3] != NO) { /* Get pointer to start of data. */ ptr = prec; /* Calculate increment from one row to the next. */ rinc = l_max_prec-ld[ild][0]; /* Process al target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute inner product between current source and target * node. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Switch by the kernel type. */ switch (kt) { case KT_ABEL_POISSON: /* Evaluate the Poisson kernel for the current value. */ *ptr++ = poissonKernel(temp,p[ip][0]); break; case KT_SINGULARITY: /* Evaluate the singularity kernel for the current * value. 
*/ *ptr++ = singularityKernel(temp,p[ip][0]); break; case KT_LOC_SUPP: /* Evaluate the localized kernel for the current * value. */ *ptr++ = locallySupportedKernel(temp,p[ip][0],p[ip][1]); break; case KT_GAUSSIAN: /* Evaluate the spherical Gaussian kernel for the current * value. */ *ptr++ = gaussianKernel(temp,p[ip][0]); break; } } /* Increment pointer for next row. */ ptr += rinc; } /* Initialize cumulative time variable. */ t_dp = 0.0; /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Reset pointer to start of precomputed data. */ ptr = prec; /* Calculate increment from one row to the next. */ rinc = l_max_prec-ld[ild][0]; /* Check if the localized kernel is used. */ if (kt == KT_LOC_SUPP) { /* Perform final summation */ /* Calculate the multiplicative constant. */ constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1))); /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) f[d] += b[l]*(*ptr++); /* Multiply with the constant. */ f[d] *= constant; /* Proceed to next row. */ ptr += rinc; } } else { /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) f[d] += b[l]*(*ptr++); /* Proceed to next row. */ ptr += rinc; } } } /* Calculate the time needed. */ t1 = getticks(); t_dp = nfft_elapsed_seconds(t1,t0); /* Calculate average time needed. */ t_dp = t_dp/((double)ld[ild][4]); } else { /* Initialize cumulative time variable with dummy value. */ t_dp = -1.0; } /* Initialize cumulative time variable. */ t_d = 0.0; /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Switch by the kernel type. */ switch (kt) { case KT_ABEL_POISSON: /* Process all target nodes. 
*/ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. */ f[d] += b[l]*poissonKernel(temp,p[ip][0]); } } break; case KT_SINGULARITY: /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. */ f[d] += b[l]*singularityKernel(temp,p[ip][0]); } } break; case KT_LOC_SUPP: /* Calculate the multiplicative constant. */ constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1))); /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. */ f[d] += b[l]*locallySupportedKernel(temp,p[ip][0],p[ip][1]); } /* Multiply result with constant. */ f[d] *= constant; } break; case KT_GAUSSIAN: /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. 
*/ f[d] += b[l]*gaussianKernel(temp,p[ip][0]); } } break; } } /* Calculate and add the time needed. */ t1 = getticks(); t_d = nfft_elapsed_seconds(t1,t0); /* Calculate average time needed. */ t_d = t_d/((double)ld[ild][4]); } else { /* Initialize cumulative time variable with dummy value. */ t_d = -1.0; t_dp = -1.0; } /* Initialize error and cumulative time variables for the fast * algorithm. */ err_fd = -1.0; err_f = -1.0; t_fd = -1.0; t_f = -1.0; /* Process all cut-off bandwidths. */ for (im = 0; im < im_max; im++) { /* Init transform plans. */ nfsft_init_guru(&plan_adjoint, m[im],ld[ild][0], ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)), PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); nfsft_init_guru(&plan,m[im],ld[ild][1], ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)), PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_adjoint.f_hat = f_hat; plan_adjoint.x = eta; plan_adjoint.f = b; plan.f_hat = f_hat; plan.x = xi; plan.f = f_m; nfsft_precompute_x(&plan_adjoint); nfsft_precompute_x(&plan); /* Check if direct algorithm shall also be tested. */ if (use_nfsft == BOTH) { /* Initialize cumulative time variable. */ t_fd = 0.0; /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Execute adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(&plan_adjoint); /* Multiplication with the Fourier-Legendre coefficients. */ for (k = 0; k <= m[im]; k++) for (n = -k; n <= k; n++) f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k]; /* Execute direct NDSFT transformation. */ nfsft_trafo_direct(&plan); } /* Calculate and add the time needed. */ t1 = getticks(); t_fd = nfft_elapsed_seconds(t1,t0); /* Calculate average time needed. */ t_fd = t_fd/((double)ld[ild][4]); /* Check if error E_infty should be computed. */ if (ld[ild][2] != NO) { /* Compute the error E_infinity. 
*/ err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, ld[ild][0]); } } /* Check if the fast NFSFT algorithm shall also be tested. */ if (use_nfsft != NO) { /* Initialize cumulative time variable for the NFSFT algorithm. */ t_f = 0.0; } else { /* Initialize cumulative time variable for the direct NDSFT * algorithm. */ t_fd = 0.0; } /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Check if the fast NFSFT algorithm shall also be tested. */ if (use_nfsft != NO) { /* Execute the adjoint NFSFT transformation. */ nfsft_adjoint(&plan_adjoint); } else { /* Execute the adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(&plan_adjoint); } /* Multiplication with the Fourier-Legendre coefficients. */ for (k = 0; k <= m[im]; k++) for (n = -k; n <= k; n++) f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k]; /* Check if the fast NFSFT algorithm shall also be tested. */ if (use_nfsft != NO) { /* Execute the NFSFT transformation. */ nfsft_trafo(&plan); } else { /* Execute the NDSFT transformation. */ nfsft_trafo_direct(&plan); } } /* Check if the fast NFSFT algorithm has been used. */ t1 = getticks(); if (use_nfsft != NO) t_f = nfft_elapsed_seconds(t1,t0); else t_fd = nfft_elapsed_seconds(t1,t0); /* Check if the fast NFSFT algorithm has been used. */ if (use_nfsft != NO) { /* Calculate average time needed. */ t_f = t_f/((double)ld[ild][4]); } else { /* Calculate average time needed. */ t_fd = t_fd/((double)ld[ild][4]); } /* Check if error E_infty should be computed. */ if (ld[ild][2] != NO) { /* Check if the fast NFSFT algorithm has been used. */ if (use_nfsft != NO) { /* Compute the error E_infinity. */ err_f = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, ld[ild][0]); } else { /* Compute the error E_infinity. */ err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, ld[ild][0]); } } /* Print out the error measurements. 
*/ fprintf(stdout,"%e\n%e\n%e\n%e\n%e\n%e\n\n",t_d,t_dp,t_fd,t_f,err_fd, err_f); /* Finalize the NFSFT plans */ nfsft_finalize(&plan_adjoint); nfsft_finalize(&plan); } /* for (im = 0; im < im_max; im++) - Process all cut-off * bandwidths.*/ } /* for (ild = 0; ild < ild_max; ild++) - Process all node sets. */ } /* for (ip = 0; ip < ip_max; ip++) - Process all parameter sets. */ /* Delete precomputed data. */ nfsft_forget(); /* Check if memory for precomputed data of the matrix K has been * allocated. */ if (precompute == YES) { /* Free memory for precomputed matrix K. */ nfft_free(prec); } /* Free data arrays. */ nfft_free(f); nfft_free(f_m); nfft_free(xi); nfft_free(eta); nfft_free(a); nfft_free(f_hat); nfft_free(b); /* Free memory for node sets. */ for (ild = 0; ild < ild_max; ild++) nfft_free(ld[ild]); nfft_free(ld); /* Free memory for cut-off bandwidths. */ nfft_free(m); /* Free memory for parameter sets. */ for (ip = 0; ip < ip_max; ip++) nfft_free(p[ip]); nfft_free(p); } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */ /* Return exit code for successful run. */ return EXIT_SUCCESS; }
void bench_openmp(FILE *infile, int m, int psi_flag) { nfft_plan p; int *N; int *n; int M, d, trafo_adjoint; int t, j; double re,im; ticks t0, t1; double tt_total, tt_preonepsi; fscanf(infile, "%d %d", &d, &trafo_adjoint); N = malloc(d*sizeof(int)); n = malloc(d*sizeof(int)); for (t=0; t<d; t++) fscanf(infile, "%d", N+t); for (t=0; t<d; t++) fscanf(infile, "%d", n+t); fscanf(infile, "%d", &M); #ifdef _OPENMP fftw_import_wisdom_from_filename("nfft_benchomp_detail_threads.plan"); #else fftw_import_wisdom_from_filename("nfft_benchomp_detail_single.plan"); #endif /** init an d-dimensional plan */ nfft_init_guru(&p, d, N, M, n, m, PRE_PHI_HUT| psi_flag | MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE, FFTW_MEASURE| FFTW_DESTROY_INPUT); #ifdef _OPENMP fftw_export_wisdom_to_filename("nfft_benchomp_detail_threads.plan"); #else fftw_export_wisdom_to_filename("nfft_benchomp_detail_single.plan"); #endif for (j=0; j < p.M_total; j++) { for (t=0; t < p.d; t++) fscanf(infile, "%lg", p.x+p.d*j+t); } if (trafo_adjoint==0) { for (j=0; j < p.N_total; j++) { fscanf(infile, "%lg %lg", &re, &im); p.f_hat[j] = re + _Complex_I * im; } } else { for (j=0; j < p.M_total; j++) { fscanf(infile, "%lg %lg", &re, &im); p.f[j] = re + _Complex_I * im; } } t0 = getticks(); /** precompute psi, the entries of the matrix B */ if(p.nfft_flags & PRE_ONE_PSI) nfft_precompute_one_psi(&p); t1 = getticks(); tt_preonepsi = nfft_elapsed_seconds(t1,t0); if (trafo_adjoint==0) nfft_trafo(&p); else nfft_adjoint(&p); t1 = getticks(); tt_total = nfft_elapsed_seconds(t1,t0); #ifndef MEASURE_TIME p.MEASURE_TIME_t[0] = 0.0; p.MEASURE_TIME_t[2] = 0.0; #endif #ifndef MEASURE_TIME_FFTW p.MEASURE_TIME_t[1] = 0.0; #endif printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_preonepsi, p.MEASURE_TIME_t[0], p.MEASURE_TIME_t[1], p.MEASURE_TIME_t[2], tt_total-tt_preonepsi-p.MEASURE_TIME_t[0]-p.MEASURE_TIME_t[1]-p.MEASURE_TIME_t[2], tt_total); // printf("%.6e\n", tt); free(N); free(n); /** finalise the one dimensional 
plan */ nfft_finalize(&p); }