void nfsft_benchomp_createdataset(unsigned int trafo_adjoint, int N, int M) { int t, j, k, n; R *x; C *f, *f_hat; int N_total = (2*N+2) * (2*N+2); nfsft_plan ptemp; nfsft_init_guru(&ptemp, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F | NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, 6); x = (R*) nfft_malloc(2*M*sizeof(R)); f = (C*) nfft_malloc(M*sizeof(C)); f_hat = (C*) nfft_malloc(N_total*sizeof(C)); /* init pseudo-random nodes */ for (j = 0; j < M; j++) { x[2*j]= X(drand48)() - K(0.5); x[2*j+1]= K(0.5) * X(drand48)(); } if (trafo_adjoint==0) { for (k = 0; k <= N; k++) for (n = -k; n <= k; n++) nfft_vrand_unit_complex(f_hat+NFSFT_INDEX(k,n,&ptemp),1); } else { nfft_vrand_unit_complex(f,M); } printf("%d %d %d\n", trafo_adjoint, N, M); for (j=0; j < M; j++) { for (t=0; t < 2; t++) printf("%.16e ", x[2*j+t]); printf("\n"); } if (trafo_adjoint==0) { for (k = 0; k <= N; k++) for (n = -k; n <= k; n++) printf("%.16e %.16e\n", creal(f_hat[NFSFT_INDEX(k,n,&ptemp)]), cimag(f_hat[NFSFT_INDEX(k,n,&ptemp)])); } else { for (j=0; j < M; j++) printf("%.16e %.16e\n", creal(f[j]), cimag(f[j])); } nfft_free(x); nfft_free(f); nfft_free(f_hat); }
/**
 * Reads a benchmark data set in the layout "trafo_adjoint N M", followed by
 * M node coordinate pairs, followed by either the Fourier coefficients
 * (trafo_adjoint == 0, one "re im" pair per index (k,n), 0 <= k <= N,
 * -k <= n <= k) or M function values ("re im" per line).
 *
 * The arrays *x, *f_hat and *f are allocated here with nfft_malloc(); the
 * function does not free them. *f_hat and *f are zero-filled before reading,
 * so the array that is not present in the file stays all-zero.
 *
 * NOTE(review): the return values of fscanf() are not checked; malformed input
 * leaves the affected outputs unset.
 *
 * \param infile        The stream to read from
 * \param trafo_adjoint Receives the transform selector read from the header
 * \param N             Receives the bandwidth
 * \param M             Receives the number of nodes
 * \param x             Receives the node array (2*M doubles)
 * \param f_hat         Receives the coefficient array ((2N+2)^2 entries)
 * \param f             Receives the function value array (M entries)
 */
void bench_openmp_readfile(FILE *infile, int *trafo_adjoint, int *N, int *M, double **x, C **f_hat, C **f)
{
  double re,im;
  int k, n, j, t;
  nfsft_plan plan;

  fscanf(infile, "%d %d %d", trafo_adjoint, N, M);
  *x = (double *)nfft_malloc(2*(*M)*sizeof(double));
  *f_hat = (C*)nfft_malloc((2*(*N)+2) * (2*(*N)+2) * sizeof(C));
  *f = (C*)nfft_malloc((*M)*sizeof(C));

  memset(*f_hat,0U,(2*(*N)+2) * (2*(*N)+2) * sizeof(C));
  memset(*f,0U,(*M)*sizeof(C));

#ifdef _OPENMP
  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
#else
  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
#endif

  /* The plan is used below only as the argument of NFSFT_INDEX. */
  nfsft_init_guru(&plan, *N, *M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
    NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
    PRE_PHI_HUT | FFTW_INIT | FFT_OUT_OF_PLACE, 6);

#ifdef _OPENMP
  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
#else
  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
#endif

  /* Nodes: two coordinates each. */
  for (j=0; j < *M; j++)
    for (t=0; t < 2; t++)
      fscanf(infile, "%lg", (*x)+2*j+t);

  /* Dereference the selector: the original compared the pointer itself with
   * 0, which is never true for a valid argument, so coefficient data sets
   * were always parsed as function values. */
  if (*trafo_adjoint==0)
  {
    for (k = 0; k <= *N; k++)
      for (n = -k; n <= k; n++)
      {
        fscanf(infile, "%lg %lg", &re, &im);
        (*f_hat)[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
      }
  }
  else
  {
    for (j=0; j < *M; j++)
    {
      fscanf(infile, "%lg %lg", &re, &im);
      (*f)[j] = re + _Complex_I * im;
    }
  }

  nfsft_finalize(&plan);
}
/** * The main program. * * \param argc The number of arguments * \param argv An array containing the arguments as C-strings * * \return Exit code * * \author Jens Keiner */ int main (int argc, char **argv) { int T; int N; int M; int M2; int t; /* Index variable for testcases */ nfsft_plan plan; /* NFSFT plan */ nfsft_plan plan2; /* NFSFT plan */ solver_plan_complex iplan; /* NFSFT plan */ int j; /* */ int k; /* */ int m; /* */ int use_nfsft; /* */ int use_nfft; /* */ int use_fpt; /* */ int cutoff; /**< The current NFFT cut-off parameter */ double threshold; /**< The current NFSFT threshold parameter */ double re; double im; double a; double *scratch; double xs; double *ys; double *temp; double _Complex *temp2; int qlength; double *qweights; fftw_plan fplan; fpt_set set; int npt; int npt_exp; double *alpha, *beta, *gamma; /* Read the number of testcases. */ fscanf(stdin,"testcases=%d\n",&T); fprintf(stderr,"%d\n",T); /* Process each testcase. */ for (t = 0; t < T; t++) { /* Check if the fast transform shall be used. */ fscanf(stdin,"nfsft=%d\n",&use_nfsft); fprintf(stderr,"%d\n",use_nfsft); if (use_nfsft != NO) { /* Check if the NFFT shall be used. */ fscanf(stdin,"nfft=%d\n",&use_nfft); fprintf(stderr,"%d\n",use_nfsft); if (use_nfft != NO) { /* Read the cut-off parameter. */ fscanf(stdin,"cutoff=%d\n",&cutoff); fprintf(stderr,"%d\n",cutoff); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ cutoff = 1; } /* Check if the fast polynomial transform shall be used. */ fscanf(stdin,"fpt=%d\n",&use_fpt); fprintf(stderr,"%d\n",use_fpt); if (use_fpt != NO) { /* Read the NFSFT threshold parameter. */ fscanf(stdin,"threshold=%lf\n",&threshold); fprintf(stderr,"%lf\n",threshold); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ threshold = 1000.0; } } else { /* TODO remove this */ /* Set dummy values. */ use_nfft = NO; use_fpt = NO; cutoff = 3; threshold = 1000.0; } /* Read the bandwidth. 
*/ fscanf(stdin,"bandwidth=%d\n",&N); fprintf(stderr,"%d\n",N); /* Do precomputation. */ nfsft_precompute(N,threshold, ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U); /* Read the number of nodes. */ fscanf(stdin,"nodes=%d\n",&M); fprintf(stderr,"%d\n",M); /* */ if ((N+1)*(N+1) > M) { X(next_power_of_2_exp)(N, &npt, &npt_exp); fprintf(stderr, "npt = %d, npt_exp = %d\n", npt, npt_exp); fprintf(stderr,"Optimal interpolation!\n"); scratch = (double*) nfft_malloc(4*sizeof(double)); ys = (double*) nfft_malloc((N+1)*sizeof(double)); temp = (double*) nfft_malloc((2*N+1)*sizeof(double)); temp2 = (double _Complex*) nfft_malloc((N+1)*sizeof(double _Complex)); a = 0.0; for (j = 0; j <= N; j++) { xs = 2.0 + (2.0*j)/(N+1); ys[j] = (2.0-((j == 0)?(1.0):(0.0)))*4.0*nfft_bspline(4,xs,scratch); //fprintf(stdout,"%3d: g(%le) = %le\n",j,xs,ys[j]); a += ys[j]; } //fprintf(stdout,"a = %le\n",a); for (j = 0; j <= N; j++) { ys[j] *= 1.0/a; } qlength = 2*N+1; qweights = (double*) nfft_malloc(qlength*sizeof(double)); fplan = fftw_plan_r2r_1d(N+1, qweights, qweights, FFTW_REDFT00, 0U); for (j = 0; j < N+1; j++) { qweights[j] = -2.0/(4*j*j-1); } fftw_execute(fplan); qweights[0] *= 0.5; for (j = 0; j < N+1; j++) { qweights[j] *= 1.0/(2.0*N+1.0); qweights[2*N+1-1-j] = qweights[j]; } fplan = fftw_plan_r2r_1d(2*N+1, temp, temp, FFTW_REDFT00, 0U); for (j = 0; j <= N; j++) { temp[j] = ((j==0 || j == 2*N)?(1.0):(0.5))*ys[j]; } for (j = N+1; j < 2*N+1; j++) { temp[j] = 0.0; } fftw_execute(fplan); for (j = 0; j < 2*N+1; j++) { temp[j] *= qweights[j]; } fftw_execute(fplan); for (j = 0; j < 2*N+1; j++) { temp[j] *= ((j==0 || j == 2*N)?(1.0):(0.5)); if (j <= N) { temp2[j] = temp[j]; } } set = fpt_init(1, npt_exp, 0U); alpha = (double*) nfft_malloc((N+2)*sizeof(double)); beta = (double*) nfft_malloc((N+2)*sizeof(double)); gamma = (double*) nfft_malloc((N+2)*sizeof(double)); alpha_al_row(alpha, N, 0); beta_al_row(beta, N, 0); gamma_al_row(gamma, N, 0); 
fpt_precompute(set, 0, alpha, beta, gamma, 0, 1000.0); fpt_transposed(set,0, temp2, temp2, N, 0U); fpt_finalize(set); nfft_free(alpha); nfft_free(beta); nfft_free(gamma); fftw_destroy_plan(fplan); nfft_free(scratch); nfft_free(qweights); nfft_free(ys); nfft_free(temp); } /* Init transform plans. */ nfsft_init_guru(&plan, N, M, ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)) | NFSFT_MALLOC_F | NFSFT_MALLOC_X | NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_ZERO_F_HAT, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); if ((N+1)*(N+1) > M) { solver_init_advanced_complex(&iplan, (nfft_mv_plan_complex*)(&plan), CGNE | PRECOMPUTE_DAMP); } else { solver_init_advanced_complex(&iplan, (nfft_mv_plan_complex*)(&plan), CGNR | PRECOMPUTE_WEIGHT | PRECOMPUTE_DAMP); } /* Read the nodes and function values. */ for (j = 0; j < M; j++) { fscanf(stdin,"%le %le %le %le\n",&plan.x[2*j+1],&plan.x[2*j],&re,&im); plan.x[2*j+1] = plan.x[2*j+1]/(2.0*PI); plan.x[2*j] = plan.x[2*j]/(2.0*PI); if (plan.x[2*j] >= 0.5) { plan.x[2*j] = plan.x[2*j] - 1; } iplan.y[j] = re + _Complex_I * im; fprintf(stderr,"%le %le %le %le\n",plan.x[2*j+1],plan.x[2*j], creal(iplan.y[j]),cimag(iplan.y[j])); } /* Read the number of nodes. */ fscanf(stdin,"nodes_eval=%d\n",&M2); fprintf(stderr,"%d\n",M2); /* Init transform plans. */ nfsft_init_guru(&plan2, N, M2, ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)) | NFSFT_MALLOC_F | NFSFT_MALLOC_X | NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_ZERO_F_HAT, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); /* Read the nodes and function values. 
*/ for (j = 0; j < M2; j++) { fscanf(stdin,"%le %le\n",&plan2.x[2*j+1],&plan2.x[2*j]); plan2.x[2*j+1] = plan2.x[2*j+1]/(2.0*PI); plan2.x[2*j] = plan2.x[2*j]/(2.0*PI); if (plan2.x[2*j] >= 0.5) { plan2.x[2*j] = plan2.x[2*j] - 1; } fprintf(stderr,"%le %le\n",plan2.x[2*j+1],plan2.x[2*j]); } nfsft_precompute_x(&plan); nfsft_precompute_x(&plan2); /* Frequency weights. */ if ((N+1)*(N+1) > M) { /* Compute Voronoi weights. */ //nfft_voronoi_weights_S2(iplan.w, plan.x, M); /* Print out Voronoi weights. */ /*a = 0.0; for (j = 0; j < plan.M_total; j++) { fprintf(stderr,"%le\n",iplan.w[j]); a += iplan.w[j]; } fprintf(stderr,"sum = %le\n",a);*/ for (j = 0; j < plan.N_total; j++) { iplan.w_hat[j] = 0.0; } for (k = 0; k <= N; k++) { for (j = -k; j <= k; j++) { iplan.w_hat[NFSFT_INDEX(k,j,&plan)] = 1.0/(pow(k+1.0,2.0)); /*temp2[j]*/; } } } else { for (j = 0; j < plan.N_total; j++) { iplan.w_hat[j] = 0.0; } for (k = 0; k <= N; k++) { for (j = -k; j <= k; j++) { iplan.w_hat[NFSFT_INDEX(k,j,&plan)] = 1/(pow(k+1.0,2.5)); } } /* Compute Voronoi weights. */ nfft_voronoi_weights_S2(iplan.w, plan.x, M); /* Print out Voronoi weights. 
*/ a = 0.0; for (j = 0; j < plan.M_total; j++) { fprintf(stderr,"%le\n",iplan.w[j]); a += iplan.w[j]; } fprintf(stderr,"sum = %le\n",a); } fprintf(stderr, "N_total = %d\n", plan.N_total); fprintf(stderr, "M_total = %d\n", plan.M_total); /* init some guess */ for (k = 0; k < plan.N_total; k++) { iplan.f_hat_iter[k] = 0.0; } /* inverse trafo */ solver_before_loop_complex(&iplan); /*for (k = 0; k < plan.M_total; k++) { printf("%le %le\n",creal(iplan.r_iter[k]),cimag(iplan.r_iter[k])); }*/ for (m = 0; m < 29; m++) { fprintf(stderr,"Residual ||r||=%e,\n",sqrt(iplan.dot_r_iter)); solver_loop_one_step_complex(&iplan); } /*NFFT_SWAP_complex(iplan.f_hat_iter, plan.f_hat); nfsft_trafo(&plan); NFFT_SWAP_complex(iplan.f_hat_iter, plan.f_hat); a = 0.0; b = 0.0; for (k = 0; k < plan.M_total; k++) { printf("%le %le %le\n",cabs(iplan.y[k]),cabs(plan.f[k]), cabs(iplan.y[k]-plan.f[k])); a += cabs(iplan.y[k]-plan.f[k])*cabs(iplan.y[k]-plan.f[k]); b += cabs(iplan.y[k])*cabs(iplan.y[k]); } fprintf(stderr,"relative error in 2-norm: %le\n",a/b);*/ NFFT_SWAP_complex(iplan.f_hat_iter, plan2.f_hat); nfsft_trafo(&plan2); NFFT_SWAP_complex(iplan.f_hat_iter, plan2.f_hat); for (k = 0; k < plan2.M_total; k++) { fprintf(stdout,"%le\n",cabs(plan2.f[k])); } solver_finalize_complex(&iplan); nfsft_finalize(&plan); nfsft_finalize(&plan2); /* Delete precomputed data. */ nfsft_forget(); if ((N+1)*(N+1) > M) { nfft_free(temp2); } } /* Process each testcase. */ /* Return exit code for successful run. */ return EXIT_SUCCESS; }
/**
 * Runs one timed NFSFT transform on caller-supplied data and prints one line
 * of timings to stdout.
 *
 * The plan is created with nfsft_init_guru(), which allocates its own x, f and
 * f_hat arrays; the caller's data is copied into them. Depending on
 * trafo_adjoint either nfsft_trafo() or nfsft_adjoint() is executed.
 *
 * Printed columns: node precomputation time, plan.MEASURE_TIME_t[0],
 * plan.MEASURE_TIME_t[1], plan.MEASURE_TIME_t[2], the remainder of the total
 * after subtracting those four, and the total time. Entries of
 * MEASURE_TIME_t are forced to zero when the corresponding MEASURE_TIME /
 * MEASURE_TIME_FFTW macro is not defined.
 *
 * \param trafo_adjoint 0 for nfsft_trafo(), non-zero for nfsft_adjoint()
 * \param N             The bandwidth
 * \param M             The number of nodes
 * \param x             The nodes, two doubles per node
 * \param f_hat         The coefficients (read when trafo_adjoint == 0)
 * \param f             The function values (read when trafo_adjoint != 0)
 * \param m             The cut-off parameter handed to nfsft_init_guru()
 * \param nfsft_flags   Extra NFSFT flags OR-ed into the plan flags
 * \param psi_flags     Extra flags OR-ed into the second flag argument
 */
void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
{
  nfsft_plan plan;
  int deg, ord;
  int node;
  ticks tick_start, tick_pre, tick_end;
  double tt_total, tt_pre;

  /* Set up the plan through the guru interface; all arrays are plan-owned
   * and the coefficient array is preserved across transforms. */
  nfsft_init_guru(&plan, N, M, nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F |
    NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
    PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE, m);

  /* Copy both coordinates of every node into the plan. */
  for (node = 0; node < plan.M_total; node++)
  {
    plan.x[2*node] = x[2*node];
    plan.x[2*node+1] = x[2*node+1];
  }

  /* The coefficient array starts out zeroed for either transform. */
  memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));

  if (trafo_adjoint != 0)
  {
    /* Adjoint transform: the input is the function values. */
    for (node = 0; node < plan.M_total; node++)
    {
      plan.f[node] = f[node];
    }
  }
  else
  {
    /* Forward transform: the input is the coefficients at the (deg,ord)
     * index positions; everything else stays zero. */
    for (deg = 0; deg <= plan.N; deg++)
    {
      for (ord = -deg; ord <= deg; ord++)
      {
        plan.f_hat[NFSFT_INDEX(deg,ord,&plan)] = f_hat[NFSFT_INDEX(deg,ord,&plan)];
      }
    }
  }

  tick_start = getticks();

  /* Node-dependent precomputation, timed separately. */
  nfsft_precompute_x(&plan);
  tick_pre = getticks();
  tt_pre = nfft_elapsed_seconds(tick_pre,tick_start);

  if (trafo_adjoint != 0)
  {
    nfsft_adjoint(&plan);
  }
  else
  {
    nfsft_trafo(&plan);
  }

  /* The total includes the precomputation. */
  tick_end = getticks();
  tt_total = nfft_elapsed_seconds(tick_end,tick_start);

#ifndef MEASURE_TIME
  plan.MEASURE_TIME_t[0] = 0.0;
  plan.MEASURE_TIME_t[2] = 0.0;
#endif

#ifndef MEASURE_TIME_FFTW
  plan.MEASURE_TIME_t[1] = 0.0;
#endif

  printf("%.6e %.6e %6e %.6e %.6e %.6e\n",
    tt_pre,
    plan.MEASURE_TIME_t[0],
    plan.MEASURE_TIME_t[1],
    plan.MEASURE_TIME_t[2],
    tt_total-tt_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2],
    tt_total);

  /* Release the plan and the arrays it allocated. */
  nfsft_finalize(&plan);
}
/** * The main program. * * \param argc The number of arguments * \param argv An array containing the arguments as C-strings * * \return Exit code */ int main (int argc, char **argv) { int tc; /**< The index variable for testcases */ int tc_max; /**< The number of testcases */ int *NQ; /**< The array containing the cut-off degrees * \f$N\f$ */ int NQ_max; /**< The maximum cut-off degree \f$N\f$ for the* current testcase */ int *SQ; /**< The array containing the grid size parameters */ int SQ_max; /**< The maximum grid size parameter */ int *RQ; /**< The array containing the grid size parameters */ int iNQ; /**< Index variable for cut-off degrees */ int iNQ_max; /**< The maximum number of cut-off degrees */ int testfunction; /**< The testfunction */ int N; /**< The test function's bandwidth */ int use_nfsft; /**< Whether to use the NFSFT algorithm or not */ int use_nfft; /**< Whether to use the NFFT algorithm or not */ int use_fpt; /**< Whether to use the FPT algorithm or not */ int cutoff; /**< The current NFFT cut-off parameter */ double threshold; /**< The current NFSFT threshold parameter */ int gridtype; /**< The type of quadrature grid to be used */ int repetitions; /**< The number of repetitions to be performed */ int mode; /**< The number of repetitions to be performed */ double *w; /**< The quadrature weights */ double *x_grid; /**< The quadrature nodes */ double *x_compare; /**< The quadrature nodes */ double _Complex *f_grid; /**< The reference function values */ double _Complex *f_compare; /**< The function values */ double _Complex *f; /**< The function values */ double _Complex *f_hat_gen; /**< The reference spherical Fourier * coefficients */ double _Complex *f_hat; /**< The spherical Fourier coefficients */ nfsft_plan plan_adjoint; /**< The NFSFT plan */ nfsft_plan plan; /**< The NFSFT plan */ nfsft_plan plan_gen; /**< The NFSFT plan */ double t_avg; /**< The average computation time needed */ double err_infty_avg; /**< The average error 
\f$E_\infty\f$ */ double err_2_avg; /**< The average error \f$E_2\f$ */ int i; /**< A loop variable */ int k; /**< A loop variable */ int n; /**< A loop variable */ int d; /**< A loop variable */ int m_theta; /**< The current number of different * colatitudinal angles (for grids) */ int m_phi; /**< The current number of different * longitudinal angles (for grids). */ int m_total; /**< The total number nodes. */ double *theta; /**< An array for saving the angles theta of a * grid */ double *phi; /**< An array for saving the angles phi of a * grid */ fftw_plan fplan; /**< An FFTW plan for computing Clenshaw-Curtis quadrature weights */ //int nside; /**< The size parameter for the HEALPix grid */ int d2; int M; double theta_s; double x1,x2,x3,temp; int m_compare; nfsft_plan *plan_adjoint_ptr; nfsft_plan *plan_ptr; double *w_temp; int testmode; ticks t0, t1; /* Read the number of testcases. */ fscanf(stdin,"testcases=%d\n",&tc_max); fprintf(stdout,"%d\n",tc_max); /* Process each testcase. */ for (tc = 0; tc < tc_max; tc++) { /* Check if the fast transform shall be used. */ fscanf(stdin,"nfsft=%d\n",&use_nfsft); fprintf(stdout,"%d\n",use_nfsft); if (use_nfsft != NO) { /* Check if the NFFT shall be used. */ fscanf(stdin,"nfft=%d\n",&use_nfft); fprintf(stdout,"%d\n",use_nfsft); if (use_nfft != NO) { /* Read the cut-off parameter. */ fscanf(stdin,"cutoff=%d\n",&cutoff); fprintf(stdout,"%d\n",cutoff); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ cutoff = 1; } /* Check if the fast polynomial transform shall be used. */ fscanf(stdin,"fpt=%d\n",&use_fpt); fprintf(stdout,"%d\n",use_fpt); if (use_fpt != NO) { /* Read the NFSFT threshold parameter. */ fscanf(stdin,"threshold=%lf\n",&threshold); fprintf(stdout,"%lf\n",threshold); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ threshold = 1000.0; } } else { /* TODO remove this */ /* Set dummy values. 
*/ use_nfft = NO; use_fpt = NO; cutoff = 3; threshold = 1000.0; } /* Read the testmode type. */ fscanf(stdin,"testmode=%d\n",&testmode); fprintf(stdout,"%d\n",testmode); if (testmode == ERROR) { /* Read the quadrature grid type. */ fscanf(stdin,"gridtype=%d\n",&gridtype); fprintf(stdout,"%d\n",gridtype); /* Read the test function. */ fscanf(stdin,"testfunction=%d\n",&testfunction); fprintf(stdout,"%d\n",testfunction); /* Check if random bandlimited function has been chosen. */ if (testfunction == FUNCTION_RANDOM_BANDLIMITED) { /* Read the bandwidht. */ fscanf(stdin,"bandlimit=%d\n",&N); fprintf(stdout,"%d\n",N); } else { N = 1; } /* Read the number of repetitions. */ fscanf(stdin,"repetitions=%d\n",&repetitions); fprintf(stdout,"%d\n",repetitions); fscanf(stdin,"mode=%d\n",&mode); fprintf(stdout,"%d\n",mode); if (mode == RANDOM) { /* Read the bandwidht. */ fscanf(stdin,"points=%d\n",&m_compare); fprintf(stdout,"%d\n",m_compare); x_compare = (double*) nfft_malloc(2*m_compare*sizeof(double)); d = 0; while (d < m_compare) { x1 = 2.0*(((double)rand())/RAND_MAX) - 1.0; x2 = 2.0*(((double)rand())/RAND_MAX) - 1.0; x3 = 2.0*(((double)rand())/RAND_MAX) - 1.0; temp = sqrt(x1*x1+x2*x2+x3*x3); if (temp <= 1) { x_compare[2*d+1] = acos(x3); if (x_compare[2*d+1] == 0 || x_compare[2*d+1] == KPI) { x_compare[2*d] = 0.0; } else { x_compare[2*d] = atan2(x2/sin(x_compare[2*d+1]),x1/sin(x_compare[2*d+1])); } x_compare[2*d] *= 1.0/(2.0*KPI); x_compare[2*d+1] *= 1.0/(2.0*KPI); d++; } } f_compare = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex)); f = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex)); } } /* Initialize maximum cut-off degree and grid size parameter. */ NQ_max = 0; SQ_max = 0; /* Read the number of cut-off degrees. */ fscanf(stdin,"bandwidths=%d\n",&iNQ_max); fprintf(stdout,"%d\n",iNQ_max); /* Allocate memory for the cut-off degrees and grid size parameters. 
*/ NQ = (int*) nfft_malloc(iNQ_max*sizeof(int)); SQ = (int*) nfft_malloc(iNQ_max*sizeof(int)); if (testmode == TIMING) { RQ = (int*) nfft_malloc(iNQ_max*sizeof(int)); } /* Read the cut-off degrees and grid size parameters. */ for (iNQ = 0; iNQ < iNQ_max; iNQ++) { if (testmode == TIMING) { /* Read cut-off degree and grid size parameter. */ fscanf(stdin,"%d %d %d\n",&NQ[iNQ],&SQ[iNQ],&RQ[iNQ]); fprintf(stdout,"%d %d %d\n",NQ[iNQ],SQ[iNQ],RQ[iNQ]); NQ_max = MAX(NQ_max,NQ[iNQ]); SQ_max = MAX(SQ_max,SQ[iNQ]); } else { /* Read cut-off degree and grid size parameter. */ fscanf(stdin,"%d %d\n",&NQ[iNQ],&SQ[iNQ]); fprintf(stdout,"%d %d\n",NQ[iNQ],SQ[iNQ]); NQ_max = MAX(NQ_max,NQ[iNQ]); SQ_max = MAX(SQ_max,SQ[iNQ]); } } /* Do precomputation. */ //fprintf(stderr,"NFSFT Precomputation\n"); //fflush(stderr); nfsft_precompute(NQ_max, threshold, ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U)), 0U); if (testmode == TIMING) { /* Allocate data structures. */ f_hat = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(NQ_max)*sizeof(double _Complex)); f = (double _Complex*) nfft_malloc(SQ_max*sizeof(double _Complex)); x_grid = (double*) nfft_malloc(2*SQ_max*sizeof(double)); for (d = 0; d < SQ_max; d++) { f[d] = (((double)rand())/RAND_MAX)-0.5 + _Complex_I*((((double)rand())/RAND_MAX)-0.5); x_grid[2*d] = (((double)rand())/RAND_MAX) - 0.5; x_grid[2*d+1] = (((double)rand())/RAND_MAX) * 0.5; } } //fprintf(stderr,"Entering loop\n"); //fflush(stderr); /* Process all cut-off bandwidths. */ for (iNQ = 0; iNQ < iNQ_max; iNQ++) { if (testmode == TIMING) { nfsft_init_guru(&plan,NQ[iNQ],SQ[iNQ], NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFTW_MEASURE | FFT_OUT_OF_PLACE, cutoff); plan.f_hat = f_hat; plan.x = x_grid; plan.f = f; nfsft_precompute_x(&plan); t_avg = 0.0; for (i = 0; i < RQ[iNQ]; i++) { t0 = getticks(); if (use_nfsft != NO) { /* Execute the adjoint NFSFT transformation. 
*/ nfsft_adjoint(&plan); } else { /* Execute the adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(&plan); } t1 = getticks(); t_avg += nfft_elapsed_seconds(t1,t0); } t_avg = t_avg/((double)RQ[iNQ]); nfsft_finalize(&plan); fprintf(stdout,"%+le\n", t_avg); fprintf(stderr,"%d: %4d %4d %+le\n", tc, NQ[iNQ], SQ[iNQ], t_avg); } else { /* Determine the maximum number of nodes. */ switch (gridtype) { case GRID_GAUSS_LEGENDRE: /* Calculate grid dimensions. */ m_theta = SQ[iNQ] + 1; m_phi = 2*SQ[iNQ] + 2; m_total = m_theta*m_phi; break; case GRID_CLENSHAW_CURTIS: /* Calculate grid dimensions. */ m_theta = 2*SQ[iNQ] + 1; m_phi = 2*SQ[iNQ] + 2; m_total = m_theta*m_phi; break; case GRID_HEALPIX: m_theta = 1; m_phi = 12*SQ[iNQ]*SQ[iNQ]; m_total = m_theta * m_phi; //fprintf("HEALPix: SQ = %d, m_theta = %d, m_phi= %d, m"); break; case GRID_EQUIDISTRIBUTION: case GRID_EQUIDISTRIBUTION_UNIFORM: m_theta = 2; //fprintf(stderr,"ed: m_theta = %d\n",m_theta); for (k = 1; k < SQ[iNQ]; k++) { m_theta += (int)floor((2*KPI)/acos((cos(KPI/(double)SQ[iNQ])- cos(k*KPI/(double)SQ[iNQ])*cos(k*KPI/(double)SQ[iNQ]))/ (sin(k*KPI/(double)SQ[iNQ])*sin(k*KPI/(double)SQ[iNQ])))); //fprintf(stderr,"ed: m_theta = %d\n",m_theta); } //fprintf(stderr,"ed: m_theta final = %d\n",m_theta); m_phi = 1; m_total = m_theta * m_phi; break; } /* Allocate memory for data structures. */ w = (double*) nfft_malloc(m_theta*sizeof(double)); x_grid = (double*) nfft_malloc(2*m_total*sizeof(double)); //fprintf(stderr,"NQ = %d\n",NQ[iNQ]); //fflush(stderr); switch (gridtype) { case GRID_GAUSS_LEGENDRE: //fprintf(stderr,"Generating grid for NQ = %d, SQ = %d\n",NQ[iNQ],SQ[iNQ]); //fflush(stderr); /* Read quadrature weights. */ for (k = 0; k < m_theta; k++) { fscanf(stdin,"%le\n",&w[k]); w[k] *= (2.0*KPI)/((double)m_phi); } //fprintf(stderr,"Allocating theta and phi\n"); //fflush(stderr); /* Allocate memory to store the grid's angles. 
*/ theta = (double*) nfft_malloc(m_theta*sizeof(double)); phi = (double*) nfft_malloc(m_phi*sizeof(double)); //if (theta == NULL || phi == NULL) //{ //fprintf(stderr,"Couldn't allocate theta and phi\n"); //fflush(stderr); //} /* Read angles theta. */ for (k = 0; k < m_theta; k++) { fscanf(stdin,"%le\n",&theta[k]); } /* Generate the grid angles phi. */ for (n = 0; n < m_phi; n++) { phi[n] = n/((double)m_phi); phi[n] -= ((phi[n]>=0.5)?(1.0):(0.0)); } //fprintf(stderr,"Generating grid nodes\n"); //fflush(stderr); /* Generate the grid's nodes. */ d = 0; for (k = 0; k < m_theta; k++) { for (n = 0; n < m_phi; n++) { x_grid[2*d] = phi[n]; x_grid[2*d+1] = theta[k]; d++; } } //fprintf(stderr,"Freeing theta and phi\n"); //fflush(stderr); /* Free the arrays for the grid's angles. */ nfft_free(theta); nfft_free(phi); break; case GRID_CLENSHAW_CURTIS: /* Allocate memory to store the grid's angles. */ theta = (double*) nfft_malloc(m_theta*sizeof(double)); phi = (double*) nfft_malloc(m_phi*sizeof(double)); /* Generate the grid angles theta. */ for (k = 0; k < m_theta; k++) { theta[k] = k/((double)2*(m_theta-1)); } /* Generate the grid angles phi. */ for (n = 0; n < m_phi; n++) { phi[n] = n/((double)m_phi); phi[n] -= ((phi[n]>=0.5)?(1.0):(0.0)); } /* Generate quadrature weights. */ fplan = fftw_plan_r2r_1d(SQ[iNQ]+1, w, w, FFTW_REDFT00, 0U); for (k = 0; k < SQ[iNQ]+1; k++) { w[k] = -2.0/(4*k*k-1); } fftw_execute(fplan); w[0] *= 0.5; for (k = 0; k < SQ[iNQ]+1; k++) { w[k] *= (2.0*KPI)/((double)(m_theta-1)*m_phi); w[m_theta-1-k] = w[k]; } fftw_destroy_plan(fplan); /* Generate the grid's nodes. */ d = 0; for (k = 0; k < m_theta; k++) { for (n = 0; n < m_phi; n++) { x_grid[2*d] = phi[n]; x_grid[2*d+1] = theta[k]; d++; } } /* Free the arrays for the grid's angles. 
*/ nfft_free(theta); nfft_free(phi); break; case GRID_HEALPIX: d = 0; for (k = 1; k <= SQ[iNQ]-1; k++) { for (n = 0; n <= 4*k-1; n++) { x_grid[2*d+1] = 1 - (k*k)/((double)(3.0*SQ[iNQ]*SQ[iNQ])); x_grid[2*d] = ((n+0.5)/(4*k)); x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0); d++; } } d2 = d-1; for (k = SQ[iNQ]; k <= 3*SQ[iNQ]; k++) { for (n = 0; n <= 4*SQ[iNQ]-1; n++) { x_grid[2*d+1] = 2.0/(3*SQ[iNQ])*(2*SQ[iNQ]-k); x_grid[2*d] = (n+((k%2==0)?(0.5):(0.0)))/(4*SQ[iNQ]); x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0); d++; } } for (k = 1; k <= SQ[iNQ]-1; k++) { for (n = 0; n <= 4*k-1; n++) { x_grid[2*d+1] = -x_grid[2*d2+1]; x_grid[2*d] = x_grid[2*d2]; d++; d2--; } } for (d = 0; d < m_total; d++) { x_grid[2*d+1] = acos(x_grid[2*d+1])/(2.0*KPI); } w[0] = (4.0*KPI)/(m_total); break; case GRID_EQUIDISTRIBUTION: case GRID_EQUIDISTRIBUTION_UNIFORM: /* TODO Compute the weights. */ if (gridtype == GRID_EQUIDISTRIBUTION) { w_temp = (double*) nfft_malloc((SQ[iNQ]+1)*sizeof(double)); fplan = fftw_plan_r2r_1d(SQ[iNQ]/2+1, w_temp, w_temp, FFTW_REDFT00, 0U); for (k = 0; k < SQ[iNQ]/2+1; k++) { w_temp[k] = -2.0/(4*k*k-1); } fftw_execute(fplan); w_temp[0] *= 0.5; for (k = 0; k < SQ[iNQ]/2+1; k++) { w_temp[k] *= (2.0*KPI)/((double)(SQ[iNQ])); w_temp[SQ[iNQ]-k] = w_temp[k]; } fftw_destroy_plan(fplan); } d = 0; x_grid[2*d] = -0.5; x_grid[2*d+1] = 0.0; if (gridtype == GRID_EQUIDISTRIBUTION) { w[d] = w_temp[0]; } else { w[d] = (4.0*KPI)/(m_total); } d = 1; x_grid[2*d] = -0.5; x_grid[2*d+1] = 0.5; if (gridtype == GRID_EQUIDISTRIBUTION) { w[d] = w_temp[SQ[iNQ]]; } else { w[d] = (4.0*KPI)/(m_total); } d = 2; for (k = 1; k < SQ[iNQ]; k++) { theta_s = (double)k*KPI/(double)SQ[iNQ]; M = (int)floor((2.0*KPI)/acos((cos(KPI/(double)SQ[iNQ])- cos(theta_s)*cos(theta_s))/(sin(theta_s)*sin(theta_s)))); for (n = 0; n < M; n++) { x_grid[2*d] = (n + 0.5)/M; x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0); x_grid[2*d+1] = theta_s/(2.0*KPI); if (gridtype == GRID_EQUIDISTRIBUTION) { w[d] = 
w_temp[k]/((double)(M)); } else { w[d] = (4.0*KPI)/(m_total); } d++; } } if (gridtype == GRID_EQUIDISTRIBUTION) { nfft_free(w_temp); } break; default: break; } /* Allocate memory for grid values. */ f_grid = (double _Complex*) nfft_malloc(m_total*sizeof(double _Complex)); if (mode == RANDOM) { } else { m_compare = m_total; f_compare = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex)); x_compare = x_grid; f = f_grid; } //fprintf(stderr,"Generating test function\n"); //fflush(stderr); switch (testfunction) { case FUNCTION_RANDOM_BANDLIMITED: f_hat_gen = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(N)*sizeof(double _Complex)); //fprintf(stderr,"Generating random test function\n"); //fflush(stderr); /* Generate random function samples by sampling a bandlimited * function. */ nfsft_init_guru(&plan_gen,N,m_total, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((N>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_gen.f_hat = f_hat_gen; plan_gen.x = x_grid; plan_gen.f = f_grid; nfsft_precompute_x(&plan_gen); for (k = 0; k < plan_gen.N_total; k++) { f_hat_gen[k] = 0.0; } for (k = 0; k <= N; k++) { for (n = -k; n <= k; n++) { f_hat_gen[NFSFT_INDEX(k,n,&plan_gen)] = (((double)rand())/RAND_MAX)-0.5 + _Complex_I*((((double)rand())/RAND_MAX)-0.5); } } if (use_nfsft != NO) { /* Execute the NFSFT transformation. */ nfsft_trafo(&plan_gen); } else { /* Execute the direct NDSFT transformation. */ nfsft_trafo_direct(&plan_gen); } nfsft_finalize(&plan_gen); if (mode == RANDOM) { nfsft_init_guru(&plan_gen,N,m_compare, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((N>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_gen.f_hat = f_hat_gen; plan_gen.x = x_compare; plan_gen.f = f_compare; nfsft_precompute_x(&plan_gen); if (use_nfsft != NO) { /* Execute the NFSFT transformation. 
*/ nfsft_trafo(&plan_gen); } else { /* Execute the direct NDSFT transformation. */ nfsft_trafo_direct(&plan_gen); } nfsft_finalize(&plan_gen); } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } nfft_free(f_hat_gen); break; case FUNCTION_F1: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); f_grid[d] = x1*x2*x3; } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); f_compare[d] = x1*x2*x3; } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F2: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); f_grid[d] = 0.1*exp(x1+x2+x3); } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); f_compare[d] = 0.1*exp(x1+x2+x3); } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F3: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_grid[d] = 0.1*temp; } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_compare[d] = 0.1*temp; } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case 
FUNCTION_F4: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_grid[d] = 1.0/(temp); } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_compare[d] = 1.0/(temp); } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F5: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_grid[d] = 0.1*sin(1+temp)*sin(1+temp); } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_compare[d] = 0.1*sin(1+temp)*sin(1+temp); } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F6: for (d = 0; d < m_total; d++) { if (x_grid[2*d+1] <= 0.25) { f_grid[d] = 1.0; } else { f_grid[d] = 1.0/(sqrt(1+3*cos(2.0*KPI*x_grid[2*d+1])*cos(2.0*KPI*x_grid[2*d+1]))); } } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { if (x_compare[2*d+1] <= 0.25) { f_compare[d] = 1.0; } else { f_compare[d] = 1.0/(sqrt(1+3*cos(2.0*KPI*x_compare[2*d+1])*cos(2.0*KPI*x_compare[2*d+1]))); } } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; default: //fprintf(stderr,"Generating one function\n"); //fflush(stderr); for (d = 0; d < m_total; d++) { f_grid[d] = 1.0; } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { f_compare[d] = 1.0; } } else { 
memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; } //fprintf(stderr,"Initializing trafo\n"); //fflush(stderr); /* Init transform plan. */ nfsft_init_guru(&plan_adjoint,NQ[iNQ],m_total, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((NQ[iNQ]>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_adjoint_ptr = &plan_adjoint; if (mode == RANDOM) { nfsft_init_guru(&plan,NQ[iNQ],m_compare, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((NQ[iNQ]>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_ptr = &plan; } else { plan_ptr = &plan_adjoint; } f_hat = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(NQ[iNQ])*sizeof(double _Complex)); plan_adjoint_ptr->f_hat = f_hat; plan_adjoint_ptr->x = x_grid; plan_adjoint_ptr->f = f_grid; plan_ptr->f_hat = f_hat; plan_ptr->x = x_compare; plan_ptr->f = f; //fprintf(stderr,"Precomputing for x\n"); //fflush(stderr); nfsft_precompute_x(plan_adjoint_ptr); if (plan_adjoint_ptr != plan_ptr) { nfsft_precompute_x(plan_ptr); } /* Initialize cumulative time variable. */ t_avg = 0.0; err_infty_avg = 0.0; err_2_avg = 0.0; /* Cycle through all runs. */ for (i = 0; i < 1/*repetitions*/; i++) { //fprintf(stderr,"Copying original values\n"); //fflush(stderr); /* Copy exact funtion values to working array. */ //memcpy(f,f_grid,m_total*sizeof(double _Complex)); /* Initialize time measurement. */ t0 = getticks(); //fprintf(stderr,"Multiplying with quadrature weights\n"); //fflush(stderr); /* Multiplication with the quadrature weights. 
*/ /*fprintf(stderr,"\n");*/ d = 0; for (k = 0; k < m_theta; k++) { for (n = 0; n < m_phi; n++) { /*fprintf(stderr,"f_ref[%d] = %le + I*%le,\t f[%d] = %le + I*%le, \t w[%d] = %le\n", d,creal(f_ref[d]),cimag(f_ref[d]),d,creal(f[d]),cimag(f[d]),k, w[k]);*/ f_grid[d] *= w[k]; d++; } } t1 = getticks(); t_avg += nfft_elapsed_seconds(t1,t0); nfft_free(w); t0 = getticks(); /*fprintf(stderr,"\n"); d = 0; for (d = 0; d < grid_total; d++) { fprintf(stderr,"f[%d] = %le + I*%le, theta[%d] = %le, phi[%d] = %le\n", d,creal(f[d]),cimag(f[d]),d,x[2*d+1],d,x[2*d]); }*/ //fprintf(stderr,"Executing adjoint\n"); //fflush(stderr); /* Check if the fast NFSFT algorithm shall be tested. */ if (use_nfsft != NO) { /* Execute the adjoint NFSFT transformation. */ nfsft_adjoint(plan_adjoint_ptr); } else { /* Execute the adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(plan_adjoint_ptr); } /* Multiplication with the Fourier-Legendre coefficients. */ /*for (k = 0; k <= m[im]; k++) { for (n = -k; n <= k; n++) { fprintf(stderr,"f_hat[%d,%d] = %le\t + I*%le\n",k,n, creal(f_hat[NFSFT_INDEX(k,n,&plan_adjoint)]), cimag(f_hat[NFSFT_INDEX(k,n,&plan_adjoint)])); } }*/ //fprintf(stderr,"Executing trafo\n"); //fflush(stderr); if (use_nfsft != NO) { /* Execute the NFSFT transformation. */ nfsft_trafo(plan_ptr); } else { /* Execute the direct NDSFT transformation. */ nfsft_trafo_direct(plan_ptr); } t1 = getticks(); t_avg += nfft_elapsed_seconds(t1,t0); //fprintf(stderr,"Finalizing\n"); //fflush(stderr); /* Finalize the NFSFT plans */ nfsft_finalize(plan_adjoint_ptr); if (plan_ptr != plan_adjoint_ptr) { nfsft_finalize(plan_ptr); } /* Free data arrays. 
*/ nfft_free(f_hat); nfft_free(x_grid); err_infty_avg += X(error_l_infty_complex)(f, f_compare, m_compare); err_2_avg += X(error_l_2_complex)(f, f_compare, m_compare); nfft_free(f_grid); if (mode == RANDOM) { } else { nfft_free(f_compare); } /*for (d = 0; d < m_total; d++) { fprintf(stderr,"f_ref[%d] = %le + I*%le,\t f[%d] = %le + I*%le\n", d,creal(f_ref[d]),cimag(f_ref[d]),d,creal(f[d]),cimag(f[d])); }*/ } //fprintf(stderr,"Calculating the error\n"); //fflush(stderr); /* Calculate average time needed. */ t_avg = t_avg/((double)repetitions); /* Calculate the average error. */ err_infty_avg = err_infty_avg/((double)repetitions); /* Calculate the average error. */ err_2_avg = err_2_avg/((double)repetitions); /* Print out the error measurements. */ fprintf(stdout,"%+le %+le %+le\n", t_avg, err_infty_avg, err_2_avg); fprintf(stderr,"%d: %4d %4d %+le %+le %+le\n", tc, NQ[iNQ], SQ[iNQ], t_avg, err_infty_avg, err_2_avg); } } /* for (im = 0; im < im_max; im++) - Process all cut-off * bandwidths.*/ fprintf(stderr,"\n"); /* Delete precomputed data. */ nfsft_forget(); /* Free memory for cut-off bandwidths and grid size parameters. */ nfft_free(NQ); nfft_free(SQ); if (testmode == TIMING) { nfft_free(RQ); } if (mode == RANDOM) { nfft_free(x_compare); nfft_free(f_compare); nfft_free(f); } if (testmode == TIMING) { /* Allocate data structures. */ nfft_free(f_hat); nfft_free(f); nfft_free(x_grid); } } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */ /* Return exit code for successful run. */ return EXIT_SUCCESS; }
static void simple_test_nfsft(void) { const int N = 4; /* bandwidth/maximum degree */ const int M = 8; /* number of nodes */ nfsft_plan plan; /* transform plan */ int j, k, n; /* loop variables */ /* precomputation (for fast polynomial transform) */ nfsft_precompute(N,1000.0,0U,0U); /* Initialize transform plan using the guru interface. All input and output * arrays are allocated by nfsft_init_guru(). Computations are performed with * respect to L^2-normalized spherical harmonics Y_k^n. The array of spherical * Fourier coefficients is preserved during transformations. The NFFT uses a * cut-off parameter m = 6. See the NFFT 3 manual for details. */ nfsft_init_guru(&plan, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F | NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, 6); /* pseudo-random nodes */ for (j = 0; j < plan.M_total; j++) { plan.x[2*j]= nfft_drand48() - K(0.5); plan.x[2*j+1]= K(0.5) * nfft_drand48(); } /* precomputation (for NFFT, node-dependent) */ nfsft_precompute_x(&plan); /* pseudo-random Fourier coefficients */ for (k = 0; k <= plan.N; k++) for (n = -k; n <= k; n++) plan.f_hat[NFSFT_INDEX(k,n,&plan)] = nfft_drand48() - K(0.5) + _Complex_I*(nfft_drand48() - K(0.5)); /* Direct transformation, display result. */ nfsft_trafo_direct(&plan); printf("Vector f (NDSFT):\n"); for (j = 0; j < plan.M_total; j++) printf("f[%+2d] = %+5.3" __FES__ " %+5.3" __FES__ "*I\n",j, creal(plan.f[j]), cimag(plan.f[j])); printf("\n"); /* Fast approximate transformation, display result. */ nfsft_trafo(&plan); printf("Vector f (NFSFT):\n"); for (j = 0; j < plan.M_total; j++) printf("f[%+2d] = %+5.3" __FES__ " %+5.3" __FES__ "*I\n",j, creal(plan.f[j]), cimag(plan.f[j])); printf("\n"); /* Direct adjoint transformation, display result. 
*/ nfsft_adjoint_direct(&plan); printf("Vector f_hat (NDSFT):\n"); for (k = 0; k <= plan.N; k++) for (n = -k; n <= k; n++) fprintf(stdout,"f_hat[%+2d,%+2d] = %+5.3" __FES__ " %+5.3" __FES__ "*I\n",k,n, creal(plan.f_hat[NFSFT_INDEX(k,n,&plan)]), cimag(plan.f_hat[NFSFT_INDEX(k,n,&plan)])); printf("\n"); /* Fast approximate adjoint transformation, display result. */ nfsft_adjoint(&plan); printf("Vector f_hat (NFSFT):\n"); for (k = 0; k <= plan.N; k++) { for (n = -k; n <= k; n++) { fprintf(stdout,"f_hat[%+2d,%+2d] = %+5.3" __FES__ " %+5.3" __FES__ "*I\n",k,n, creal(plan.f_hat[NFSFT_INDEX(k,n,&plan)]), cimag(plan.f_hat[NFSFT_INDEX(k,n,&plan)])); } } /* Finalize the plan. */ nfsft_finalize(&plan); /* Destroy data precomputed for fast polynomial transform. */ nfsft_forget(); }
/** * The main program. * * \param argc The number of arguments * \param argv An array containing the arguments as C-strings * * \return Exit code * * \author Jens Keiner */ int main (int argc, char **argv) { double **p; /* The array containing the parameter sets * * for the kernel functions */ int *m; /* The array containing the cut-off degrees M */ int **ld; /* The array containing the numbers of source * * and target nodes, L and D */ int ip; /* Index variable for p */ int im; /* Index variable for m */ int ild; /* Index variable for l */ int ipp; /* Index for kernel parameters */ int ip_max; /* The maximum index for p */ int im_max; /* The maximum index for m */ int ild_max; /* The maximum index for l */ int ipp_max; /* The maximum index for ip */ int tc_max; /* The number of testcases */ int m_max; /* The maximum cut-off degree M for the * * current dataset */ int l_max; /* The maximum number of source nodes L for * * the current dataset */ int d_max; /* The maximum number of target nodes D for * * the current dataset */ long ld_max_prec; /* The maximum number of source and target * * nodes for precomputation multiplied */ long l_max_prec; /* The maximum number of source nodes for * * precomputation */ int tc; /* Index variable for testcases */ int kt; /* The kernel function */ int cutoff; /* The current NFFT cut-off parameter */ double threshold; /* The current NFSFT threshold parameter */ double t_d; /* Time for direct algorithm in seconds */ double t_dp; /* Time for direct algorithm with * precomputation in seconds */ double t_fd; /* Time for fast direct algorithm in seconds */ double t_f; /* Time for fast algorithm in seconds */ double temp; /* */ double err_f; /* Error E_infty for fast algorithm */ double err_fd; /* Error E_\infty for fast direct algorithm */ ticks t0, t1; /* */ int precompute = NO; /* */ fftw_complex *ptr; /* */ double* steed; /* */ fftw_complex *b; /* The weights (b_l)_{l=0}^{L-1} */ fftw_complex *f_hat; /* The spherical Fourier 
coefficients */ fftw_complex *a; /* The Fourier-Legendre coefficients */ double *xi; /* Target nodes */ double *eta; /* Source nodes */ fftw_complex *f_m; /* Approximate function values */ fftw_complex *f; /* Exact function values */ fftw_complex *prec = NULL; /* */ nfsft_plan plan; /* NFSFT plan */ nfsft_plan plan_adjoint; /* adjoint NFSFT plan */ int i; /* */ int k; /* */ int n; /* */ int d; /* */ int l; /* */ int use_nfsft; /* */ int use_nfft; /* */ int use_fpt; /* */ int rinc; /* */ double constant; /* */ /* Read the number of testcases. */ fscanf(stdin,"testcases=%d\n",&tc_max); fprintf(stdout,"%d\n",tc_max); /* Process each testcase. */ for (tc = 0; tc < tc_max; tc++) { /* Check if the fast transform shall be used. */ fscanf(stdin,"nfsft=%d\n",&use_nfsft); fprintf(stdout,"%d\n",use_nfsft); if (use_nfsft != NO) { /* Check if the NFFT shall be used. */ fscanf(stdin,"nfft=%d\n",&use_nfft); fprintf(stdout,"%d\n",use_nfft); if (use_nfft != NO) { /* Read the cut-off parameter. */ fscanf(stdin,"cutoff=%d\n",&cutoff); fprintf(stdout,"%d\n",cutoff); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ cutoff = 1; } /* Check if the fast polynomial transform shall be used. */ fscanf(stdin,"fpt=%d\n",&use_fpt); fprintf(stdout,"%d\n",use_fpt); /* Read the NFSFT threshold parameter. */ fscanf(stdin,"threshold=%lf\n",&threshold); fprintf(stdout,"%lf\n",threshold); } else { /* TODO remove this */ /* Set dummy values. */ cutoff = 3; threshold = 1000000000000.0; } /* Initialize bandwidth bound. */ m_max = 0; /* Initialize source nodes bound. */ l_max = 0; /* Initialize target nodes bound. */ d_max = 0; /* Initialize source nodes bound for precomputation. */ l_max_prec = 0; /* Initialize source and target nodes bound for precomputation. */ ld_max_prec = 0; /* Read the kernel type. This is one of KT_ABEL_POISSON, KT_SINGULARITY, * KT_LOC_SUPP and KT_GAUSSIAN. 
*/ fscanf(stdin,"kernel=%d\n",&kt); fprintf(stdout,"%d\n",kt); /* Read the number of parameter sets. */ fscanf(stdin,"parameter_sets=%d\n",&ip_max); fprintf(stdout,"%d\n",ip_max); /* Allocate memory for pointers to parameter sets. */ p = (double**) nfft_malloc(ip_max*sizeof(double*)); /* We now read in the parameter sets. */ /* Read number of parameters. */ fscanf(stdin,"parameters=%d\n",&ipp_max); fprintf(stdout,"%d\n",ipp_max); for (ip = 0; ip < ip_max; ip++) { /* Allocate memory for the parameters. */ p[ip] = (double*) nfft_malloc(ipp_max*sizeof(double)); /* Read the parameters. */ for (ipp = 0; ipp < ipp_max; ipp++) { /* Read the next parameter. */ fscanf(stdin,"%lf\n",&p[ip][ipp]); fprintf(stdout,"%lf\n",p[ip][ipp]); } } /* Read the number of cut-off degrees. */ fscanf(stdin,"bandwidths=%d\n",&im_max); fprintf(stdout,"%d\n",im_max); m = (int*) nfft_malloc(im_max*sizeof(int)); /* Read the cut-off degrees. */ for (im = 0; im < im_max; im++) { /* Read cut-off degree. */ fscanf(stdin,"%d\n",&m[im]); fprintf(stdout,"%d\n",m[im]); m_max = MAX(m_max,m[im]); } /* Read number of node specifications. */ fscanf(stdin,"node_sets=%d\n",&ild_max); fprintf(stdout,"%d\n",ild_max); ld = (int**) nfft_malloc(ild_max*sizeof(int*)); /* Read the run specification. */ for (ild = 0; ild < ild_max; ild++) { /* Allocate memory for the run parameters. */ ld[ild] = (int*) nfft_malloc(5*sizeof(int)); /* Read number of source nodes. */ fscanf(stdin,"L=%d ",&ld[ild][0]); fprintf(stdout,"%d\n",ld[ild][0]); l_max = MAX(l_max,ld[ild][0]); /* Read number of target nodes. */ fscanf(stdin,"D=%d ",&ld[ild][1]); fprintf(stdout,"%d\n",ld[ild][1]); d_max = MAX(d_max,ld[ild][1]); /* Determine whether direct and fast algorithm shall be compared. */ fscanf(stdin,"compare=%d ",&ld[ild][2]); fprintf(stdout,"%d\n",ld[ild][2]); /* Check if precomputation for the direct algorithm is used. */ if (ld[ild][2] == YES) { /* Read whether the precomputed version shall also be used. 
*/ fscanf(stdin,"precomputed=%d\n",&ld[ild][3]); fprintf(stdout,"%d\n",ld[ild][3]); /* Read the number of repetitions over which measurements are * averaged. */ fscanf(stdin,"repetitions=%d\n",&ld[ild][4]); fprintf(stdout,"%d\n",ld[ild][4]); /* Update ld_max_prec and l_max_prec. */ if (ld[ild][3] == YES) { /* Update ld_max_prec. */ ld_max_prec = MAX(ld_max_prec,ld[ild][0]*ld[ild][1]); /* Update l_max_prec. */ l_max_prec = MAX(l_max_prec,ld[ild][0]); /* Turn on the precomputation for the direct algorithm. */ precompute = YES; } } else { /* Set default value for the number of repetitions. */ ld[ild][4] = 1; } } /* Allocate memory for data structures. */ b = (fftw_complex*) nfft_malloc(l_max*sizeof(fftw_complex)); eta = (double*) nfft_malloc(2*l_max*sizeof(double)); f_hat = (fftw_complex*) nfft_malloc(NFSFT_F_HAT_SIZE(m_max)*sizeof(fftw_complex)); a = (fftw_complex*) nfft_malloc((m_max+1)*sizeof(fftw_complex)); xi = (double*) nfft_malloc(2*d_max*sizeof(double)); f_m = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex)); f = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex)); /* Allocate memory for precomputed data. */ if (precompute == YES) { prec = (fftw_complex*) nfft_malloc(ld_max_prec*sizeof(fftw_complex)); } /* Generate random source nodes and weights. */ for (l = 0; l < l_max; l++) { b[l] = (((double)rand())/RAND_MAX) - 0.5; eta[2*l] = (((double)rand())/RAND_MAX) - 0.5; eta[2*l+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI); } /* Generate random target nodes. */ for (d = 0; d < d_max; d++) { xi[2*d] = (((double)rand())/RAND_MAX) - 0.5; xi[2*d+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI); } /* Do precomputation. */ nfsft_precompute(m_max,threshold, ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U); /* Process all parameter sets. */ for (ip = 0; ip < ip_max; ip++) { /* Compute kernel coeffcients up to the maximum cut-off degree m_max. 
*/ switch (kt) { case KT_ABEL_POISSON: /* Compute Fourier-Legendre coefficients for the Poisson kernel. */ for (k = 0; k <= m_max; k++) a[k] = SYMBOL_ABEL_POISSON(k,p[ip][0]); break; case KT_SINGULARITY: /* Compute Fourier-Legendre coefficients for the singularity * kernel. */ for (k = 0; k <= m_max; k++) a[k] = SYMBOL_SINGULARITY(k,p[ip][0]); break; case KT_LOC_SUPP: /* Compute Fourier-Legendre coefficients for the locally supported * kernel. */ a[0] = 1.0; if (1 <= m_max) a[1] = ((p[ip][1]+1+p[ip][0])/(p[ip][1]+2.0))*a[0]; for (k = 2; k <= m_max; k++) a[k] = (1.0/(k+p[ip][1]+1))*((2*k-1)*p[ip][0]*a[k-1] - (k-p[ip][1]-2)*a[k-2]); break; case KT_GAUSSIAN: /* Fourier-Legendre coefficients */ steed = (double*) nfft_malloc((m_max+1)*sizeof(double)); smbi(2.0*p[ip][0],0.5,m_max+1,2,steed); for (k = 0; k <= m_max; k++) a[k] = K2PI*(sqrt(KPI/p[ip][0]))*steed[k]; nfft_free(steed); break; } /* Normalize Fourier-Legendre coefficients. */ for (k = 0; k <= m_max; k++) a[k] *= (2*k+1)/(K4PI); /* Process all node sets. */ for (ild = 0; ild < ild_max; ild++) { /* Check if the fast algorithm shall be used. */ if (ld[ild][2] != NO) { /* Check if the direct algorithm with precomputation should be * tested. */ if (ld[ild][3] != NO) { /* Get pointer to start of data. */ ptr = prec; /* Calculate increment from one row to the next. */ rinc = l_max_prec-ld[ild][0]; /* Process al target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute inner product between current source and target * node. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Switch by the kernel type. */ switch (kt) { case KT_ABEL_POISSON: /* Evaluate the Poisson kernel for the current value. */ *ptr++ = poissonKernel(temp,p[ip][0]); break; case KT_SINGULARITY: /* Evaluate the singularity kernel for the current * value. 
*/ *ptr++ = singularityKernel(temp,p[ip][0]); break; case KT_LOC_SUPP: /* Evaluate the localized kernel for the current * value. */ *ptr++ = locallySupportedKernel(temp,p[ip][0],p[ip][1]); break; case KT_GAUSSIAN: /* Evaluate the spherical Gaussian kernel for the current * value. */ *ptr++ = gaussianKernel(temp,p[ip][0]); break; } } /* Increment pointer for next row. */ ptr += rinc; } /* Initialize cumulative time variable. */ t_dp = 0.0; /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Reset pointer to start of precomputed data. */ ptr = prec; /* Calculate increment from one row to the next. */ rinc = l_max_prec-ld[ild][0]; /* Check if the localized kernel is used. */ if (kt == KT_LOC_SUPP) { /* Perform final summation */ /* Calculate the multiplicative constant. */ constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1))); /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) f[d] += b[l]*(*ptr++); /* Multiply with the constant. */ f[d] *= constant; /* Proceed to next row. */ ptr += rinc; } } else { /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) f[d] += b[l]*(*ptr++); /* Proceed to next row. */ ptr += rinc; } } } /* Calculate the time needed. */ t1 = getticks(); t_dp = nfft_elapsed_seconds(t1,t0); /* Calculate average time needed. */ t_dp = t_dp/((double)ld[ild][4]); } else { /* Initialize cumulative time variable with dummy value. */ t_dp = -1.0; } /* Initialize cumulative time variable. */ t_d = 0.0; /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Switch by the kernel type. */ switch (kt) { case KT_ABEL_POISSON: /* Process all target nodes. 
*/ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. */ f[d] += b[l]*poissonKernel(temp,p[ip][0]); } } break; case KT_SINGULARITY: /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. */ f[d] += b[l]*singularityKernel(temp,p[ip][0]); } } break; case KT_LOC_SUPP: /* Calculate the multiplicative constant. */ constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1))); /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. */ f[d] += b[l]*locallySupportedKernel(temp,p[ip][0],p[ip][1]); } /* Multiply result with constant. */ f[d] *= constant; } break; case KT_GAUSSIAN: /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. 
*/ f[d] += b[l]*gaussianKernel(temp,p[ip][0]); } } break; } } /* Calculate and add the time needed. */ t1 = getticks(); t_d = nfft_elapsed_seconds(t1,t0); /* Calculate average time needed. */ t_d = t_d/((double)ld[ild][4]); } else { /* Initialize cumulative time variable with dummy value. */ t_d = -1.0; t_dp = -1.0; } /* Initialize error and cumulative time variables for the fast * algorithm. */ err_fd = -1.0; err_f = -1.0; t_fd = -1.0; t_f = -1.0; /* Process all cut-off bandwidths. */ for (im = 0; im < im_max; im++) { /* Init transform plans. */ nfsft_init_guru(&plan_adjoint, m[im],ld[ild][0], ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)), PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); nfsft_init_guru(&plan,m[im],ld[ild][1], ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)), PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_adjoint.f_hat = f_hat; plan_adjoint.x = eta; plan_adjoint.f = b; plan.f_hat = f_hat; plan.x = xi; plan.f = f_m; nfsft_precompute_x(&plan_adjoint); nfsft_precompute_x(&plan); /* Check if direct algorithm shall also be tested. */ if (use_nfsft == BOTH) { /* Initialize cumulative time variable. */ t_fd = 0.0; /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Execute adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(&plan_adjoint); /* Multiplication with the Fourier-Legendre coefficients. */ for (k = 0; k <= m[im]; k++) for (n = -k; n <= k; n++) f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k]; /* Execute direct NDSFT transformation. */ nfsft_trafo_direct(&plan); } /* Calculate and add the time needed. */ t1 = getticks(); t_fd = nfft_elapsed_seconds(t1,t0); /* Calculate average time needed. */ t_fd = t_fd/((double)ld[ild][4]); /* Check if error E_infty should be computed. */ if (ld[ild][2] != NO) { /* Compute the error E_infinity. 
*/ err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, ld[ild][0]); } } /* Check if the fast NFSFT algorithm shall also be tested. */ if (use_nfsft != NO) { /* Initialize cumulative time variable for the NFSFT algorithm. */ t_f = 0.0; } else { /* Initialize cumulative time variable for the direct NDSFT * algorithm. */ t_fd = 0.0; } /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Check if the fast NFSFT algorithm shall also be tested. */ if (use_nfsft != NO) { /* Execute the adjoint NFSFT transformation. */ nfsft_adjoint(&plan_adjoint); } else { /* Execute the adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(&plan_adjoint); } /* Multiplication with the Fourier-Legendre coefficients. */ for (k = 0; k <= m[im]; k++) for (n = -k; n <= k; n++) f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k]; /* Check if the fast NFSFT algorithm shall also be tested. */ if (use_nfsft != NO) { /* Execute the NFSFT transformation. */ nfsft_trafo(&plan); } else { /* Execute the NDSFT transformation. */ nfsft_trafo_direct(&plan); } } /* Check if the fast NFSFT algorithm has been used. */ t1 = getticks(); if (use_nfsft != NO) t_f = nfft_elapsed_seconds(t1,t0); else t_fd = nfft_elapsed_seconds(t1,t0); /* Check if the fast NFSFT algorithm has been used. */ if (use_nfsft != NO) { /* Calculate average time needed. */ t_f = t_f/((double)ld[ild][4]); } else { /* Calculate average time needed. */ t_fd = t_fd/((double)ld[ild][4]); } /* Check if error E_infty should be computed. */ if (ld[ild][2] != NO) { /* Check if the fast NFSFT algorithm has been used. */ if (use_nfsft != NO) { /* Compute the error E_infinity. */ err_f = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, ld[ild][0]); } else { /* Compute the error E_infinity. */ err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, ld[ild][0]); } } /* Print out the error measurements. 
*/ fprintf(stdout,"%e\n%e\n%e\n%e\n%e\n%e\n\n",t_d,t_dp,t_fd,t_f,err_fd, err_f); /* Finalize the NFSFT plans */ nfsft_finalize(&plan_adjoint); nfsft_finalize(&plan); } /* for (im = 0; im < im_max; im++) - Process all cut-off * bandwidths.*/ } /* for (ild = 0; ild < ild_max; ild++) - Process all node sets. */ } /* for (ip = 0; ip < ip_max; ip++) - Process all parameter sets. */ /* Delete precomputed data. */ nfsft_forget(); /* Check if memory for precomputed data of the matrix K has been * allocated. */ if (precompute == YES) { /* Free memory for precomputed matrix K. */ nfft_free(prec); } /* Free data arrays. */ nfft_free(f); nfft_free(f_m); nfft_free(xi); nfft_free(eta); nfft_free(a); nfft_free(f_hat); nfft_free(b); /* Free memory for node sets. */ for (ild = 0; ild < ild_max; ild++) nfft_free(ld[ild]); nfft_free(ld); /* Free memory for cut-off bandwidths. */ nfft_free(m); /* Free memory for parameter sets. */ for (ip = 0; ip < ip_max; ip++) nfft_free(p[ip]); nfft_free(p); } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */ /* Return exit code for successful run. */ return EXIT_SUCCESS; }