/** simple test program for the inverse discrete Radon transform */ int main(int argc, char **argv) { int (*gridfcn)(); /**< grid generating function */ int T, S; /**< number of directions/offsets */ FILE *fp; int N; /**< image size */ NFFT_R *Rf, *iRf; int max_i; /**< number of iterations */ if (argc != 6) { printf("inverse_radon gridfcn N T R max_i\n"); printf("\n"); printf("gridfcn \"polar\" or \"linogram\" \n"); printf("N image size NxN \n"); printf("T number of slopes \n"); printf("R number of offsets \n"); printf("max_i number of iterations \n"); exit(EXIT_FAILURE); } if (strcmp(argv[1], "polar") == 0) gridfcn = polar_grid; else gridfcn = linogram_grid; N = atoi(argv[2]); T = atoi(argv[3]); S = atoi(argv[4]); /*printf("N=%d, %s grid with T=%d, R=%d. \n",N,argv[1],T,R);*/ max_i = atoi(argv[5]); Rf = (NFFT_R *) NFFT(malloc)((size_t)(T * S) * (sizeof(NFFT_R))); iRf = (NFFT_R *) NFFT(malloc)((size_t)(N * N) * (sizeof(NFFT_R))); /** load data */ fp = fopen("sinogram_data.bin", "rb"); if (fp == NULL) return EXIT_FAILURE; fread(Rf, sizeof(NFFT_R), (size_t)(T * S), fp); fclose(fp); /** inverse Radon transform */ inverse_radon_trafo(gridfcn, T, S, Rf, N, iRf, max_i); /** write result */ fp = fopen("output_data.bin", "wb+"); if (fp == NULL) return EXIT_FAILURE; fwrite(iRf, sizeof(NFFT_R), (size_t)(N * N), fp); fclose(fp); /** free the variables */ NFFT(free)(Rf); NFFT(free)(iRf); return EXIT_SUCCESS; }
/**
 * Store the parameters of one test set and run the test once per requested
 * thread count.
 *
 * The arguments d, L, M, n, m, p, kernel_name, c, eps_I and eps_B are copied
 * into testset->param. One s_result entry per element of nthreads_array is
 * allocated with NFFT(malloc) and stored in testset->results (the count goes
 * to testset->nresults). run_test_create() is called once, then run_test()
 * is called for every entry with NREPEAT as its second argument.
 *
 * \param testset               test set to fill in
 * \param nthreads_array        thread counts to run with
 * \param n_threads_array_size  number of entries in nthreads_array
 */
void run_testset(s_testset *testset, int d, int L, int M, int n, int m, int p,
    char *kernel_name, R c, R eps_I, R eps_B, int *nthreads_array,
    int n_threads_array_size)
{
  int idx = 0;

  /* remember the configuration of this test set */
  testset->param.d = d;
  testset->param.L = L;
  testset->param.M = M;
  testset->param.n = n;
  testset->param.m = m;
  testset->param.p = p;
  testset->param.kernel_name = kernel_name;
  testset->param.c = c;
  testset->param.eps_I = eps_I;
  testset->param.eps_B = eps_B;

  /* one result slot per thread count */
  testset->results = (s_result*) NFFT(malloc)(
      (size_t)(n_threads_array_size) * sizeof(s_result));
  testset->nresults = n_threads_array_size;

  run_test_create(testset->param.d, testset->param.L, testset->param.M);

  while (idx < n_threads_array_size)
  {
    s_result *slot = &testset->results[idx];

    slot->nthreads = nthreads_array[idx];
    run_test(slot->resval, NREPEAT, testset->param.n, testset->param.m,
        testset->param.p, testset->param.kernel_name, testset->param.c,
        testset->param.eps_I, testset->param.eps_B, slot->nthreads);

    idx++;
  }
}
/**
 * Build the list of thread counts to benchmark with.
 *
 * Let nmax be the value returned by NFFT(get_num_threads)().
 *  - If nmax <= 5 the list is 1, 2, ..., nmax.
 *  - Otherwise the list holds the powers of two not exceeding nmax. If nmax
 *    is not itself a power of two, nmax is appended, and when nmax is even
 *    nmax/2 is inserted just before the largest power of two.
 *    Examples: nmax = 6 gives 1 2 3 4 6; nmax = 7 gives 1 2 4 7;
 *    nmax = 8 gives 1 2 4 8.
 *
 * \param arr  receives the list, allocated with NFFT(malloc); the caller owns it
 * \return number of entries written to *arr
 */
int get_nthreads_array(int **arr)
{
  const int nmax = NFFT(get_num_threads)();
  const int nmax_is_even = (nmax % 2 == 0);
  int *list;
  int capacity = 2;
  int count = 0;
  int pw;

  /* few threads: simply enumerate all of them */
  if (nmax <= 5)
  {
    list = (int*) NFFT(malloc)((size_t) (nmax) * sizeof(int));
    *arr = list;
    for (count = 0; count < nmax; count++)
      list[count] = count + 1;
    return nmax;
  }

  /* one slot per power of two <= nmax, plus two for nmax/2 and nmax */
  for (pw = 1; pw <= nmax; pw *= 2)
    capacity++;

  list = (int*) NFFT(malloc)((size_t)(capacity) * sizeof(int));
  *arr = list;

  for (pw = 1; pw <= nmax; pw *= 2)
  {
    /* true for the largest power of two when it is smaller than nmax */
    const int is_last_below_nmax = (pw != nmax) && (2 * pw > nmax);

    if (is_last_below_nmax && nmax_is_even)
      list[count++] = nmax / 2;

    list[count++] = pw;

    if (is_last_below_nmax)
    {
      list[count++] = nmax;
      break;
    }
  }

  return count;
}
static void simple_test_nfct_1d(void) { NFCT(plan) p; const char *error_str; int N = 14; int M = 19; /** init an one dimensional plan */ NFCT(init_1d)(&p,N,M); /** init pseudo random nodes */ NFFT(vrand_real)(p.x, p.M_total, NFFT_K(0.0), NFFT_K(0.5)); /** precompute psi, the entries of the matrix B */ if( p.flags & PRE_ONE_PSI) NFCT(precompute_one_psi)(&p); /** init pseudo random Fourier coefficients and show them */ NFFT(vrand_real)(p.f_hat, p.N_total, NFFT_K(0.0), NFFT_K(1.0)); NFFT(vpr_double)(p.f_hat,p.N_total,"given Fourier coefficients, vector f_hat"); /** check for valid parameters before calling any trafo/adjoint method */ error_str = NFCT(check)(&p); if (error_str != 0) { printf("Error in nfct module: %s\n", error_str); return; } /** direct trafo and show the result */ NFCT(trafo_direct)(&p); NFFT(vpr_double)(p.f,p.M_total,"ndct, vector f"); /** approx. trafo and show the result */ NFCT(trafo)(&p); NFFT(vpr_double)(p.f,p.M_total,"nfct, vector f"); /** approx. adjoint and show the result */ NFCT(adjoint_direct)(&p); NFFT(vpr_double)(p.f_hat,p.N_total,"adjoint ndct, vector f_hat"); /** approx. adjoint and show the result */ NFCT(adjoint)(&p); NFFT(vpr_double)(p.f_hat,p.N_total,"adjoint nfct, vector f_hat"); /** finalise the one dimensional plan */ NFCT(finalize)(&p); }
void fastsum_benchomp_createdataset(unsigned int d, int L, int M) { int t, j, k; R *x; R *y; C *alpha; x = (R*) NFFT(malloc)((size_t)(d * L) * sizeof(R)); y = (R*) NFFT(malloc)((size_t)(d * L) * sizeof(R)); alpha = (C*) NFFT(malloc)((size_t)(L) * sizeof(C)); /** init source knots in a d-ball with radius 1 */ k = 0; while (k < L) { R r_max = K(1.0); R r2 = K(0.0); for (j = 0; j < d; j++) x[k * d + j] = K(2.0) * r_max * NFFT(drand48)() - r_max; for (j = 0; j < d; j++) r2 += x[k * d + j] * x[k * d + j]; if (r2 >= r_max * r_max) continue; k++; } NFFT(vrand_unit_complex)(alpha, L); /** init target knots in a d-ball with radius 1 */ k = 0; while (k < M) { R r_max = K(1.0); R r2 = K(0.0); for (j = 0; j < d; j++) y[k * d + j] = K(2.0) * r_max * NFFT(drand48)() - r_max; for (j = 0; j < d; j++) r2 += y[k * d + j] * y[k * d + j]; if (r2 >= r_max * r_max) continue; k++; } printf("%d %d %d\n", d, L, M); for (j = 0; j < L; j++) { for (t = 0; t < d; t++) printf("%.16" __FES__ " ", x[d * j + t]); printf("\n"); } for (j = 0; j < L; j++) printf("%.16" __FES__ " %.16" __FES__ "\n", CREAL(alpha[j]), CIMAG(alpha[j])); for (j = 0; j < M; j++) { for (t = 0; t < d; t++) printf("%.16" __FES__ " ", y[d * j + t]); printf("\n"); } NFFT(free)(x); NFFT(free)(y); NFFT(free)(alpha); }
/** Simple test routine for the inverse nfft */ static void simple_test_solver_nfft_1d(int N, int M, int iter) { int k, l; /**< index for nodes, freqencies,iter*/ NFFT(plan) p; /**< plan for the nfft */ SOLVER(plan_complex) ip; /**< plan for the inverse nfft */ const char *error_str; /** initialise an one dimensional plan */ NFFT(init_1d)(&p, N, M); /** init pseudo random nodes */ NFFT(vrand_shifted_unit_double)(p.x, p.M_total); /** precompute psi, the entries of the matrix B */ if (p.flags & PRE_ONE_PSI) NFFT(precompute_one_psi)(&p); /** initialise inverse plan */ SOLVER(init_complex)(&ip, (NFFT(mv_plan_complex)*) (&p)); /** init pseudo random samples and show them */ NFFT(vrand_unit_complex)(ip.y, p.M_total); NFFT(vpr_complex)(ip.y, p.M_total, "Given data, vector y"); /** initialise some guess f_hat_0 and solve */ for (k = 0; k < p.N_total; k++) ip.f_hat_iter[k] = NFFT_K(0.0); NFFT(vpr_complex)(ip.f_hat_iter, p.N_total, "Initial guess, vector f_hat_iter"); /** check for valid parameters before calling any trafo/adjoint method */ error_str = NFFT(check)(&p); if (error_str != 0) { printf("Error in nfft module: %s\n", error_str); return; } NFFT_CSWAP(ip.f_hat_iter, p.f_hat); NFFT(trafo)(&p); NFFT(vpr_complex)(p.f, p.M_total, "Data fit, vector f"); NFFT_CSWAP(ip.f_hat_iter, p.f_hat); SOLVER(before_loop_complex)(&ip); printf("\n Residual r=%" NFFT__FES__ "\n", ip.dot_r_iter); for (l = 0; l < iter; l++) { printf("\n********** Iteration l=%d **********\n", l); SOLVER(loop_one_step_complex)(&ip); NFFT(vpr_complex)(ip.f_hat_iter, p.N_total, "Approximate solution, vector f_hat_iter"); NFFT_CSWAP(ip.f_hat_iter, p.f_hat); NFFT(trafo)(&p); NFFT(vpr_complex)(p.f, p.M_total, "Data fit, vector f"); NFFT_CSWAP(ip.f_hat_iter, p.f_hat); printf("\n Residual r=%" NFFT__FES__ "\n", ip.dot_r_iter); } SOLVER(finalize_complex)(&ip); NFFT(finalize)(&p); }
int main(int argc, char **argv) { int j, k, t; /**< indices */ int d; /**< number of dimensions */ int N; /**< number of source nodes */ int M; /**< number of target nodes */ int n; /**< expansion degree */ int m; /**< cut-off parameter */ int p; /**< degree of smoothness */ const char *s; /**< name of kernel */ C (*kernel)(R, int, const R *); /**< kernel function */ R c; /**< parameter for kernel */ fastsum_plan my_fastsum_plan; /**< plan for fast summation */ C *direct; /**< array for direct computation */ ticks t0, t1; /**< for time measurement */ R time; /**< for time measurement */ R error = K(0.0); /**< for error computation */ R eps_I; /**< inner boundary */ R eps_B; /**< outer boundary */ FILE *fid1, *fid2; R temp; if (argc != 11) { printf("\nfastsum_test d N M n m p kernel c\n\n"); printf(" d dimension \n"); printf(" N number of source nodes \n"); printf(" M number of target nodes \n"); printf(" n expansion degree \n"); printf(" m cut-off parameter \n"); printf(" p degree of smoothness \n"); printf(" kernel kernel function (e.g., gaussian)\n"); printf(" c kernel parameter \n"); printf(" eps_I inner boundary \n"); printf(" eps_B outer boundary \n\n"); exit(-1); } else { d = atoi(argv[1]); N = atoi(argv[2]); c = K(1.0) / POW((R)(N), K(1.0) / ((R)(d))); M = atoi(argv[3]); n = atoi(argv[4]); m = atoi(argv[5]); p = atoi(argv[6]); s = argv[7]; c = (R)(atof(argv[8])); eps_I = (R)(atof(argv[9])); eps_B = (R)(atof(argv[10])); if (strcmp(s, "gaussian") == 0) kernel = gaussian; else if (strcmp(s, "multiquadric") == 0) kernel = multiquadric; else if (strcmp(s, "inverse_multiquadric") == 0) kernel = inverse_multiquadric; else if (strcmp(s, "logarithm") == 0) kernel = logarithm; else if (strcmp(s, "thinplate_spline") == 0) kernel = thinplate_spline; else if (strcmp(s, "one_over_square") == 0) kernel = one_over_square; else if (strcmp(s, "one_over_modulus") == 0) kernel = one_over_modulus; else if (strcmp(s, "one_over_x") == 0) kernel = one_over_x; else if (strcmp(s, 
"inverse_multiquadric3") == 0) kernel = inverse_multiquadric3; else if (strcmp(s, "sinc_kernel") == 0) kernel = sinc_kernel; else if (strcmp(s, "cosc") == 0) kernel = cosc; else if (strcmp(s, "cot") == 0) kernel = kcot; else { s = "multiquadric"; kernel = multiquadric; } } printf( "d=%d, N=%d, M=%d, n=%d, m=%d, p=%d, kernel=%s, c=%" __FGS__ ", eps_I=%" __FGS__ ", eps_B=%" __FGS__ " \n", d, N, M, n, m, p, s, c, eps_I, eps_B); /** init two dimensional fastsum plan */ fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, 0, n, m, p, eps_I, eps_B); /*fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, EXACT_NEARFIELD, n, m, p);*/ /** load source knots and coefficients */ fid1 = fopen("x.dat", "r"); fid2 = fopen("alpha.dat", "r"); for (k = 0; k < N; k++) { for (t = 0; t < d; t++) { fscanf(fid1, __FR__, &my_fastsum_plan.x[k * d + t]); } fscanf(fid2, __FR__, &temp); my_fastsum_plan.alpha[k] = temp; fscanf(fid2, __FR__, &temp); my_fastsum_plan.alpha[k] += temp * II; } fclose(fid1); fclose(fid2); /** load target knots */ fid1 = fopen("y.dat", "r"); for (j = 0; j < M; j++) { for (t = 0; t < d; t++) { fscanf(fid1, __FR__, &my_fastsum_plan.y[j * d + t]); } } fclose(fid1); /** direct computation */ printf("direct computation: "); fflush(NULL); t0 = getticks(); fastsum_exact(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** copy result */ direct = (C *) NFFT(malloc)((size_t)(my_fastsum_plan.M_total) * (sizeof(C))); for (j = 0; j < my_fastsum_plan.M_total; j++) direct[j] = my_fastsum_plan.f[j]; /** precomputation */ printf("pre-computation: "); fflush(NULL); t0 = getticks(); fastsum_precompute(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** fast computation */ printf("fast computation: "); fflush(NULL); t0 = getticks(); fastsum_trafo(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** compute max 
error */ error = K(0.0); for (j = 0; j < my_fastsum_plan.M_total; j++) { if (CABS(direct[j] - my_fastsum_plan.f[j]) / CABS(direct[j]) > error) error = CABS(direct[j] - my_fastsum_plan.f[j]) / CABS(direct[j]); } printf("max relative error: " __FE__ "\n", error); /** write result to file */ fid1 = fopen("f.dat", "w+"); fid2 = fopen("f_direct.dat", "w+"); if (fid1 == NULL) { printf("Fehler!\n"); exit(EXIT_FAILURE); } for (j = 0; j < M; j++) { temp = CREAL(my_fastsum_plan.f[j]); fprintf(fid1, " % .16" __FES__ "", temp); temp = CIMAG(my_fastsum_plan.f[j]); fprintf(fid1, " % .16" __FES__ "\n", temp); temp = CREAL(direct[j]); fprintf(fid2, " % .16" __FES__ "", temp); temp = CIMAG(direct[j]); fprintf(fid2, " % .16" __FES__ "\n", temp); } fclose(fid1); fclose(fid2); /** finalise the plan */ fastsum_finalize(&my_fastsum_plan); return EXIT_SUCCESS; }
/**
 * Run one fast summation on a data set read from infile and print timings.
 *
 * Input format: a header "d L M", then L*d source coordinates, L pairs
 * "re im" of coefficients, and M*d target coordinates. All coordinates are
 * scaled by r_max = 0.25 - eps_B/2 before they are stored in the plan.
 *
 * FFTW wisdom is imported before and exported after plan creation, using
 * "fastsum_benchomp_detail_threads.plan" when _OPENMP is defined and
 * "fastsum_benchomp_detail_single.plan" otherwise.
 *
 * One line with 16 numbers is printed: the eight plan timers, the remainder
 * of the total time, the total time, and three timers each of mv1 and mv2.
 * Timers that are not measured (MEASURE_TIME / MEASURE_TIME_FFTW undefined)
 * are set to zero before printing.
 *
 * \param infile  stream to read the data set from
 * \param n       expansion degree
 * \param m       cut-off parameter
 * \param p       degree of smoothness
 * \param kernel  kernel function
 * \param c       kernel parameter
 * \param eps_I   inner boundary
 * \param eps_B   outer boundary
 * \return 0 on success, 1 if the data set could not be read completely
 */
int bench_openmp(FILE *infile, int n, int m, int p,
    C (*kernel)(R, int, const R *), R c, R eps_I, R eps_B)
{
  fastsum_plan my_fastsum_plan;
  int d, L, M;
  int t, j;
  R re = K(0.0), im = K(0.0);
  /* use the eps_B argument: the plan is not initialised yet at this point */
  R r_max = K(0.25) - eps_B / K(2.0);
  ticks t0, t1;
  R tt_total;
  int n_bad = 0; /**< number of failed input conversions */

  /* without a valid header the plan dimensions would be indeterminate */
  if (fscanf(infile, "%d %d %d", &d, &L, &M) != 3)
    return 1;

#ifdef _OPENMP
  FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_threads.plan");
#else
  FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_single.plan");
#endif

  fastsum_init_guru(&my_fastsum_plan, d, L, M, kernel, &c, NEARFIELD_BOXES, n,
      m, p, eps_I, eps_B);

#ifdef _OPENMP
  FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_threads.plan");
#else
  FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_single.plan");
#endif

  /* source knots */
  for (j = 0; j < L; j++)
  {
    for (t = 0; t < d; t++)
    {
      R v = K(0.0);
      n_bad += (fscanf(infile, __FR__, &v) != 1);
      my_fastsum_plan.x[d * j + t] = v * r_max;
    }
  }

  /* coefficients */
  for (j = 0; j < L; j++)
  {
    n_bad += (fscanf(infile, __FR__ " " __FR__, &re, &im) != 2);
    my_fastsum_plan.alpha[j] = re + II * im;
  }

  /* target knots */
  for (j = 0; j < M; j++)
  {
    for (t = 0; t < d; t++)
    {
      R v = K(0.0);
      n_bad += (fscanf(infile, __FR__, &v) != 1);
      my_fastsum_plan.y[d * j + t] = v * r_max;
    }
  }

  if (n_bad != 0)
  {
    fastsum_finalize(&my_fastsum_plan);
    return 1;
  }

  /** precomputation */
  t0 = getticks();
  fastsum_precompute(&my_fastsum_plan);

  /** fast computation */
  fastsum_trafo(&my_fastsum_plan);
  t1 = getticks();
  tt_total = NFFT(elapsed_seconds)(t1, t0);

#ifndef MEASURE_TIME
  my_fastsum_plan.MEASURE_TIME_t[0] = K(0.0);
  my_fastsum_plan.MEASURE_TIME_t[1] = K(0.0);
  my_fastsum_plan.MEASURE_TIME_t[2] = K(0.0);
  my_fastsum_plan.MEASURE_TIME_t[3] = K(0.0);
  my_fastsum_plan.MEASURE_TIME_t[4] = K(0.0);
  my_fastsum_plan.MEASURE_TIME_t[5] = K(0.0);
  my_fastsum_plan.MEASURE_TIME_t[6] = K(0.0);
  my_fastsum_plan.MEASURE_TIME_t[7] = K(0.0);
  my_fastsum_plan.mv1.MEASURE_TIME_t[0] = K(0.0);
  my_fastsum_plan.mv1.MEASURE_TIME_t[2] = K(0.0);
  my_fastsum_plan.mv2.MEASURE_TIME_t[0] = K(0.0);
  my_fastsum_plan.mv2.MEASURE_TIME_t[2] = K(0.0);
#endif
#ifndef MEASURE_TIME_FFTW
  my_fastsum_plan.mv1.MEASURE_TIME_t[1] = K(0.0);
  my_fastsum_plan.mv2.MEASURE_TIME_t[1] = K(0.0);
#endif

  /* NOTE(review): four conversions use "%6" (field width) instead of "%.6"
     (precision); left unchanged because the output may be parsed elsewhere */
  printf(
      "%.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ "\n",
      my_fastsum_plan.MEASURE_TIME_t[0], my_fastsum_plan.MEASURE_TIME_t[1],
      my_fastsum_plan.MEASURE_TIME_t[2], my_fastsum_plan.MEASURE_TIME_t[3],
      my_fastsum_plan.MEASURE_TIME_t[4], my_fastsum_plan.MEASURE_TIME_t[5],
      my_fastsum_plan.MEASURE_TIME_t[6], my_fastsum_plan.MEASURE_TIME_t[7],
      tt_total - my_fastsum_plan.MEASURE_TIME_t[0]
          - my_fastsum_plan.MEASURE_TIME_t[1]
          - my_fastsum_plan.MEASURE_TIME_t[2]
          - my_fastsum_plan.MEASURE_TIME_t[3]
          - my_fastsum_plan.MEASURE_TIME_t[4]
          - my_fastsum_plan.MEASURE_TIME_t[5]
          - my_fastsum_plan.MEASURE_TIME_t[6]
          - my_fastsum_plan.MEASURE_TIME_t[7], tt_total,
      my_fastsum_plan.mv1.MEASURE_TIME_t[0],
      my_fastsum_plan.mv1.MEASURE_TIME_t[1],
      my_fastsum_plan.mv1.MEASURE_TIME_t[2],
      my_fastsum_plan.mv2.MEASURE_TIME_t[0],
      my_fastsum_plan.mv2.MEASURE_TIME_t[1],
      my_fastsum_plan.mv2.MEASURE_TIME_t[2]);

  fastsum_finalize(&my_fastsum_plan);

  return 0;
}
unsigned test_fg=0; #endif #ifdef MEASURE_TIME_FFTW unsigned test_fftw=1; #else unsigned test_fftw=0; #endif #ifdef MEASURE_TIME unsigned test=1; #else unsigned test=0; #endif static void flags_cp(NFFT(plan) *dst, NFFT(plan) *src) { dst->x = src->x; dst->f_hat = src->f_hat; dst->f = src->f; dst->g1 = src->g1; dst->g2 = src->g2; dst->my_fftw_plan1 = src->my_fftw_plan1; dst->my_fftw_plan2 = src->my_fftw_plan2; } static void time_accuracy(int d, int N, int M, int n, int m, unsigned test_ndft, unsigned test_pre_full_psi) { int r, NN[d], nn[d]; R t_ndft, t, e;
int main(int argc, char **argv) { int j, k; /**< indices */ int d; /**< number of dimensions */ int N; /**< number of source nodes */ int M; /**< number of target nodes */ int n; /**< expansion degree */ int m; /**< cut-off parameter */ int p; /**< degree of smoothness */ const char *s; /**< name of kernel */ C (*kernel)(R, int, const R *); /**< kernel function */ R c; /**< parameter for kernel */ fastsum_plan my_fastsum_plan; /**< plan for fast summation */ C *direct; /**< array for direct computation */ ticks t0, t1; /**< for time measurement */ R time; /**< for time measurement */ R error = K(0.0); /**< for error computation */ R eps_I; /**< inner boundary */ R eps_B; /**< outer boundary */ if (argc != 11) { printf("\nfastsum_test d N M n m p kernel c eps_I eps_B\n\n"); printf(" d dimension \n"); printf(" N number of source nodes \n"); printf(" M number of target nodes \n"); printf(" n expansion degree \n"); printf(" m cut-off parameter \n"); printf(" p degree of smoothness \n"); printf(" kernel kernel function (e.g., gaussian)\n"); printf(" c kernel parameter \n"); printf(" eps_I inner boundary \n"); printf(" eps_B outer boundary \n\n"); exit(EXIT_FAILURE); } else { d = atoi(argv[1]); N = atoi(argv[2]); c = K(1.0) / POW((R)(N), K(1.0) / ((R)(d))); M = atoi(argv[3]); n = atoi(argv[4]); m = atoi(argv[5]); p = atoi(argv[6]); s = argv[7]; c = (R)(atof(argv[8])); eps_I = (R)(atof(argv[9])); eps_B = (R)(atof(argv[10])); if (strcmp(s, "gaussian") == 0) kernel = gaussian; else if (strcmp(s, "multiquadric") == 0) kernel = multiquadric; else if (strcmp(s, "inverse_multiquadric") == 0) kernel = inverse_multiquadric; else if (strcmp(s, "logarithm") == 0) kernel = logarithm; else if (strcmp(s, "thinplate_spline") == 0) kernel = thinplate_spline; else if (strcmp(s, "one_over_square") == 0) kernel = one_over_square; else if (strcmp(s, "one_over_modulus") == 0) kernel = one_over_modulus; else if (strcmp(s, "one_over_x") == 0) kernel = one_over_x; else if (strcmp(s, 
"inverse_multiquadric3") == 0) kernel = inverse_multiquadric3; else if (strcmp(s, "sinc_kernel") == 0) kernel = sinc_kernel; else if (strcmp(s, "cosc") == 0) kernel = cosc; else if (strcmp(s, "cot") == 0) kernel = kcot; else { s = "multiquadric"; kernel = multiquadric; } } printf( "d=%d, N=%d, M=%d, n=%d, m=%d, p=%d, kernel=%s, c=%" __FGS__ ", eps_I=%" __FGS__ ", eps_B=%" __FGS__ " \n", d, N, M, n, m, p, s, c, eps_I, eps_B); #ifdef NF_KUB printf("nearfield correction using piecewise cubic Lagrange interpolation\n"); #elif defined(NF_QUADR) printf("nearfield correction using piecewise quadratic Lagrange interpolation\n"); #elif defined(NF_LIN) printf("nearfield correction using piecewise linear Lagrange interpolation\n"); #endif #ifdef _OPENMP #pragma omp parallel { #pragma omp single { printf("nthreads=%d\n", omp_get_max_threads()); } } FFTW(init_threads)(); #endif /** init d-dimensional fastsum plan */ fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, 0, n, m, p, eps_I, eps_B); //fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, NEARFIELD_BOXES, n, m, p, eps_I, eps_B); if (my_fastsum_plan.flags & NEARFIELD_BOXES) printf( "determination of nearfield candidates based on partitioning into boxes\n"); else printf("determination of nearfield candidates based on search tree\n"); /** init source knots in a d-ball with radius 0.25-eps_b/2 */ k = 0; while (k < N) { R r_max = K(0.25) - my_fastsum_plan.eps_B / K(2.0); R r2 = K(0.0); for (j = 0; j < d; j++) my_fastsum_plan.x[k * d + j] = K(2.0) * r_max * NFFT(drand48)() - r_max; for (j = 0; j < d; j++) r2 += my_fastsum_plan.x[k * d + j] * my_fastsum_plan.x[k * d + j]; if (r2 >= r_max * r_max) continue; k++; } for (k = 0; k < N; k++) { /* R r=(0.25-my_fastsum_plan.eps_B/2.0)*pow((R)rand()/(R)RAND_MAX,1.0/d); my_fastsum_plan.x[k*d+0] = r; for (j=1; j<d; j++) { R phi=2.0*KPI*(R)rand()/(R)RAND_MAX; my_fastsum_plan.x[k*d+j] = r; for (t=0; t<j; t++) { my_fastsum_plan.x[k*d+t] *= cos(phi); } my_fastsum_plan.x[k*d+j] *= 
sin(phi); } */ my_fastsum_plan.alpha[k] = NFFT(drand48)() + II * NFFT(drand48)(); } /** init target knots in a d-ball with radius 0.25-eps_b/2 */ k = 0; while (k < M) { R r_max = K(0.25) - my_fastsum_plan.eps_B / K(2.0); R r2 = K(0.0); for (j = 0; j < d; j++) my_fastsum_plan.y[k * d + j] = K(2.0) * r_max * NFFT(drand48)() - r_max; for (j = 0; j < d; j++) r2 += my_fastsum_plan.y[k * d + j] * my_fastsum_plan.y[k * d + j]; if (r2 >= r_max * r_max) continue; k++; } /* for (k=0; k<M; k++) { R r=(0.25-my_fastsum_plan.eps_B/2.0)*pow((R)rand()/(R)RAND_MAX,1.0/d); my_fastsum_plan.y[k*d+0] = r; for (j=1; j<d; j++) { R phi=2.0*KPI*(R)rand()/(R)RAND_MAX; my_fastsum_plan.y[k*d+j] = r; for (t=0; t<j; t++) { my_fastsum_plan.y[k*d+t] *= cos(phi); } my_fastsum_plan.y[k*d+j] *= sin(phi); } } */ /** direct computation */ printf("direct computation: "); fflush(NULL); t0 = getticks(); fastsum_exact(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** copy result */ direct = (C *) NFFT(malloc)((size_t)(my_fastsum_plan.M_total) * (sizeof(C))); for (j = 0; j < my_fastsum_plan.M_total; j++) direct[j] = my_fastsum_plan.f[j]; /** precomputation */ printf("pre-computation: "); fflush(NULL); t0 = getticks(); fastsum_precompute(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** fast computation */ printf("fast computation: "); fflush(NULL); t0 = getticks(); fastsum_trafo(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** compute max error */ error = K(0.0); for (j = 0; j < my_fastsum_plan.M_total; j++) { if (CABS(direct[j] - my_fastsum_plan.f[j]) / CABS(direct[j]) > error) error = CABS(direct[j] - my_fastsum_plan.f[j]) / CABS(direct[j]); } printf("max relative error: %" __FES__ "\n", error); /** finalise the plan */ fastsum_finalize(&my_fastsum_plan); return EXIT_SUCCESS; }
/**
 * Computes the inverse discrete Radon transform of Rf on the grid given by
 * gridfcn() with T angles and S offsets by a NFFT-based CG-type algorithm.
 *
 * Steps: the grid nodes and weights are obtained from gridfcn(); each of the
 * T rows of Rf is transformed by a shifted 1D FFT of length S and divided by
 * KERNEL(r) to form the right hand side; the 2D inverse NFFT is then
 * approximated with max_i solver iterations starting from zero. For
 * max_i < 1 no iteration is run and p_hat_iter as left by
 * SOLVER(before_loop_complex) is taken as the result.
 *
 * \param gridfcn  grid generating function, called as gridfcn(T, S, x, w)
 * \param T        number of directions
 * \param S        number of offsets
 * \param Rf       input samples, T*S values, row t starts at Rf[t*S]
 * \param NN       image size; the result has NN*NN values
 * \param f        output, receives the real parts of the solver iterate
 * \param max_i    number of solver iterations
 * \return 0 on success, EXIT_FAILURE if a work buffer could not be allocated
 */
static int inverse_radon_trafo(int (*gridfcn)(), int T, int S, NFFT_R *Rf,
    int NN, NFFT_R *f, int max_i)
{
  int j, k; /**< index for nodes and freqencies */
  NFFT(plan) my_nfft_plan; /**< plan for the nfft-2D */
  SOLVER(plan_complex) my_infft_plan; /**< plan for the inverse nfft */

  NFFT_C *fft; /**< variable for the fftw-1Ds */
  FFTW(plan) my_fftw_plan; /**< plan for the fftw-1Ds */

  int t, r; /**< index for directions and offsets */
  NFFT_R *x, *w; /**< knots and associated weights */
  int l; /**< index for iterations */

  int N[2], n[2];
  int M = T * S;

  N[0] = NN;
  n[0] = 2 * N[0];
  N[1] = NN;
  n[1] = 2 * N[1];

  /* allocate all work buffers first so that a failure leaks nothing */
  fft = (NFFT_C *) NFFT(malloc)((size_t)(S) * sizeof(NFFT_C));
  if (fft == NULL)
    return EXIT_FAILURE;

  x = (NFFT_R *) NFFT(malloc)((size_t)(2 * T * S) * (sizeof(NFFT_R)));
  if (x == NULL)
  {
    NFFT(free)(fft);
    return EXIT_FAILURE;
  }

  w = (NFFT_R *) NFFT(malloc)((size_t)(T * S) * (sizeof(NFFT_R)));
  if (w == NULL)
  {
    NFFT(free)(x);
    NFFT(free)(fft);
    return EXIT_FAILURE;
  }

  my_fftw_plan = FFTW(plan_dft_1d)(S, fft, fft, FFTW_FORWARD, FFTW_MEASURE);

  /** init two dimensional NFFT plan */
  NFFT(init_guru)(&my_nfft_plan, 2, N, M, n, 4,
      PRE_PHI_HUT | PRE_PSI | MALLOC_X | MALLOC_F_HAT | MALLOC_F | FFTW_INIT
          | FFT_OUT_OF_PLACE,
      FFTW_MEASURE | FFTW_DESTROY_INPUT);

  /** init two dimensional infft plan */
  SOLVER(init_advanced_complex)(&my_infft_plan,
      (NFFT(mv_plan_complex)*) (&my_nfft_plan), CGNR | PRECOMPUTE_WEIGHT);

  /** init nodes and weights of grid*/
  gridfcn(T, S, x, w);
  for (j = 0; j < my_nfft_plan.M_total; j++)
  {
    my_nfft_plan.x[2 * j + 0] = x[2 * j + 0];
    my_nfft_plan.x[2 * j + 1] = x[2 * j + 1];
    /* the first offset of every direction (j % S == 0) gets weight zero */
    if (j % S)
      my_infft_plan.w[j] = w[j];
    else
      my_infft_plan.w[j] = NFFT_K(0.0);
  }

  /** precompute psi, the entries of the matrix B */
  if (my_nfft_plan.flags & PRE_LIN_PSI)
    NFFT(precompute_lin_psi)(&my_nfft_plan);

  if (my_nfft_plan.flags & PRE_PSI)
    NFFT(precompute_psi)(&my_nfft_plan);

  if (my_nfft_plan.flags & PRE_FULL_PSI)
    NFFT(precompute_full_psi)(&my_nfft_plan);

  /** compute 1D-ffts and init given samples and weights */
  for (t = 0; t < T; t++)
  {
    for (r = 0; r < S; r++)
      fft[r] = Rf[t * S + r] + _Complex_I * NFFT_K(0.0);

    NFFT(fftshift_complex_int)(fft, 1, &S);
    FFTW(execute)(my_fftw_plan);
    NFFT(fftshift_complex_int)(fft, 1, &S);

    /* the sample at r = -S/2 is set to zero, the others are divided by
       KERNEL(r) */
    my_infft_plan.y[t * S] = NFFT_K(0.0);
    for (r = -S / 2 + 1; r < S / 2; r++)
      my_infft_plan.y[t * S + (r + S / 2)] = fft[r + S / 2] / KERNEL(r);
  }

  /** initialise some guess f_hat_0 */
  for (k = 0; k < my_nfft_plan.N_total; k++)
    my_infft_plan.f_hat_iter[k] = NFFT_K(0.0) + _Complex_I * NFFT_K(0.0);

  /** solve the system */
  SOLVER(before_loop_complex)(&my_infft_plan);

  if (max_i < 1)
  {
    /* no iteration requested: take p_hat_iter as the result */
    for (k = 0; k < my_nfft_plan.N_total; k++)
      my_infft_plan.f_hat_iter[k] = my_infft_plan.p_hat_iter[k];
  }
  else
  {
    for (l = 1; l <= max_i; l++)
      SOLVER(loop_one_step_complex)(&my_infft_plan);
  }

  /** copy result */
  for (k = 0; k < my_nfft_plan.N_total; k++)
    f[k] = NFFT_M(creal)(my_infft_plan.f_hat_iter[k]);

  /** finalise the plans and free the variables */
  FFTW(destroy_plan)(my_fftw_plan);
  NFFT(free)(fft);
  SOLVER(finalize_complex)(&my_infft_plan);
  NFFT(finalize)(&my_nfft_plan);
  NFFT(free)(x);
  NFFT(free)(w);
  return 0;
}
void nfft_benchomp_createdataset(unsigned int d, unsigned int trafo_adjoint, int *N, int M, double sigma) { int n[d]; int t, j; R *x; C *f, *f_hat; int N_total = 1; for (t = 0; t < d; t++) N_total *= N[t]; x = (R*) NFFT(malloc)(d*M*sizeof(R)); f = (C*) NFFT(malloc)(M*sizeof(C)); f_hat = (C*) NFFT(malloc)(N_total*sizeof(C)); for (t=0; t<d; t++) n[t] = sigma*NFFT(next_power_of_2)(N[t]); /** init pseudo random nodes */ NFFT(vrand_shifted_unit_double)(x,d*M); if (trafo_adjoint==0) { NFFT(vrand_unit_complex)(f_hat,N_total); } else { NFFT(vrand_unit_complex)(f,M); } printf("%d %d ", d, trafo_adjoint); for (t=0; t<d; t++) printf("%d ", N[t]); for (t=0; t<d; t++) printf("%d ", n[t]); printf("%d\n", M); for (j=0; j < M; j++) { for (t=0; t < d; t++) printf("%.16e ", x[d*j+t]); printf("\n"); } if (trafo_adjoint==0) { for (j=0; j < N_total; j++) printf("%.16e %.16e\n", creal(f_hat[j]), cimag(f_hat[j])); } else { for (j=0; j < M; j++) printf("%.16e %.16e\n", creal(f[j]), cimag(f[j])); } NFFT(free)(x); NFFT(free)(f); NFFT(free)(f_hat); }