void nfsft_benchomp_createdataset(unsigned int trafo_adjoint, int N, int M) { int t, j, k, n; R *x; C *f, *f_hat; int N_total = (2*N+2) * (2*N+2); nfsft_plan ptemp; nfsft_init_guru(&ptemp, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F | NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, 6); x = (R*) nfft_malloc(2*M*sizeof(R)); f = (C*) nfft_malloc(M*sizeof(C)); f_hat = (C*) nfft_malloc(N_total*sizeof(C)); /* init pseudo-random nodes */ for (j = 0; j < M; j++) { x[2*j]= X(drand48)() - K(0.5); x[2*j+1]= K(0.5) * X(drand48)(); } if (trafo_adjoint==0) { for (k = 0; k <= N; k++) for (n = -k; n <= k; n++) nfft_vrand_unit_complex(f_hat+NFSFT_INDEX(k,n,&ptemp),1); } else { nfft_vrand_unit_complex(f,M); } printf("%d %d %d\n", trafo_adjoint, N, M); for (j=0; j < M; j++) { for (t=0; t < 2; t++) printf("%.16e ", x[2*j+t]); printf("\n"); } if (trafo_adjoint==0) { for (k = 0; k <= N; k++) for (n = -k; n <= k; n++) printf("%.16e %.16e\n", creal(f_hat[NFSFT_INDEX(k,n,&ptemp)]), cimag(f_hat[NFSFT_INDEX(k,n,&ptemp)])); } else { for (j=0; j < M; j++) printf("%.16e %.16e\n", creal(f[j]), cimag(f[j])); } nfft_free(x); nfft_free(f); nfft_free(f_hat); }
/**
 * Discrete mpolar FFT: evaluates the 2D transform directly
 * (nfft_trafo_direct) at the nodes produced by mpolar_grid().
 *
 * The time spent inside nfft_trafo_direct is stored in GLOBAL_elapsed_time.
 *
 * \arg f_hat input Fourier coefficients, copied into the plan (N_total entries)
 * \arg NN    bandwidth used for both dimensions
 * \arg f     output samples, receives the plan's M_total values
 * \arg T     grid parameter passed to mpolar_grid
 * \arg R     grid parameter passed to mpolar_grid
 * \arg m     passed through to nfft_init_guru
 *
 * \return EXIT_SUCCESS, or -1 if a node/weight buffer could not be allocated
 */
static int mpolar_dft(fftw_complex *f_hat, int NN, fftw_complex *f, int T, int R, int m)
{
  ticks t0, t1;
  int j,k;                              /**< index for nodes and freqencies   */
  nfft_plan my_nfft_plan;               /**< plan for the nfft-2D             */

  double *x, *w;                        /**< knots and associated weights     */

  int N[2],n[2];
  int M;                                /**< number of knots                  */

  N[0]=NN; n[0]=2*N[0];                 /**< oversampling factor sigma=2      */
  N[1]=NN; n[1]=2*N[1];                 /**< oversampling factor sigma=2      */

  x = (double *)nfft_malloc(5*(T/2)*R*(sizeof(double)));
  if (x==NULL)
    return -1;

  w = (double *)nfft_malloc(5*(T*R)/4*(sizeof(double)));
  if (w==NULL)
  {
    /* do not leak the node buffer when the weight buffer is unavailable */
    nfft_free(x);
    return -1;
  }

  /** init two dimensional NFFT plan */
  M=mpolar_grid(T,R,x,w);
  nfft_init_guru(&my_nfft_plan, 2, N, M, n, m,
                 PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
                 FFTW_MEASURE| FFTW_DESTROY_INPUT);

  /** init nodes from mpolar grid*/
  for(j=0;j<my_nfft_plan.M_total;j++)
  {
    my_nfft_plan.x[2*j+0] = x[2*j+0];
    my_nfft_plan.x[2*j+1] = x[2*j+1];
  }

  /** init Fourier coefficients from given image */
  for(k=0;k<my_nfft_plan.N_total;k++)
    my_nfft_plan.f_hat[k] = f_hat[k];

  t0 = getticks();

  /** NDFT-2D */
  nfft_trafo_direct(&my_nfft_plan);

  t1 = getticks();
  GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0);

  /** copy result */
  for(j=0;j<my_nfft_plan.M_total;j++)
    f[j] = my_nfft_plan.f[j];

  /** finalise the plans and free the variables */
  nfft_finalize(&my_nfft_plan);
  nfft_free(x);
  nfft_free(w);

  return EXIT_SUCCESS;
}
/**
 * Simple test program for the inverse discrete Radon transform.
 *
 * Usage: inverse_radon gridfcn N T R max_i
 *
 * Reads T*R doubles from "sinogram_data.bin", runs Inverse_Radon_trafo on
 * them and writes the resulting N*N doubles to "output_data.bin".
 *
 * \return EXIT_SUCCESS on success; -1 if a file cannot be opened, the
 *         sinogram file holds fewer than T*R values, or the result cannot be
 *         written completely. A wrong argument count prints the usage text
 *         and calls exit(-1).
 */
int main(int argc,char **argv)
{
  int (*gridfcn)();                     /**< grid generating function        */
  int T, S;                             /**< number of directions/offsets    */
  FILE *fp;
  int N;                                /**< image size                      */
  double *Rf, *iRf;
  int max_i;                            /**< number of iterations            */
  int status = EXIT_SUCCESS;

  if( argc!=6 )
  {
    printf("inverse_radon gridfcn N T R max_i\n");
    printf("\n");
    printf("gridfcn \"polar\" or \"linogram\" \n");
    printf("N image size NxN \n");
    printf("T number of slopes \n");
    printf("R number of offsets \n");
    printf("max_i number of iterations \n");
    exit(-1);
  }

  /* any grid name other than "polar" selects the linogram grid */
  if (strcmp(argv[1],"polar") == 0)
    gridfcn = polar_grid;
  else
    gridfcn = linogram_grid;

  N = atoi(argv[2]);
  T = atoi(argv[3]);
  S = atoi(argv[4]);
  max_i = atoi(argv[5]);

  Rf  = (double *)nfft_malloc(T*S*(sizeof(double)));
  iRf = (double *)nfft_malloc(N*N*(sizeof(double)));

  /** load data */
  fp=fopen("sinogram_data.bin","rb");
  if (fp==NULL)
  {
    nfft_free(Rf);
    nfft_free(iRf);
    return(-1);
  }
  /* a short read would leave part of Rf uninitialised */
  if (fread(Rf,sizeof(double),T*S,fp) != (size_t)(T*S))
  {
    fprintf(stderr,"inverse_radon: sinogram_data.bin is shorter than T*R values\n");
    fclose(fp);
    nfft_free(Rf);
    nfft_free(iRf);
    return(-1);
  }
  fclose(fp);

  /** inverse Radon transform */
  Inverse_Radon_trafo(gridfcn,T,S,Rf,N,iRf,max_i);

  /** write result */
  fp=fopen("output_data.bin","wb+");
  if (fp==NULL)
  {
    nfft_free(Rf);
    nfft_free(iRf);
    return(-1);
  }
  if (fwrite(iRf,sizeof(double),N*N,fp) != (size_t)(N*N))
  {
    fprintf(stderr,"inverse_radon: could not write output_data.bin completely\n");
    status = -1;
  }
  fclose(fp);

  /** free the variables */
  nfft_free(Rf);
  nfft_free(iRf);

  return status;
}
/**
 * Finalization of a fastsum plan.
 *
 * Releases the arrays owned by the plan, finalizes the two embedded NFFT
 * plans (mv1, mv2) and destroys the FFTW plan. Add is only released when the
 * EXACT_NEARFIELD flag is not set; the box_* arrays only when
 * NEARFIELD_BOXES is set.
 *
 * \arg ths the fastsum plan to tear down
 */
void fastsum_finalize(fastsum_plan *ths)
{
  nfft_free(ths->x);
  nfft_free(ths->alpha);
  nfft_free(ths->y);
  nfft_free(ths->f);

  if ((ths->flags & EXACT_NEARFIELD) == 0)
  {
    nfft_free(ths->Add);
  }

  nfft_finalize(&ths->mv1);
  nfft_finalize(&ths->mv2);

  /* With OpenMP the plan destruction runs inside the named critical
   * section nfft_omp_critical_fftw_plan. */
#ifdef _OPENMP
  #pragma omp critical (nfft_omp_critical_fftw_plan)
  {
    fftw_destroy_plan(ths->fft_plan);
  }
#else
  fftw_destroy_plan(ths->fft_plan);
#endif

  nfft_free(ths->b);

  if ((ths->flags & NEARFIELD_BOXES) != 0)
  {
    nfft_free(ths->box_offset);
    nfft_free(ths->box_alpha);
    nfft_free(ths->box_x);
  }
}
/** * Simple example that computes fast and discrete Gauss transforms. * * \arg ths The pointer to the fgt plan * \arg sigma The parameter of the Gaussian * \arg eps The target accuracy * * \author Stefan Kunis */ void fgt_test_simple(int N, int M, double _Complex sigma, double eps) { fgt_plan my_plan; double _Complex *swap_dgt; fgt_init(&my_plan, N, M, sigma, eps); swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*sizeof(double _Complex)); fgt_test_init_rand(&my_plan); fgt_init_node_dependent(&my_plan); NFFT_SWAP_complex(swap_dgt,my_plan.f); dgt_trafo(&my_plan); nfft_vpr_complex(my_plan.f,my_plan.M,"discrete gauss transform"); NFFT_SWAP_complex(swap_dgt,my_plan.f); fgt_trafo(&my_plan); nfft_vpr_complex(my_plan.f,my_plan.M,"fast gauss transform"); printf("\n relative error: %1.3e\n", X(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N)); nfft_free(swap_dgt); fgt_finalize(&my_plan); }
/**
 * Destroys the transform plan.
 *
 * Finalizes the two NFFT plans (nplan2 first, then nplan1) and afterwards
 * releases the plan structures and the arrays b, f, y, alpha and x.
 *
 * \arg ths The pointer to the fgt plan
 * \author Stefan Kunis
 */
void fgt_finalize(fgt_plan *ths)
{
  int idx;

  nfft_finalize(ths->nplan2);
  nfft_finalize(ths->nplan1);

  {
    /* everything that is handed to nfft_free, in release order */
    void *owned[] = {
      ths->nplan2,
      ths->nplan1,
      ths->b,
      ths->f,
      ths->y,
      ths->alpha,
      ths->x
    };

    for (idx = 0; idx < (int)(sizeof(owned)/sizeof(owned[0])); idx++)
      nfft_free(owned[idx]);
  }
}
/** * Compares accuracy of the fast Gauss transform with increasing expansion * degree. * * \author Stefan Kunis */ void fgt_test_error(void) { fgt_plan my_plan; double _Complex *swap_dgt; int n,mi; double _Complex sigma=4*(138+ _Complex_I*100); int N=1000; int M=1000; int m[2]={7,3}; printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma)); printf("error=[\n"); swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex)); for(n=8; n<=128; n+=4) { printf("%d\t",n); for(mi=0;mi<2;mi++) { fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0); fgt_test_init_rand(&my_plan); fgt_init_node_dependent(&my_plan); NFFT_SWAP_complex(swap_dgt,my_plan.f); dgt_trafo(&my_plan); NFFT_SWAP_complex(swap_dgt,my_plan.f); fgt_trafo(&my_plan); printf("%1.3e\t", X(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N)); fflush(stdout); fgt_finalize(&my_plan); fftw_cleanup(); } printf("\n"); } printf("];\n"); nfft_free(swap_dgt); }
/**
 * Entry point of reconstruct_data_gridding.
 *
 * Usage: ./reconstruct_data_gridding FILENAME N M Z ITER WEIGHTS
 *
 * Runs reconstruct() to fill one N*N slice per z position, applies a 1D
 * backward FFT of length Z across the slices and hands the result to
 * print().
 *
 * \return 0 on success, 1 when too few arguments are given
 */
int main(int argc, char **argv)
{
  fftw_complex *mem;
  fftw_plan plan;
  int N,M,Z;

  if (argc <= 6)
  {
    printf("usage: ./reconstruct_data_gridding FILENAME N M Z ITER WEIGHTS\n");
    return 1;
  }

  N=atoi(argv[2]);
  M=atoi(argv[3]);
  Z=atoi(argv[4]);

  /* Allocate memory to hold every slice in memory after the 2D-infft */
  mem = (fftw_complex*) nfft_malloc(sizeof(fftw_complex) * N * N * Z);

  /* Create plan for the 1d-ifft; this happens before mem is filled. */
  plan = fftw_plan_many_dft(1, &Z, N*N,
                            mem, NULL,
                            N*N, 1,
                            mem, NULL,
                            N*N,1 ,
                            FFTW_BACKWARD, FFTW_MEASURE);

  /* execute the 2d-nfft's */
  /* NOTE(review): ITER (argv[5]) is never read; the fifth argument of
   * reconstruct() receives WEIGHTS (argv[6]) -- confirm this is intended. */
  reconstruct(argv[1],N,M,Z,atoi(argv[6]),mem);

  /* execute the 1d-fft's */
  fftw_execute(plan);

  /* write the memory back in files */
  print(N,M,Z, mem);

  /* free memory */
  /* NOTE(review): plan is not destroyed before the program ends. */
  nfft_free(mem);

  /* report success to the caller (the original returned 1 here) */
  return 0;
}
/**
 * Entry point of construct_data.
 *
 * Usage: ./construct_data FILENAME N M Z
 *
 * Allocates room for N*N*Z complex values, then calls read_data(), fft()
 * and construct() on that buffer.
 *
 * \return 0 on success, 1 when too few arguments are given
 */
int main(int argc, char **argv)
{
  fftw_complex *mem;
  int N, M, Z;

  if (argc <= 4)
  {
    printf("usage: ./construct_data FILENAME N M Z\n");
    return 1;
  }

  /* parse each size once instead of calling atoi at every use */
  N = atoi(argv[2]);
  M = atoi(argv[3]);
  Z = atoi(argv[4]);

  mem = (fftw_complex*) nfft_malloc(sizeof(fftw_complex) * N * N * Z);

  read_data(N,M,Z, mem);

  fft(N,M,Z, mem);

  construct(argv[1],N,M,Z, mem);

  nfft_free(mem);

  /* report success to the caller (the original returned 1 here) */
  return 0;
}
/**
 * Inverse NFFT-based mpolar FFT.
 *
 * Sets up a 2D NFFT plan on the nodes produced by mpolar_grid() and a CGNR
 * solver with precomputed weights on top of it, then either performs max_i
 * solver iterations or, for max_i < 1, takes p_hat_iter as the result.
 * The time spent in the solver is stored in GLOBAL_elapsed_time.
 *
 * \arg f      given samples, copied into the solver's y (M_total entries)
 * \arg T      grid parameter passed to mpolar_grid
 * \arg R      grid parameter passed to mpolar_grid
 * \arg f_hat  output, receives the N_total entries of f_hat_iter
 * \arg NN     bandwidth used for both dimensions
 * \arg max_i  number of solver iterations
 * \arg m      passed through to nfft_init_guru
 *
 * \return EXIT_SUCCESS, or -1 if a node/weight buffer could not be allocated
 */
static int inverse_mpolar_fft(fftw_complex *f, int T, int R, fftw_complex *f_hat, int NN, int max_i, int m)
{
  ticks t0, t1;
  int j,k;                              /**< index for nodes and freqencies   */
  nfft_plan my_nfft_plan;               /**< plan for the nfft-2D             */
  solver_plan_complex my_infft_plan;    /**< plan for the inverse nfft        */

  double *x, *w;                        /**< knots and associated weights     */
  int l;                                /**< index for iterations             */

  int N[2],n[2];
  int M;                                /**< number of knots                  */

  N[0]=NN; n[0]=2*N[0];                 /**< oversampling factor sigma=2      */
  N[1]=NN; n[1]=2*N[1];                 /**< oversampling factor sigma=2      */

  x = (double *)nfft_malloc(5*T*R/2*(sizeof(double)));
  if (x==NULL)
    return -1;

  w = (double *)nfft_malloc(5*T*R/4*(sizeof(double)));
  if (w==NULL)
  {
    /* do not leak the node buffer when the weight buffer is unavailable */
    nfft_free(x);
    return -1;
  }

  /** init two dimensional NFFT plan */
  M=mpolar_grid(T,R,x,w);
  nfft_init_guru(&my_nfft_plan, 2, N, M, n, m,
                 PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
                 FFTW_MEASURE| FFTW_DESTROY_INPUT);

  /** init two dimensional infft plan */
  solver_init_advanced_complex(&my_infft_plan,(nfft_mv_plan_complex*)(&my_nfft_plan), CGNR | PRECOMPUTE_WEIGHT );

  /** init nodes, given samples and weights */
  for(j=0;j<my_nfft_plan.M_total;j++)
  {
    my_nfft_plan.x[2*j+0] = x[2*j+0];
    my_nfft_plan.x[2*j+1] = x[2*j+1];
    my_infft_plan.y[j]    = f[j];
    my_infft_plan.w[j]    = w[j];
  }

  /** precompute psi, the entries of the matrix B */
  if(my_nfft_plan.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_PSI)
    nfft_precompute_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_FULL_PSI)
    nfft_precompute_full_psi(&my_nfft_plan);

  /** initialise damping factors: 1 inside the disc of radius N[0]/2, else 0 */
  if(my_infft_plan.flags & PRECOMPUTE_DAMP)
    for(j=0;j<my_nfft_plan.N[0];j++)
      for(k=0;k<my_nfft_plan.N[1];k++)
      {
        my_infft_plan.w_hat[j*my_nfft_plan.N[1]+k]=
          (sqrt(pow(j-my_nfft_plan.N[0]/2,2)+pow(k-my_nfft_plan.N[1]/2,2))>(my_nfft_plan.N[0]/2)?0:1);
      }

  /** initialise some guess f_hat_0 */
  for(k=0;k<my_nfft_plan.N_total;k++)
    my_infft_plan.f_hat_iter[k] = 0.0 + _Complex_I*0.0;

  t0 = getticks();

  /** solve the system */
  solver_before_loop_complex(&my_infft_plan);

  if (max_i<1)
  {
    /* no iteration requested: use p_hat_iter as the result */
    l=1;
    for(k=0;k<my_nfft_plan.N_total;k++)
      my_infft_plan.f_hat_iter[k] = my_infft_plan.p_hat_iter[k];
  }
  else
  {
    for(l=1;l<=max_i;l++)
    {
      solver_loop_one_step_complex(&my_infft_plan);
    }
  }

  t1 = getticks();
  GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0);

  /** copy result */
  for(k=0;k<my_nfft_plan.N_total;k++)
    f_hat[k] = my_infft_plan.f_hat_iter[k];

  /** finalise the plans and free the variables */
  solver_finalize_complex(&my_infft_plan);
  nfft_finalize(&my_nfft_plan);
  nfft_free(x);
  nfft_free(w);

  return EXIT_SUCCESS;
}
/** * reconstruct */ static void reconstruct(char* filename,int N,int M,int iteration, int weight) { int j,k,l; /* some variables */ nnfft_plan my_plan; /* plan for the two dimensional nfft */ solver_plan_complex my_iplan; /* plan for the two dimensional infft */ FILE* fin; /* input file */ FILE* finh; FILE* ftime; FILE* fout_real; /* output file */ FILE* fout_imag; /* output file */ int my_N[3],my_n[3]; /* to init the nfft */ double t0, t1; double t,epsilon=0.0000003; /* epsilon is a the break criterium for the iteration */ unsigned infft_flags = CGNR | PRECOMPUTE_DAMP; /* flags for the infft*/ double time,min_time,max_time,min_inh,max_inh; double real,imag; double *w; double Ts; double W; int N3; int m=2; double sigma = 1.25; w = (double*)nfft_malloc(N*N*sizeof(double)); ftime=fopen("readout_time.dat","r"); finh=fopen("inh.dat","r"); min_time=INT_MAX; max_time=INT_MIN; for(j=0;j<M;j++) { fscanf(ftime,"%le ",&time); if(time<min_time) min_time = time; if(time>max_time) max_time = time; } fclose(ftime); Ts=(min_time+max_time)/2.0; min_inh=INT_MAX; max_inh=INT_MIN; for(j=0;j<N*N;j++) { fscanf(finh,"%le ",&w[j]); if(w[j]<min_inh) min_inh = w[j]; if(w[j]>max_inh) max_inh = w[j]; } fclose(finh); N3=ceil((MAX(fabs(min_inh),fabs(max_inh))*(max_time-min_time)/2.0)*4); W=MAX(fabs(min_inh),fabs(max_inh))*2.0; fprintf(stderr,"3: %i %e %e %e %e %e %e\n",N3,W,min_inh,max_inh,min_time,max_time,Ts); /* initialise my_plan */ my_N[0]=N;my_n[0]=ceil(N*sigma); my_N[1]=N; my_n[1]=ceil(N*sigma); my_N[2]=N3; my_n[2]=ceil(N3*sigma); nnfft_init_guru(&my_plan, 3, N*N, M, my_N,my_n,m, PRE_PSI| PRE_PHI_HUT| MALLOC_X| MALLOC_V| MALLOC_F_HAT| MALLOC_F ); /* precompute lin psi if set */ if(my_plan.nnfft_flags & PRE_LIN_PSI) nnfft_precompute_lin_psi(&my_plan); /* set the flags for the infft*/ if (weight) infft_flags = infft_flags | PRECOMPUTE_WEIGHT; /* initialise my_iplan, advanced */ solver_init_advanced_complex(&my_iplan,(nfft_mv_plan_complex*)(&my_plan), infft_flags ); /* get the weights */ 
if(my_iplan.flags & PRECOMPUTE_WEIGHT) { fin=fopen("weights.dat","r"); for(j=0;j<my_plan.M_total;j++) { fscanf(fin,"%le ",&my_iplan.w[j]); } fclose(fin); } /* get the damping factors */ if(my_iplan.flags & PRECOMPUTE_DAMP) { for(j=0;j<N;j++){ for(k=0;k<N;k++) { int j2= j-N/2; int k2= k-N/2; double r=sqrt(j2*j2+k2*k2); if(r>(double) N/2) my_iplan.w_hat[j*N+k]=0.0; else my_iplan.w_hat[j*N+k]=1.0; } } } /* open the input file */ fin=fopen(filename,"r"); ftime=fopen("readout_time.dat","r"); for(j=0;j<my_plan.M_total;j++) { fscanf(fin,"%le %le %le %le ",&my_plan.x[3*j+0],&my_plan.x[3*j+1],&real,&imag); my_iplan.y[j]=real+ _Complex_I*imag; fscanf(ftime,"%le ",&my_plan.x[3*j+2]); my_plan.x[3*j+2] = (my_plan.x[3*j+2]-Ts)*W/N3; } for(j=0;j<N;j++) { for(l=0;l<N;l++) { my_plan.v[3*(N*j+l)+0]=(((double) j) -(((double) N)/2.0))/((double) N); my_plan.v[3*(N*j+l)+1]=(((double) l) -(((double) N)/2.0))/((double) N); my_plan.v[3*(N*j+l)+2] = w[N*j+l]/W ; } } /* precompute psi */ if(my_plan.nnfft_flags & PRE_PSI) { nnfft_precompute_psi(&my_plan); if(my_plan.nnfft_flags & PRE_FULL_PSI) nnfft_precompute_full_psi(&my_plan); } if(my_plan.nnfft_flags & PRE_PHI_HUT) nnfft_precompute_phi_hut(&my_plan); /* init some guess */ for(k=0;k<my_plan.N_total;k++) { my_iplan.f_hat_iter[k]=0.0; } t0 = nfft_clock_gettime_seconds(); /* inverse trafo */ solver_before_loop_complex(&my_iplan); for(l=0;l<iteration;l++) { /* break if dot_r_iter is smaller than epsilon*/ if(my_iplan.dot_r_iter<epsilon) break; fprintf(stderr,"%e, %i of %i\n",sqrt(my_iplan.dot_r_iter), l+1,iteration); solver_loop_one_step_complex(&my_iplan); } t1 = nfft_clock_gettime_seconds(); t = t1-t0; fout_real=fopen("output_real.dat","w"); fout_imag=fopen("output_imag.dat","w"); for(k=0;k<my_plan.N_total;k++) { my_iplan.f_hat_iter[k]*=cexp(2.0*_Complex_I*M_PI*Ts*w[k]); fprintf(fout_real,"%le ", creal(my_iplan.f_hat_iter[k])); fprintf(fout_imag,"%le ", cimag(my_iplan.f_hat_iter[k])); } fclose(fout_real); fclose(fout_imag); /* finalize the 
infft */ solver_finalize_complex(&my_iplan); /* finalize the nfft */ nnfft_finalize(&my_plan); nfft_free(w); }
/**
 * Moves the first `used` entries of `old` into a fresh nfft_malloc'ed int
 * buffer with room for `len` entries, releases `old` and returns the new one.
 */
static int *nfst_full_psi_resize_int(int *old, int used, int len)
{
  int *fresh = (int*)nfft_malloc((len > 0 ? len : 1) * sizeof(int));
  int c;

  for (c = 0; c < used; c++)
    fresh[c] = old[c];

  nfft_free(old);
  return fresh;
}

/**
 * Moves the first `used` entries of `old` into a fresh nfft_malloc'ed double
 * buffer with room for `len` entries, releases `old` and returns the new one.
 */
static double *nfst_full_psi_resize_double(double *old, int used, int len)
{
  double *fresh = (double*)nfft_malloc((len > 0 ? len : 1) * sizeof(double));
  int c;

  for (c = 0; c < used; c++)
    fresh[c] = old[c];

  nfft_free(old);
  return fresh;
}

/**
 * more memory usage, a bit faster
 *
 * Only acts when the plan has PRE_PSI set. For every node all lprod
 * products phi_tilde[d] are evaluated; those with absolute value above the
 * threshold are stored in a new psi array together with their plain index
 * (psi_index_g). psi_index_f[j] holds the number of entries kept for node j.
 * The plan's previous psi is released and size_psi is set to the number of
 * kept entries.
 *
 * The working buffers are grown and finally trimmed with nfft_malloc /
 * nfft_free only: they start out as nfft_malloc memory and the arrays stored
 * in the plan are released with nfft_free (see nfst_finalize), so they must
 * not pass through realloc().
 *
 * \arg ths the nfst plan
 * \arg eps threshold; multiplied by PHI(0,t) for every dimension t before use
 */
void nfst_full_psi(nfst_plan *ths, double eps)
{
  int t, i;                             /**< index over all dimensions        */
  int j;                                /**< index over all nodes             */
  int l_L;                              /**< plain index 0<=l_L<lprod         */
  int lc[ths->d];                       /**< multi index 0<=lj<u+o+1          */
  int lg_plain[ths->d+1];               /**< postfix plain index              */
  int count_lg[ths->d];
  int lg_offset[ths->d];
  int lg[ths->d];
  int lprod;                            /**< 'bandwidth' of matrix B          */
  int lb[ths->d];                       /**< depends on x_j                   */

  double phi_tilde[ths->d+1];

  int *index_g, *index_f;
  double *new_psi;
  int ix, ix_old, size_psi;

  phi_tilde[0] = 1.0;
  lg_plain[0]  =   0;

  if(ths->nfst_flags & PRE_PSI)
  {
    size_psi = ths->M_total;

    index_f  =    (int*)nfft_malloc( ths->M_total  * sizeof( int));
    index_g  =    (int*)nfft_malloc( size_psi * sizeof( int));
    new_psi  = (double*)nfft_malloc( size_psi * sizeof( double));

    for( t = 0,lprod = 1; t < ths->d; t++)
    {
      lprod *= NFST_SUMMANDS;
      eps *= PHI( 0, t);
    }

    for( ix = 0, ix_old = 0, j = 0; j < ths->M_total; j++)
    {
      MACRO_init_lb_lg_lc_phi_tilde_lg_plain( with_PRE_PSI);

      for( l_L = 0; l_L < lprod; l_L++)
      {
        MACRO_update__phi_tilde__lg_plain( with_PRE_PSI);

        if( fabs(phi_tilde[ths->d]) > eps)
        {
          index_g[ix] = lg_plain[ths->d];
          new_psi[ix] = phi_tilde[ths->d];

          ix++;
          if( ix == size_psi)
          {
            /* buffers are full: enlarge both by another M_total entries */
            size_psi += ths->M_total;
            index_g = nfst_full_psi_resize_int( index_g, ix, size_psi);
            new_psi = nfst_full_psi_resize_double( new_psi, ix, size_psi);
          }
        }
        MACRO_count__lg_lc;

      } /* for(l_L) */

      index_f[j] = ix - ix_old;
      ix_old     = ix;

    } /* for(j) */

    nfft_free( ths->psi);

    /* trim both buffers to the number of entries actually stored */
    size_psi      = ix;
    ths->size_psi = size_psi;
    index_g = nfst_full_psi_resize_int( index_g, ix, size_psi);
    new_psi = nfst_full_psi_resize_double( new_psi, ix, size_psi);

    ths->psi         = new_psi;
    ths->psi_index_g = index_g;
    ths->psi_index_f = index_f;

  } /* if(PRE_PSI) */
} /* nfst_full_psi */
/**
 * Prints a pseudo-random NFFT benchmark data set to stdout.
 *
 * Output: one header line "d trafo_adjoint N[0..d-1] n[0..d-1] M", then M
 * lines with the d coordinates of each node, then one "real imag" line per
 * coefficient -- the N_total entries of f_hat when trafo_adjoint is 0,
 * otherwise the M entries of f.
 *
 * \arg d             number of dimensions
 * \arg trafo_adjoint 0 selects random Fourier coefficients, any other value
 *                    selects random samples
 * \arg N             array of d bandwidths
 * \arg M             number of nodes
 * \arg sigma         factor applied to nfft_next_power_of_2(N[t]) to obtain n[t]
 */
void nfft_benchomp_createdataset(unsigned int d, unsigned int trafo_adjoint, int *N, int M, double sigma)
{
  int n[d];
  int t, j;
  R *x;
  C *f, *f_hat;
  int N_total = 1;

  for (t = 0; t < d; t++)
    N_total *= N[t];

  /* sizeof first, so that the element count is computed in size_t rather
   * than in unsigned int */
  x = (R*) nfft_malloc(sizeof(R)*d*M);
  f = (C*) nfft_malloc(M*sizeof(C));
  f_hat = (C*) nfft_malloc(N_total*sizeof(C));

  for (t=0; t<d; t++)
    n[t] = sigma*nfft_next_power_of_2(N[t]);

  /** init pseudo random nodes */
  nfft_vrand_shifted_unit_double(x,d*M);

  if (trafo_adjoint==0)
  {
    nfft_vrand_unit_complex(f_hat,N_total);
  }
  else
  {
    nfft_vrand_unit_complex(f,M);
  }

  /* d and trafo_adjoint are unsigned, so they are printed with %u (was %d) */
  printf("%u %u ", d, trafo_adjoint);

  for (t=0; t<d; t++)
    printf("%d ", N[t]);

  for (t=0; t<d; t++)
    printf("%d ", n[t]);

  printf("%d\n", M);

  for (j=0; j < M; j++)
  {
    for (t=0; t < d; t++)
      printf("%.16e ", x[d*j+t]);
    printf("\n");
  }

  if (trafo_adjoint==0)
  {
    for (j=0; j < N_total; j++)
      printf("%.16e %.16e\n", creal(f_hat[j]), cimag(f_hat[j]));
  }
  else
  {
    for (j=0; j < M; j++)
      printf("%.16e %.16e\n", creal(f[j]), cimag(f[j]));
  }

  nfft_free(x);
  nfft_free(f);
  nfft_free(f_hat);
}
/**
 * Releases everything an nfst plan owns.
 *
 * Which members are released depends on the plan's nfst_flags:
 *  - FFTW_INIT:    the r2r FFTW plan and g1 (plus g2 with FFT_OUT_OF_PLACE)
 *  - PRE_PSI:      psi (plus psi_index_g / psi_index_f with PRE_FULL_PSI)
 *  - PRE_PHI_HUT:  every c_phi_inv[t] and the c_phi_inv array itself
 *  - MALLOC_F / MALLOC_F_HAT / MALLOC_X: f, f_hat and x respectively
 * N, n, sigma and r2r_kind are always released, after WINDOW_HELP_FINALIZE.
 *
 * \arg ths the nfst plan to tear down
 */
void nfst_finalize( nfst_plan *ths)
{
  int t; /* index over dimensions */

  if( (ths->nfst_flags & FFTW_INIT) != 0)
  {
    fftw_destroy_plan( ths->my_fftw_r2r_plan);

    if( (ths->nfst_flags & FFT_OUT_OF_PLACE) != 0)
    {
      nfft_free( ths->g2);
    }

    nfft_free( ths->g1);
  }

  /* NO FFTW_FREE HERE */
  if( (ths->nfst_flags & PRE_PSI) != 0)
  {
    if( (ths->nfst_flags & PRE_FULL_PSI) != 0)
    {
      nfft_free( ths->psi_index_g);
      nfft_free( ths->psi_index_f);
    }

    nfft_free( ths->psi);
  }

  if( (ths->nfst_flags & PRE_PHI_HUT) != 0)
  {
    /* per-dimension tables first, then the table of pointers */
    t = 0;
    while( t < ths->d)
    {
      nfft_free( ths->c_phi_inv[t]);
      t++;
    }

    nfft_free( ths->c_phi_inv);
  }

  if( (ths->nfst_flags & MALLOC_F) != 0)
  {
    nfft_free( ths->f);
  }

  if( (ths->nfst_flags & MALLOC_F_HAT) != 0)
  {
    nfft_free( ths->f_hat);
  }

  if( (ths->nfst_flags & MALLOC_X) != 0)
  {
    nfft_free( ths->x);
  }

  WINDOW_HELP_FINALIZE;

  nfft_free( ths->N);
  nfft_free( ths->n);
  nfft_free( ths->sigma);

  nfft_free(ths->r2r_kind);
} /* nfst_finalize */
/** * The main program. * * \param argc The number of arguments * \param argv An array containing the arguments as C-strings * * \return Exit code * * \author Jens Keiner */ int main (int argc, char **argv) { double **p; /* The array containing the parameter sets * * for the kernel functions */ int *m; /* The array containing the cut-off degrees M */ int **ld; /* The array containing the numbers of source * * and target nodes, L and D */ int ip; /* Index variable for p */ int im; /* Index variable for m */ int ild; /* Index variable for l */ int ipp; /* Index for kernel parameters */ int ip_max; /* The maximum index for p */ int im_max; /* The maximum index for m */ int ild_max; /* The maximum index for l */ int ipp_max; /* The maximum index for ip */ int tc_max; /* The number of testcases */ int m_max; /* The maximum cut-off degree M for the * * current dataset */ int l_max; /* The maximum number of source nodes L for * * the current dataset */ int d_max; /* The maximum number of target nodes D for * * the current dataset */ long ld_max_prec; /* The maximum number of source and target * * nodes for precomputation multiplied */ long l_max_prec; /* The maximum number of source nodes for * * precomputation */ int tc; /* Index variable for testcases */ int kt; /* The kernel function */ int cutoff; /* The current NFFT cut-off parameter */ double threshold; /* The current NFSFT threshold parameter */ double t_d; /* Time for direct algorithm in seconds */ double t_dp; /* Time for direct algorithm with * precomputation in seconds */ double t_fd; /* Time for fast direct algorithm in seconds */ double t_f; /* Time for fast algorithm in seconds */ double temp; /* */ double err_f; /* Error E_infty for fast algorithm */ double err_fd; /* Error E_\infty for fast direct algorithm */ ticks t0, t1; /* */ int precompute = NO; /* */ fftw_complex *ptr; /* */ double* steed; /* */ fftw_complex *b; /* The weights (b_l)_{l=0}^{L-1} */ fftw_complex *f_hat; /* The spherical Fourier 
coefficients */ fftw_complex *a; /* The Fourier-Legendre coefficients */ double *xi; /* Target nodes */ double *eta; /* Source nodes */ fftw_complex *f_m; /* Approximate function values */ fftw_complex *f; /* Exact function values */ fftw_complex *prec = NULL; /* */ nfsft_plan plan; /* NFSFT plan */ nfsft_plan plan_adjoint; /* adjoint NFSFT plan */ int i; /* */ int k; /* */ int n; /* */ int d; /* */ int l; /* */ int use_nfsft; /* */ int use_nfft; /* */ int use_fpt; /* */ int rinc; /* */ double constant; /* */ /* Read the number of testcases. */ fscanf(stdin,"testcases=%d\n",&tc_max); fprintf(stdout,"%d\n",tc_max); /* Process each testcase. */ for (tc = 0; tc < tc_max; tc++) { /* Check if the fast transform shall be used. */ fscanf(stdin,"nfsft=%d\n",&use_nfsft); fprintf(stdout,"%d\n",use_nfsft); if (use_nfsft != NO) { /* Check if the NFFT shall be used. */ fscanf(stdin,"nfft=%d\n",&use_nfft); fprintf(stdout,"%d\n",use_nfft); if (use_nfft != NO) { /* Read the cut-off parameter. */ fscanf(stdin,"cutoff=%d\n",&cutoff); fprintf(stdout,"%d\n",cutoff); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ cutoff = 1; } /* Check if the fast polynomial transform shall be used. */ fscanf(stdin,"fpt=%d\n",&use_fpt); fprintf(stdout,"%d\n",use_fpt); /* Read the NFSFT threshold parameter. */ fscanf(stdin,"threshold=%lf\n",&threshold); fprintf(stdout,"%lf\n",threshold); } else { /* TODO remove this */ /* Set dummy values. */ cutoff = 3; threshold = 1000000000000.0; } /* Initialize bandwidth bound. */ m_max = 0; /* Initialize source nodes bound. */ l_max = 0; /* Initialize target nodes bound. */ d_max = 0; /* Initialize source nodes bound for precomputation. */ l_max_prec = 0; /* Initialize source and target nodes bound for precomputation. */ ld_max_prec = 0; /* Read the kernel type. This is one of KT_ABEL_POISSON, KT_SINGULARITY, * KT_LOC_SUPP and KT_GAUSSIAN. 
*/ fscanf(stdin,"kernel=%d\n",&kt); fprintf(stdout,"%d\n",kt); /* Read the number of parameter sets. */ fscanf(stdin,"parameter_sets=%d\n",&ip_max); fprintf(stdout,"%d\n",ip_max); /* Allocate memory for pointers to parameter sets. */ p = (double**) nfft_malloc(ip_max*sizeof(double*)); /* We now read in the parameter sets. */ /* Read number of parameters. */ fscanf(stdin,"parameters=%d\n",&ipp_max); fprintf(stdout,"%d\n",ipp_max); for (ip = 0; ip < ip_max; ip++) { /* Allocate memory for the parameters. */ p[ip] = (double*) nfft_malloc(ipp_max*sizeof(double)); /* Read the parameters. */ for (ipp = 0; ipp < ipp_max; ipp++) { /* Read the next parameter. */ fscanf(stdin,"%lf\n",&p[ip][ipp]); fprintf(stdout,"%lf\n",p[ip][ipp]); } } /* Read the number of cut-off degrees. */ fscanf(stdin,"bandwidths=%d\n",&im_max); fprintf(stdout,"%d\n",im_max); m = (int*) nfft_malloc(im_max*sizeof(int)); /* Read the cut-off degrees. */ for (im = 0; im < im_max; im++) { /* Read cut-off degree. */ fscanf(stdin,"%d\n",&m[im]); fprintf(stdout,"%d\n",m[im]); m_max = MAX(m_max,m[im]); } /* Read number of node specifications. */ fscanf(stdin,"node_sets=%d\n",&ild_max); fprintf(stdout,"%d\n",ild_max); ld = (int**) nfft_malloc(ild_max*sizeof(int*)); /* Read the run specification. */ for (ild = 0; ild < ild_max; ild++) { /* Allocate memory for the run parameters. */ ld[ild] = (int*) nfft_malloc(5*sizeof(int)); /* Read number of source nodes. */ fscanf(stdin,"L=%d ",&ld[ild][0]); fprintf(stdout,"%d\n",ld[ild][0]); l_max = MAX(l_max,ld[ild][0]); /* Read number of target nodes. */ fscanf(stdin,"D=%d ",&ld[ild][1]); fprintf(stdout,"%d\n",ld[ild][1]); d_max = MAX(d_max,ld[ild][1]); /* Determine whether direct and fast algorithm shall be compared. */ fscanf(stdin,"compare=%d ",&ld[ild][2]); fprintf(stdout,"%d\n",ld[ild][2]); /* Check if precomputation for the direct algorithm is used. */ if (ld[ild][2] == YES) { /* Read whether the precomputed version shall also be used. 
*/ fscanf(stdin,"precomputed=%d\n",&ld[ild][3]); fprintf(stdout,"%d\n",ld[ild][3]); /* Read the number of repetitions over which measurements are * averaged. */ fscanf(stdin,"repetitions=%d\n",&ld[ild][4]); fprintf(stdout,"%d\n",ld[ild][4]); /* Update ld_max_prec and l_max_prec. */ if (ld[ild][3] == YES) { /* Update ld_max_prec. */ ld_max_prec = MAX(ld_max_prec,ld[ild][0]*ld[ild][1]); /* Update l_max_prec. */ l_max_prec = MAX(l_max_prec,ld[ild][0]); /* Turn on the precomputation for the direct algorithm. */ precompute = YES; } } else { /* Set default value for the number of repetitions. */ ld[ild][4] = 1; } } /* Allocate memory for data structures. */ b = (fftw_complex*) nfft_malloc(l_max*sizeof(fftw_complex)); eta = (double*) nfft_malloc(2*l_max*sizeof(double)); f_hat = (fftw_complex*) nfft_malloc(NFSFT_F_HAT_SIZE(m_max)*sizeof(fftw_complex)); a = (fftw_complex*) nfft_malloc((m_max+1)*sizeof(fftw_complex)); xi = (double*) nfft_malloc(2*d_max*sizeof(double)); f_m = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex)); f = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex)); /* Allocate memory for precomputed data. */ if (precompute == YES) { prec = (fftw_complex*) nfft_malloc(ld_max_prec*sizeof(fftw_complex)); } /* Generate random source nodes and weights. */ for (l = 0; l < l_max; l++) { b[l] = (((double)rand())/RAND_MAX) - 0.5; eta[2*l] = (((double)rand())/RAND_MAX) - 0.5; eta[2*l+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI); } /* Generate random target nodes. */ for (d = 0; d < d_max; d++) { xi[2*d] = (((double)rand())/RAND_MAX) - 0.5; xi[2*d+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI); } /* Do precomputation. */ nfsft_precompute(m_max,threshold, ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U); /* Process all parameter sets. */ for (ip = 0; ip < ip_max; ip++) { /* Compute kernel coeffcients up to the maximum cut-off degree m_max. 
*/ switch (kt) { case KT_ABEL_POISSON: /* Compute Fourier-Legendre coefficients for the Poisson kernel. */ for (k = 0; k <= m_max; k++) a[k] = SYMBOL_ABEL_POISSON(k,p[ip][0]); break; case KT_SINGULARITY: /* Compute Fourier-Legendre coefficients for the singularity * kernel. */ for (k = 0; k <= m_max; k++) a[k] = SYMBOL_SINGULARITY(k,p[ip][0]); break; case KT_LOC_SUPP: /* Compute Fourier-Legendre coefficients for the locally supported * kernel. */ a[0] = 1.0; if (1 <= m_max) a[1] = ((p[ip][1]+1+p[ip][0])/(p[ip][1]+2.0))*a[0]; for (k = 2; k <= m_max; k++) a[k] = (1.0/(k+p[ip][1]+1))*((2*k-1)*p[ip][0]*a[k-1] - (k-p[ip][1]-2)*a[k-2]); break; case KT_GAUSSIAN: /* Fourier-Legendre coefficients */ steed = (double*) nfft_malloc((m_max+1)*sizeof(double)); smbi(2.0*p[ip][0],0.5,m_max+1,2,steed); for (k = 0; k <= m_max; k++) a[k] = K2PI*(sqrt(KPI/p[ip][0]))*steed[k]; nfft_free(steed); break; } /* Normalize Fourier-Legendre coefficients. */ for (k = 0; k <= m_max; k++) a[k] *= (2*k+1)/(K4PI); /* Process all node sets. */ for (ild = 0; ild < ild_max; ild++) { /* Check if the fast algorithm shall be used. */ if (ld[ild][2] != NO) { /* Check if the direct algorithm with precomputation should be * tested. */ if (ld[ild][3] != NO) { /* Get pointer to start of data. */ ptr = prec; /* Calculate increment from one row to the next. */ rinc = l_max_prec-ld[ild][0]; /* Process al target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute inner product between current source and target * node. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Switch by the kernel type. */ switch (kt) { case KT_ABEL_POISSON: /* Evaluate the Poisson kernel for the current value. */ *ptr++ = poissonKernel(temp,p[ip][0]); break; case KT_SINGULARITY: /* Evaluate the singularity kernel for the current * value. 
*/ *ptr++ = singularityKernel(temp,p[ip][0]); break; case KT_LOC_SUPP: /* Evaluate the localized kernel for the current * value. */ *ptr++ = locallySupportedKernel(temp,p[ip][0],p[ip][1]); break; case KT_GAUSSIAN: /* Evaluate the spherical Gaussian kernel for the current * value. */ *ptr++ = gaussianKernel(temp,p[ip][0]); break; } } /* Increment pointer for next row. */ ptr += rinc; } /* Initialize cumulative time variable. */ t_dp = 0.0; /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Reset pointer to start of precomputed data. */ ptr = prec; /* Calculate increment from one row to the next. */ rinc = l_max_prec-ld[ild][0]; /* Check if the localized kernel is used. */ if (kt == KT_LOC_SUPP) { /* Perform final summation */ /* Calculate the multiplicative constant. */ constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1))); /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) f[d] += b[l]*(*ptr++); /* Multiply with the constant. */ f[d] *= constant; /* Proceed to next row. */ ptr += rinc; } } else { /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) f[d] += b[l]*(*ptr++); /* Proceed to next row. */ ptr += rinc; } } } /* Calculate the time needed. */ t1 = getticks(); t_dp = nfft_elapsed_seconds(t1,t0); /* Calculate average time needed. */ t_dp = t_dp/((double)ld[ild][4]); } else { /* Initialize cumulative time variable with dummy value. */ t_dp = -1.0; } /* Initialize cumulative time variable. */ t_d = 0.0; /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Switch by the kernel type. */ switch (kt) { case KT_ABEL_POISSON: /* Process all target nodes. 
*/ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. */ f[d] += b[l]*poissonKernel(temp,p[ip][0]); } } break; case KT_SINGULARITY: /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. */ f[d] += b[l]*singularityKernel(temp,p[ip][0]); } } break; case KT_LOC_SUPP: /* Calculate the multiplicative constant. */ constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1))); /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. */ f[d] += b[l]*locallySupportedKernel(temp,p[ip][0],p[ip][1]); } /* Multiply result with constant. */ f[d] *= constant; } break; case KT_GAUSSIAN: /* Process all target nodes. */ for (d = 0; d < ld[ild][1]; d++) { /* Initialize function value. */ f[d] = 0.0; /* Process all source nodes. */ for (l = 0; l < ld[ild][0]; l++) { /* Compute the inner product for the current source and * target nodes. */ temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]); /* Evaluate the Poisson kernel for the current value and add * to the result. 
*/ f[d] += b[l]*gaussianKernel(temp,p[ip][0]); } } break; } } /* Calculate and add the time needed. */ t1 = getticks(); t_d = nfft_elapsed_seconds(t1,t0); /* Calculate average time needed. */ t_d = t_d/((double)ld[ild][4]); } else { /* Initialize cumulative time variable with dummy value. */ t_d = -1.0; t_dp = -1.0; } /* Initialize error and cumulative time variables for the fast * algorithm. */ err_fd = -1.0; err_f = -1.0; t_fd = -1.0; t_f = -1.0; /* Process all cut-off bandwidths. */ for (im = 0; im < im_max; im++) { /* Init transform plans. */ nfsft_init_guru(&plan_adjoint, m[im],ld[ild][0], ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)), PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); nfsft_init_guru(&plan,m[im],ld[ild][1], ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)), PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_adjoint.f_hat = f_hat; plan_adjoint.x = eta; plan_adjoint.f = b; plan.f_hat = f_hat; plan.x = xi; plan.f = f_m; nfsft_precompute_x(&plan_adjoint); nfsft_precompute_x(&plan); /* Check if direct algorithm shall also be tested. */ if (use_nfsft == BOTH) { /* Initialize cumulative time variable. */ t_fd = 0.0; /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Execute adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(&plan_adjoint); /* Multiplication with the Fourier-Legendre coefficients. */ for (k = 0; k <= m[im]; k++) for (n = -k; n <= k; n++) f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k]; /* Execute direct NDSFT transformation. */ nfsft_trafo_direct(&plan); } /* Calculate and add the time needed. */ t1 = getticks(); t_fd = nfft_elapsed_seconds(t1,t0); /* Calculate average time needed. */ t_fd = t_fd/((double)ld[ild][4]); /* Check if error E_infty should be computed. */ if (ld[ild][2] != NO) { /* Compute the error E_infinity. 
*/ err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, ld[ild][0]); } } /* Check if the fast NFSFT algorithm shall also be tested. */ if (use_nfsft != NO) { /* Initialize cumulative time variable for the NFSFT algorithm. */ t_f = 0.0; } else { /* Initialize cumulative time variable for the direct NDSFT * algorithm. */ t_fd = 0.0; } /* Initialize time measurement. */ t0 = getticks(); /* Cycle through all runs. */ for (i = 0; i < ld[ild][4]; i++) { /* Check if the fast NFSFT algorithm shall also be tested. */ if (use_nfsft != NO) { /* Execute the adjoint NFSFT transformation. */ nfsft_adjoint(&plan_adjoint); } else { /* Execute the adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(&plan_adjoint); } /* Multiplication with the Fourier-Legendre coefficients. */ for (k = 0; k <= m[im]; k++) for (n = -k; n <= k; n++) f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k]; /* Check if the fast NFSFT algorithm shall also be tested. */ if (use_nfsft != NO) { /* Execute the NFSFT transformation. */ nfsft_trafo(&plan); } else { /* Execute the NDSFT transformation. */ nfsft_trafo_direct(&plan); } } /* Check if the fast NFSFT algorithm has been used. */ t1 = getticks(); if (use_nfsft != NO) t_f = nfft_elapsed_seconds(t1,t0); else t_fd = nfft_elapsed_seconds(t1,t0); /* Check if the fast NFSFT algorithm has been used. */ if (use_nfsft != NO) { /* Calculate average time needed. */ t_f = t_f/((double)ld[ild][4]); } else { /* Calculate average time needed. */ t_fd = t_fd/((double)ld[ild][4]); } /* Check if error E_infty should be computed. */ if (ld[ild][2] != NO) { /* Check if the fast NFSFT algorithm has been used. */ if (use_nfsft != NO) { /* Compute the error E_infinity. */ err_f = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, ld[ild][0]); } else { /* Compute the error E_infinity. */ err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b, ld[ild][0]); } } /* Print out the error measurements. 
*/ fprintf(stdout,"%e\n%e\n%e\n%e\n%e\n%e\n\n",t_d,t_dp,t_fd,t_f,err_fd, err_f); /* Finalize the NFSFT plans */ nfsft_finalize(&plan_adjoint); nfsft_finalize(&plan); } /* for (im = 0; im < im_max; im++) - Process all cut-off * bandwidths.*/ } /* for (ild = 0; ild < ild_max; ild++) - Process all node sets. */ } /* for (ip = 0; ip < ip_max; ip++) - Process all parameter sets. */ /* Delete precomputed data. */ nfsft_forget(); /* Check if memory for precomputed data of the matrix K has been * allocated. */ if (precompute == YES) { /* Free memory for precomputed matrix K. */ nfft_free(prec); } /* Free data arrays. */ nfft_free(f); nfft_free(f_m); nfft_free(xi); nfft_free(eta); nfft_free(a); nfft_free(f_hat); nfft_free(b); /* Free memory for node sets. */ for (ild = 0; ild < ild_max; ild++) nfft_free(ld[ild]); nfft_free(ld); /* Free memory for cut-off bandwidths. */ nfft_free(m); /* Free memory for parameter sets. */ for (ip = 0; ip < ip_max; ip++) nfft_free(p[ip]); nfft_free(p); } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */ /* Return exit code for successful run. */ return EXIT_SUCCESS; }
/** computes the inverse discrete Radon transform of Rf * on the grid given by gridfcn() with T angles and R offsets * by a NFFT-based CG-type algorithm */ int Inverse_Radon_trafo(int (*gridfcn)(), int T, int S, double *Rf, int NN, double *f, int max_i) { int j,k; /**< index for nodes and freqencies */ nfft_plan my_nfft_plan; /**< plan for the nfft-2D */ solver_plan_complex my_infft_plan; /**< plan for the inverse nfft */ fftw_complex *fft; /**< variable for the fftw-1Ds */ fftw_plan my_fftw_plan; /**< plan for the fftw-1Ds */ int t,r; /**< index for directions and offsets */ double *x, *w; /**< knots and associated weights */ int l; /**< index for iterations */ int N[2],n[2]; int M=T*S; N[0]=NN; n[0]=2*N[0]; N[1]=NN; n[1]=2*N[1]; fft = (fftw_complex *)nfft_malloc(S*sizeof(fftw_complex)); my_fftw_plan = fftw_plan_dft_1d(S,fft,fft,FFTW_FORWARD,FFTW_MEASURE); x = (double *)nfft_malloc(2*T*S*(sizeof(double))); if (x==NULL) return -1; w = (double *)nfft_malloc(T*S*(sizeof(double))); if (w==NULL) return -1; /** init two dimensional NFFT plan */ nfft_init_guru(&my_nfft_plan, 2, N, M, n, 4, PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE, FFTW_MEASURE| FFTW_DESTROY_INPUT); /** init two dimensional infft plan */ solver_init_advanced_complex(&my_infft_plan,(nfft_mv_plan_complex*)(&my_nfft_plan), CGNR | PRECOMPUTE_WEIGHT); /** init nodes and weights of grid*/ gridfcn(T,S,x,w); for(j=0;j<my_nfft_plan.M_total;j++) { my_nfft_plan.x[2*j+0] = x[2*j+0]; my_nfft_plan.x[2*j+1] = x[2*j+1]; if (j%S) my_infft_plan.w[j] = w[j]; else my_infft_plan.w[j] = 0.0; } /** precompute psi, the entries of the matrix B */ if(my_nfft_plan.nfft_flags & PRE_LIN_PSI) nfft_precompute_lin_psi(&my_nfft_plan); if(my_nfft_plan.nfft_flags & PRE_PSI) nfft_precompute_psi(&my_nfft_plan); if(my_nfft_plan.nfft_flags & PRE_FULL_PSI) nfft_precompute_full_psi(&my_nfft_plan); /** compute 1D-ffts and init given samples and weights */ for(t=0; t<T; t++) { /* for(r=0; r<R/2; r++) 
fft[r] = cexp(I*KPI*r)*Rf[t*R+(r+R/2)]; for(r=0; r<R/2; r++) fft[r+R/2] = cexp(I*KPI*r)*Rf[t*R+r]; */ for(r=0; r<S; r++) fft[r] = Rf[t*S+r] + _Complex_I*0.0; nfft_fftshift_complex(fft, 1, &S); fftw_execute(my_fftw_plan); nfft_fftshift_complex(fft, 1, &S); my_infft_plan.y[t*S] = 0.0; for(r=-S/2+1; r<S/2; r++) my_infft_plan.y[t*S+(r+S/2)] = fft[r+S/2]/KERNEL(r); } /** initialise some guess f_hat_0 */ for(k=0;k<my_nfft_plan.N_total;k++) my_infft_plan.f_hat_iter[k] = 0.0 + _Complex_I*0.0; /** solve the system */ solver_before_loop_complex(&my_infft_plan); if (max_i<1) { l=1; for(k=0;k<my_nfft_plan.N_total;k++) my_infft_plan.f_hat_iter[k] = my_infft_plan.p_hat_iter[k]; } else { for(l=1;l<=max_i;l++) { solver_loop_one_step_complex(&my_infft_plan); /*if (sqrt(my_infft_plan.dot_r_iter)<=1e-12) break;*/ } } /*printf("after %d iteration(s): weighted 2-norm of original residual vector = %g\n",l-1,sqrt(my_infft_plan.dot_r_iter));*/ /** copy result */ for(k=0;k<my_nfft_plan.N_total;k++) f[k] = creal(my_infft_plan.f_hat_iter[k]); /** finalise the plans and free the variables */ fftw_destroy_plan(my_fftw_plan); nfft_free(fft); solver_finalize_complex(&my_infft_plan); nfft_finalize(&my_nfft_plan); nfft_free(x); nfft_free(w); return 0; }
/** * The main program. * * \param argc The number of arguments * \param argv An array containing the arguments as C-strings * * \return Exit code */ int main (int argc, char **argv) { int tc; /**< The index variable for testcases */ int tc_max; /**< The number of testcases */ int *NQ; /**< The array containing the cut-off degrees * \f$N\f$ */ int NQ_max; /**< The maximum cut-off degree \f$N\f$ for the* current testcase */ int *SQ; /**< The array containing the grid size parameters */ int SQ_max; /**< The maximum grid size parameter */ int *RQ; /**< The array containing the grid size parameters */ int iNQ; /**< Index variable for cut-off degrees */ int iNQ_max; /**< The maximum number of cut-off degrees */ int testfunction; /**< The testfunction */ int N; /**< The test function's bandwidth */ int use_nfsft; /**< Whether to use the NFSFT algorithm or not */ int use_nfft; /**< Whether to use the NFFT algorithm or not */ int use_fpt; /**< Whether to use the FPT algorithm or not */ int cutoff; /**< The current NFFT cut-off parameter */ double threshold; /**< The current NFSFT threshold parameter */ int gridtype; /**< The type of quadrature grid to be used */ int repetitions; /**< The number of repetitions to be performed */ int mode; /**< The number of repetitions to be performed */ double *w; /**< The quadrature weights */ double *x_grid; /**< The quadrature nodes */ double *x_compare; /**< The quadrature nodes */ double _Complex *f_grid; /**< The reference function values */ double _Complex *f_compare; /**< The function values */ double _Complex *f; /**< The function values */ double _Complex *f_hat_gen; /**< The reference spherical Fourier * coefficients */ double _Complex *f_hat; /**< The spherical Fourier coefficients */ nfsft_plan plan_adjoint; /**< The NFSFT plan */ nfsft_plan plan; /**< The NFSFT plan */ nfsft_plan plan_gen; /**< The NFSFT plan */ double t_avg; /**< The average computation time needed */ double err_infty_avg; /**< The average error 
\f$E_\infty\f$ */ double err_2_avg; /**< The average error \f$E_2\f$ */ int i; /**< A loop variable */ int k; /**< A loop variable */ int n; /**< A loop variable */ int d; /**< A loop variable */ int m_theta; /**< The current number of different * colatitudinal angles (for grids) */ int m_phi; /**< The current number of different * longitudinal angles (for grids). */ int m_total; /**< The total number nodes. */ double *theta; /**< An array for saving the angles theta of a * grid */ double *phi; /**< An array for saving the angles phi of a * grid */ fftw_plan fplan; /**< An FFTW plan for computing Clenshaw-Curtis quadrature weights */ //int nside; /**< The size parameter for the HEALPix grid */ int d2; int M; double theta_s; double x1,x2,x3,temp; int m_compare; nfsft_plan *plan_adjoint_ptr; nfsft_plan *plan_ptr; double *w_temp; int testmode; ticks t0, t1; /* Read the number of testcases. */ fscanf(stdin,"testcases=%d\n",&tc_max); fprintf(stdout,"%d\n",tc_max); /* Process each testcase. */ for (tc = 0; tc < tc_max; tc++) { /* Check if the fast transform shall be used. */ fscanf(stdin,"nfsft=%d\n",&use_nfsft); fprintf(stdout,"%d\n",use_nfsft); if (use_nfsft != NO) { /* Check if the NFFT shall be used. */ fscanf(stdin,"nfft=%d\n",&use_nfft); fprintf(stdout,"%d\n",use_nfsft); if (use_nfft != NO) { /* Read the cut-off parameter. */ fscanf(stdin,"cutoff=%d\n",&cutoff); fprintf(stdout,"%d\n",cutoff); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ cutoff = 1; } /* Check if the fast polynomial transform shall be used. */ fscanf(stdin,"fpt=%d\n",&use_fpt); fprintf(stdout,"%d\n",use_fpt); if (use_fpt != NO) { /* Read the NFSFT threshold parameter. */ fscanf(stdin,"threshold=%lf\n",&threshold); fprintf(stdout,"%lf\n",threshold); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ threshold = 1000.0; } } else { /* TODO remove this */ /* Set dummy values. 
*/ use_nfft = NO; use_fpt = NO; cutoff = 3; threshold = 1000.0; } /* Read the testmode type. */ fscanf(stdin,"testmode=%d\n",&testmode); fprintf(stdout,"%d\n",testmode); if (testmode == ERROR) { /* Read the quadrature grid type. */ fscanf(stdin,"gridtype=%d\n",&gridtype); fprintf(stdout,"%d\n",gridtype); /* Read the test function. */ fscanf(stdin,"testfunction=%d\n",&testfunction); fprintf(stdout,"%d\n",testfunction); /* Check if random bandlimited function has been chosen. */ if (testfunction == FUNCTION_RANDOM_BANDLIMITED) { /* Read the bandwidht. */ fscanf(stdin,"bandlimit=%d\n",&N); fprintf(stdout,"%d\n",N); } else { N = 1; } /* Read the number of repetitions. */ fscanf(stdin,"repetitions=%d\n",&repetitions); fprintf(stdout,"%d\n",repetitions); fscanf(stdin,"mode=%d\n",&mode); fprintf(stdout,"%d\n",mode); if (mode == RANDOM) { /* Read the bandwidht. */ fscanf(stdin,"points=%d\n",&m_compare); fprintf(stdout,"%d\n",m_compare); x_compare = (double*) nfft_malloc(2*m_compare*sizeof(double)); d = 0; while (d < m_compare) { x1 = 2.0*(((double)rand())/RAND_MAX) - 1.0; x2 = 2.0*(((double)rand())/RAND_MAX) - 1.0; x3 = 2.0*(((double)rand())/RAND_MAX) - 1.0; temp = sqrt(x1*x1+x2*x2+x3*x3); if (temp <= 1) { x_compare[2*d+1] = acos(x3); if (x_compare[2*d+1] == 0 || x_compare[2*d+1] == KPI) { x_compare[2*d] = 0.0; } else { x_compare[2*d] = atan2(x2/sin(x_compare[2*d+1]),x1/sin(x_compare[2*d+1])); } x_compare[2*d] *= 1.0/(2.0*KPI); x_compare[2*d+1] *= 1.0/(2.0*KPI); d++; } } f_compare = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex)); f = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex)); } } /* Initialize maximum cut-off degree and grid size parameter. */ NQ_max = 0; SQ_max = 0; /* Read the number of cut-off degrees. */ fscanf(stdin,"bandwidths=%d\n",&iNQ_max); fprintf(stdout,"%d\n",iNQ_max); /* Allocate memory for the cut-off degrees and grid size parameters. 
*/ NQ = (int*) nfft_malloc(iNQ_max*sizeof(int)); SQ = (int*) nfft_malloc(iNQ_max*sizeof(int)); if (testmode == TIMING) { RQ = (int*) nfft_malloc(iNQ_max*sizeof(int)); } /* Read the cut-off degrees and grid size parameters. */ for (iNQ = 0; iNQ < iNQ_max; iNQ++) { if (testmode == TIMING) { /* Read cut-off degree and grid size parameter. */ fscanf(stdin,"%d %d %d\n",&NQ[iNQ],&SQ[iNQ],&RQ[iNQ]); fprintf(stdout,"%d %d %d\n",NQ[iNQ],SQ[iNQ],RQ[iNQ]); NQ_max = MAX(NQ_max,NQ[iNQ]); SQ_max = MAX(SQ_max,SQ[iNQ]); } else { /* Read cut-off degree and grid size parameter. */ fscanf(stdin,"%d %d\n",&NQ[iNQ],&SQ[iNQ]); fprintf(stdout,"%d %d\n",NQ[iNQ],SQ[iNQ]); NQ_max = MAX(NQ_max,NQ[iNQ]); SQ_max = MAX(SQ_max,SQ[iNQ]); } } /* Do precomputation. */ //fprintf(stderr,"NFSFT Precomputation\n"); //fflush(stderr); nfsft_precompute(NQ_max, threshold, ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U)), 0U); if (testmode == TIMING) { /* Allocate data structures. */ f_hat = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(NQ_max)*sizeof(double _Complex)); f = (double _Complex*) nfft_malloc(SQ_max*sizeof(double _Complex)); x_grid = (double*) nfft_malloc(2*SQ_max*sizeof(double)); for (d = 0; d < SQ_max; d++) { f[d] = (((double)rand())/RAND_MAX)-0.5 + _Complex_I*((((double)rand())/RAND_MAX)-0.5); x_grid[2*d] = (((double)rand())/RAND_MAX) - 0.5; x_grid[2*d+1] = (((double)rand())/RAND_MAX) * 0.5; } } //fprintf(stderr,"Entering loop\n"); //fflush(stderr); /* Process all cut-off bandwidths. */ for (iNQ = 0; iNQ < iNQ_max; iNQ++) { if (testmode == TIMING) { nfsft_init_guru(&plan,NQ[iNQ],SQ[iNQ], NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFTW_MEASURE | FFT_OUT_OF_PLACE, cutoff); plan.f_hat = f_hat; plan.x = x_grid; plan.f = f; nfsft_precompute_x(&plan); t_avg = 0.0; for (i = 0; i < RQ[iNQ]; i++) { t0 = getticks(); if (use_nfsft != NO) { /* Execute the adjoint NFSFT transformation. 
*/ nfsft_adjoint(&plan); } else { /* Execute the adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(&plan); } t1 = getticks(); t_avg += nfft_elapsed_seconds(t1,t0); } t_avg = t_avg/((double)RQ[iNQ]); nfsft_finalize(&plan); fprintf(stdout,"%+le\n", t_avg); fprintf(stderr,"%d: %4d %4d %+le\n", tc, NQ[iNQ], SQ[iNQ], t_avg); } else { /* Determine the maximum number of nodes. */ switch (gridtype) { case GRID_GAUSS_LEGENDRE: /* Calculate grid dimensions. */ m_theta = SQ[iNQ] + 1; m_phi = 2*SQ[iNQ] + 2; m_total = m_theta*m_phi; break; case GRID_CLENSHAW_CURTIS: /* Calculate grid dimensions. */ m_theta = 2*SQ[iNQ] + 1; m_phi = 2*SQ[iNQ] + 2; m_total = m_theta*m_phi; break; case GRID_HEALPIX: m_theta = 1; m_phi = 12*SQ[iNQ]*SQ[iNQ]; m_total = m_theta * m_phi; //fprintf("HEALPix: SQ = %d, m_theta = %d, m_phi= %d, m"); break; case GRID_EQUIDISTRIBUTION: case GRID_EQUIDISTRIBUTION_UNIFORM: m_theta = 2; //fprintf(stderr,"ed: m_theta = %d\n",m_theta); for (k = 1; k < SQ[iNQ]; k++) { m_theta += (int)floor((2*KPI)/acos((cos(KPI/(double)SQ[iNQ])- cos(k*KPI/(double)SQ[iNQ])*cos(k*KPI/(double)SQ[iNQ]))/ (sin(k*KPI/(double)SQ[iNQ])*sin(k*KPI/(double)SQ[iNQ])))); //fprintf(stderr,"ed: m_theta = %d\n",m_theta); } //fprintf(stderr,"ed: m_theta final = %d\n",m_theta); m_phi = 1; m_total = m_theta * m_phi; break; } /* Allocate memory for data structures. */ w = (double*) nfft_malloc(m_theta*sizeof(double)); x_grid = (double*) nfft_malloc(2*m_total*sizeof(double)); //fprintf(stderr,"NQ = %d\n",NQ[iNQ]); //fflush(stderr); switch (gridtype) { case GRID_GAUSS_LEGENDRE: //fprintf(stderr,"Generating grid for NQ = %d, SQ = %d\n",NQ[iNQ],SQ[iNQ]); //fflush(stderr); /* Read quadrature weights. */ for (k = 0; k < m_theta; k++) { fscanf(stdin,"%le\n",&w[k]); w[k] *= (2.0*KPI)/((double)m_phi); } //fprintf(stderr,"Allocating theta and phi\n"); //fflush(stderr); /* Allocate memory to store the grid's angles. 
*/ theta = (double*) nfft_malloc(m_theta*sizeof(double)); phi = (double*) nfft_malloc(m_phi*sizeof(double)); //if (theta == NULL || phi == NULL) //{ //fprintf(stderr,"Couldn't allocate theta and phi\n"); //fflush(stderr); //} /* Read angles theta. */ for (k = 0; k < m_theta; k++) { fscanf(stdin,"%le\n",&theta[k]); } /* Generate the grid angles phi. */ for (n = 0; n < m_phi; n++) { phi[n] = n/((double)m_phi); phi[n] -= ((phi[n]>=0.5)?(1.0):(0.0)); } //fprintf(stderr,"Generating grid nodes\n"); //fflush(stderr); /* Generate the grid's nodes. */ d = 0; for (k = 0; k < m_theta; k++) { for (n = 0; n < m_phi; n++) { x_grid[2*d] = phi[n]; x_grid[2*d+1] = theta[k]; d++; } } //fprintf(stderr,"Freeing theta and phi\n"); //fflush(stderr); /* Free the arrays for the grid's angles. */ nfft_free(theta); nfft_free(phi); break; case GRID_CLENSHAW_CURTIS: /* Allocate memory to store the grid's angles. */ theta = (double*) nfft_malloc(m_theta*sizeof(double)); phi = (double*) nfft_malloc(m_phi*sizeof(double)); /* Generate the grid angles theta. */ for (k = 0; k < m_theta; k++) { theta[k] = k/((double)2*(m_theta-1)); } /* Generate the grid angles phi. */ for (n = 0; n < m_phi; n++) { phi[n] = n/((double)m_phi); phi[n] -= ((phi[n]>=0.5)?(1.0):(0.0)); } /* Generate quadrature weights. */ fplan = fftw_plan_r2r_1d(SQ[iNQ]+1, w, w, FFTW_REDFT00, 0U); for (k = 0; k < SQ[iNQ]+1; k++) { w[k] = -2.0/(4*k*k-1); } fftw_execute(fplan); w[0] *= 0.5; for (k = 0; k < SQ[iNQ]+1; k++) { w[k] *= (2.0*KPI)/((double)(m_theta-1)*m_phi); w[m_theta-1-k] = w[k]; } fftw_destroy_plan(fplan); /* Generate the grid's nodes. */ d = 0; for (k = 0; k < m_theta; k++) { for (n = 0; n < m_phi; n++) { x_grid[2*d] = phi[n]; x_grid[2*d+1] = theta[k]; d++; } } /* Free the arrays for the grid's angles. 
*/ nfft_free(theta); nfft_free(phi); break; case GRID_HEALPIX: d = 0; for (k = 1; k <= SQ[iNQ]-1; k++) { for (n = 0; n <= 4*k-1; n++) { x_grid[2*d+1] = 1 - (k*k)/((double)(3.0*SQ[iNQ]*SQ[iNQ])); x_grid[2*d] = ((n+0.5)/(4*k)); x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0); d++; } } d2 = d-1; for (k = SQ[iNQ]; k <= 3*SQ[iNQ]; k++) { for (n = 0; n <= 4*SQ[iNQ]-1; n++) { x_grid[2*d+1] = 2.0/(3*SQ[iNQ])*(2*SQ[iNQ]-k); x_grid[2*d] = (n+((k%2==0)?(0.5):(0.0)))/(4*SQ[iNQ]); x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0); d++; } } for (k = 1; k <= SQ[iNQ]-1; k++) { for (n = 0; n <= 4*k-1; n++) { x_grid[2*d+1] = -x_grid[2*d2+1]; x_grid[2*d] = x_grid[2*d2]; d++; d2--; } } for (d = 0; d < m_total; d++) { x_grid[2*d+1] = acos(x_grid[2*d+1])/(2.0*KPI); } w[0] = (4.0*KPI)/(m_total); break; case GRID_EQUIDISTRIBUTION: case GRID_EQUIDISTRIBUTION_UNIFORM: /* TODO Compute the weights. */ if (gridtype == GRID_EQUIDISTRIBUTION) { w_temp = (double*) nfft_malloc((SQ[iNQ]+1)*sizeof(double)); fplan = fftw_plan_r2r_1d(SQ[iNQ]/2+1, w_temp, w_temp, FFTW_REDFT00, 0U); for (k = 0; k < SQ[iNQ]/2+1; k++) { w_temp[k] = -2.0/(4*k*k-1); } fftw_execute(fplan); w_temp[0] *= 0.5; for (k = 0; k < SQ[iNQ]/2+1; k++) { w_temp[k] *= (2.0*KPI)/((double)(SQ[iNQ])); w_temp[SQ[iNQ]-k] = w_temp[k]; } fftw_destroy_plan(fplan); } d = 0; x_grid[2*d] = -0.5; x_grid[2*d+1] = 0.0; if (gridtype == GRID_EQUIDISTRIBUTION) { w[d] = w_temp[0]; } else { w[d] = (4.0*KPI)/(m_total); } d = 1; x_grid[2*d] = -0.5; x_grid[2*d+1] = 0.5; if (gridtype == GRID_EQUIDISTRIBUTION) { w[d] = w_temp[SQ[iNQ]]; } else { w[d] = (4.0*KPI)/(m_total); } d = 2; for (k = 1; k < SQ[iNQ]; k++) { theta_s = (double)k*KPI/(double)SQ[iNQ]; M = (int)floor((2.0*KPI)/acos((cos(KPI/(double)SQ[iNQ])- cos(theta_s)*cos(theta_s))/(sin(theta_s)*sin(theta_s)))); for (n = 0; n < M; n++) { x_grid[2*d] = (n + 0.5)/M; x_grid[2*d] -= (x_grid[2*d]>=0.5)?(1.0):(0.0); x_grid[2*d+1] = theta_s/(2.0*KPI); if (gridtype == GRID_EQUIDISTRIBUTION) { w[d] = 
w_temp[k]/((double)(M)); } else { w[d] = (4.0*KPI)/(m_total); } d++; } } if (gridtype == GRID_EQUIDISTRIBUTION) { nfft_free(w_temp); } break; default: break; } /* Allocate memory for grid values. */ f_grid = (double _Complex*) nfft_malloc(m_total*sizeof(double _Complex)); if (mode == RANDOM) { } else { m_compare = m_total; f_compare = (double _Complex*) nfft_malloc(m_compare*sizeof(double _Complex)); x_compare = x_grid; f = f_grid; } //fprintf(stderr,"Generating test function\n"); //fflush(stderr); switch (testfunction) { case FUNCTION_RANDOM_BANDLIMITED: f_hat_gen = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(N)*sizeof(double _Complex)); //fprintf(stderr,"Generating random test function\n"); //fflush(stderr); /* Generate random function samples by sampling a bandlimited * function. */ nfsft_init_guru(&plan_gen,N,m_total, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((N>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_gen.f_hat = f_hat_gen; plan_gen.x = x_grid; plan_gen.f = f_grid; nfsft_precompute_x(&plan_gen); for (k = 0; k < plan_gen.N_total; k++) { f_hat_gen[k] = 0.0; } for (k = 0; k <= N; k++) { for (n = -k; n <= k; n++) { f_hat_gen[NFSFT_INDEX(k,n,&plan_gen)] = (((double)rand())/RAND_MAX)-0.5 + _Complex_I*((((double)rand())/RAND_MAX)-0.5); } } if (use_nfsft != NO) { /* Execute the NFSFT transformation. */ nfsft_trafo(&plan_gen); } else { /* Execute the direct NDSFT transformation. */ nfsft_trafo_direct(&plan_gen); } nfsft_finalize(&plan_gen); if (mode == RANDOM) { nfsft_init_guru(&plan_gen,N,m_compare, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((N>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_gen.f_hat = f_hat_gen; plan_gen.x = x_compare; plan_gen.f = f_compare; nfsft_precompute_x(&plan_gen); if (use_nfsft != NO) { /* Execute the NFSFT transformation. 
*/ nfsft_trafo(&plan_gen); } else { /* Execute the direct NDSFT transformation. */ nfsft_trafo_direct(&plan_gen); } nfsft_finalize(&plan_gen); } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } nfft_free(f_hat_gen); break; case FUNCTION_F1: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); f_grid[d] = x1*x2*x3; } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); f_compare[d] = x1*x2*x3; } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F2: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); f_grid[d] = 0.1*exp(x1+x2+x3); } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); f_compare[d] = 0.1*exp(x1+x2+x3); } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F3: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_grid[d] = 0.1*temp; } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_compare[d] = 0.1*temp; } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case 
FUNCTION_F4: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_grid[d] = 1.0/(temp); } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_compare[d] = 1.0/(temp); } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F5: for (d = 0; d < m_total; d++) { x1 = sin(x_grid[2*d+1]*2.0*KPI)*cos(x_grid[2*d]*2.0*KPI); x2 = sin(x_grid[2*d+1]*2.0*KPI)*sin(x_grid[2*d]*2.0*KPI); x3 = cos(x_grid[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_grid[d] = 0.1*sin(1+temp)*sin(1+temp); } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { x1 = sin(x_compare[2*d+1]*2.0*KPI)*cos(x_compare[2*d]*2.0*KPI); x2 = sin(x_compare[2*d+1]*2.0*KPI)*sin(x_compare[2*d]*2.0*KPI); x3 = cos(x_compare[2*d+1]*2.0*KPI); temp = sqrt(x1*x1)+sqrt(x2*x2)+sqrt(x3*x3); f_compare[d] = 0.1*sin(1+temp)*sin(1+temp); } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; case FUNCTION_F6: for (d = 0; d < m_total; d++) { if (x_grid[2*d+1] <= 0.25) { f_grid[d] = 1.0; } else { f_grid[d] = 1.0/(sqrt(1+3*cos(2.0*KPI*x_grid[2*d+1])*cos(2.0*KPI*x_grid[2*d+1]))); } } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { if (x_compare[2*d+1] <= 0.25) { f_compare[d] = 1.0; } else { f_compare[d] = 1.0/(sqrt(1+3*cos(2.0*KPI*x_compare[2*d+1])*cos(2.0*KPI*x_compare[2*d+1]))); } } } else { memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; default: //fprintf(stderr,"Generating one function\n"); //fflush(stderr); for (d = 0; d < m_total; d++) { f_grid[d] = 1.0; } if (mode == RANDOM) { for (d = 0; d < m_compare; d++) { f_compare[d] = 1.0; } } else { 
memcpy(f_compare,f_grid,m_total*sizeof(double _Complex)); } break; } //fprintf(stderr,"Initializing trafo\n"); //fflush(stderr); /* Init transform plan. */ nfsft_init_guru(&plan_adjoint,NQ[iNQ],m_total, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((NQ[iNQ]>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_adjoint_ptr = &plan_adjoint; if (mode == RANDOM) { nfsft_init_guru(&plan,NQ[iNQ],m_compare, NFSFT_NORMALIZED | ((use_nfft!=NO)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=NO)?(0U):(NFSFT_USE_DPT)), ((NQ[iNQ]>512)?(0U):(PRE_PHI_HUT | PRE_PSI)) | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); plan_ptr = &plan; } else { plan_ptr = &plan_adjoint; } f_hat = (double _Complex*) nfft_malloc(NFSFT_F_HAT_SIZE(NQ[iNQ])*sizeof(double _Complex)); plan_adjoint_ptr->f_hat = f_hat; plan_adjoint_ptr->x = x_grid; plan_adjoint_ptr->f = f_grid; plan_ptr->f_hat = f_hat; plan_ptr->x = x_compare; plan_ptr->f = f; //fprintf(stderr,"Precomputing for x\n"); //fflush(stderr); nfsft_precompute_x(plan_adjoint_ptr); if (plan_adjoint_ptr != plan_ptr) { nfsft_precompute_x(plan_ptr); } /* Initialize cumulative time variable. */ t_avg = 0.0; err_infty_avg = 0.0; err_2_avg = 0.0; /* Cycle through all runs. */ for (i = 0; i < 1/*repetitions*/; i++) { //fprintf(stderr,"Copying original values\n"); //fflush(stderr); /* Copy exact funtion values to working array. */ //memcpy(f,f_grid,m_total*sizeof(double _Complex)); /* Initialize time measurement. */ t0 = getticks(); //fprintf(stderr,"Multiplying with quadrature weights\n"); //fflush(stderr); /* Multiplication with the quadrature weights. 
*/ /*fprintf(stderr,"\n");*/ d = 0; for (k = 0; k < m_theta; k++) { for (n = 0; n < m_phi; n++) { /*fprintf(stderr,"f_ref[%d] = %le + I*%le,\t f[%d] = %le + I*%le, \t w[%d] = %le\n", d,creal(f_ref[d]),cimag(f_ref[d]),d,creal(f[d]),cimag(f[d]),k, w[k]);*/ f_grid[d] *= w[k]; d++; } } t1 = getticks(); t_avg += nfft_elapsed_seconds(t1,t0); nfft_free(w); t0 = getticks(); /*fprintf(stderr,"\n"); d = 0; for (d = 0; d < grid_total; d++) { fprintf(stderr,"f[%d] = %le + I*%le, theta[%d] = %le, phi[%d] = %le\n", d,creal(f[d]),cimag(f[d]),d,x[2*d+1],d,x[2*d]); }*/ //fprintf(stderr,"Executing adjoint\n"); //fflush(stderr); /* Check if the fast NFSFT algorithm shall be tested. */ if (use_nfsft != NO) { /* Execute the adjoint NFSFT transformation. */ nfsft_adjoint(plan_adjoint_ptr); } else { /* Execute the adjoint direct NDSFT transformation. */ nfsft_adjoint_direct(plan_adjoint_ptr); } /* Multiplication with the Fourier-Legendre coefficients. */ /*for (k = 0; k <= m[im]; k++) { for (n = -k; n <= k; n++) { fprintf(stderr,"f_hat[%d,%d] = %le\t + I*%le\n",k,n, creal(f_hat[NFSFT_INDEX(k,n,&plan_adjoint)]), cimag(f_hat[NFSFT_INDEX(k,n,&plan_adjoint)])); } }*/ //fprintf(stderr,"Executing trafo\n"); //fflush(stderr); if (use_nfsft != NO) { /* Execute the NFSFT transformation. */ nfsft_trafo(plan_ptr); } else { /* Execute the direct NDSFT transformation. */ nfsft_trafo_direct(plan_ptr); } t1 = getticks(); t_avg += nfft_elapsed_seconds(t1,t0); //fprintf(stderr,"Finalizing\n"); //fflush(stderr); /* Finalize the NFSFT plans */ nfsft_finalize(plan_adjoint_ptr); if (plan_ptr != plan_adjoint_ptr) { nfsft_finalize(plan_ptr); } /* Free data arrays. 
*/ nfft_free(f_hat); nfft_free(x_grid); err_infty_avg += X(error_l_infty_complex)(f, f_compare, m_compare); err_2_avg += X(error_l_2_complex)(f, f_compare, m_compare); nfft_free(f_grid); if (mode == RANDOM) { } else { nfft_free(f_compare); } /*for (d = 0; d < m_total; d++) { fprintf(stderr,"f_ref[%d] = %le + I*%le,\t f[%d] = %le + I*%le\n", d,creal(f_ref[d]),cimag(f_ref[d]),d,creal(f[d]),cimag(f[d])); }*/ } //fprintf(stderr,"Calculating the error\n"); //fflush(stderr); /* Calculate average time needed. */ t_avg = t_avg/((double)repetitions); /* Calculate the average error. */ err_infty_avg = err_infty_avg/((double)repetitions); /* Calculate the average error. */ err_2_avg = err_2_avg/((double)repetitions); /* Print out the error measurements. */ fprintf(stdout,"%+le %+le %+le\n", t_avg, err_infty_avg, err_2_avg); fprintf(stderr,"%d: %4d %4d %+le %+le %+le\n", tc, NQ[iNQ], SQ[iNQ], t_avg, err_infty_avg, err_2_avg); } } /* for (im = 0; im < im_max; im++) - Process all cut-off * bandwidths.*/ fprintf(stderr,"\n"); /* Delete precomputed data. */ nfsft_forget(); /* Free memory for cut-off bandwidths and grid size parameters. */ nfft_free(NQ); nfft_free(SQ); if (testmode == TIMING) { nfft_free(RQ); } if (mode == RANDOM) { nfft_free(x_compare); nfft_free(f_compare); nfft_free(f); } if (testmode == TIMING) { /* Allocate data structures. */ nfft_free(f_hat); nfft_free(f); nfft_free(x_grid); } } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */ /* Return exit code for successful run. */ return EXIT_SUCCESS; }
/** * Compares accuracy and execution time of the fast Gauss transform with * increasing expansion degree. * Similar to the test in F. Andersson and G. Beylkin. * The fast Gauss transform with double _Complex parameters. * J. Comput. Physics 203 (2005) 274-286 * * \author Stefan Kunis */ void fgt_test_andersson(void) { fgt_plan my_plan; double _Complex *swap_dgt; int N; double _Complex sigma=4*(138+ _Complex_I*100); int n=128; int N_dgt_pre_exp=(int)(1U<<11); int N_dgt=(int)(1U<<19); printf("n=%d, sigma=%1.3e+i%1.3e\n",n,creal(sigma),cimag(sigma)); for(N=((int)(1U<<6)); N<((int)(1U<<22)); N=N<<1) { printf("$%d$\t & ",N); if(N<N_dgt_pre_exp) fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, DGT_PRE_CEXP); else fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, 0); swap_dgt = (double _Complex*)nfft_malloc(my_plan.M* sizeof(double _Complex)); fgt_test_init_rand(&my_plan); fgt_init_node_dependent(&my_plan); if(N<N_dgt) { NFFT_SWAP_complex(swap_dgt,my_plan.f); if(N<N_dgt_pre_exp) my_plan.flags^=DGT_PRE_CEXP; printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1)); if(N<N_dgt_pre_exp) my_plan.flags^=DGT_PRE_CEXP; NFFT_SWAP_complex(swap_dgt,my_plan.f); } else printf("\t\t & "); if(N<N_dgt_pre_exp) printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1)); else printf("\t\t & "); my_plan.flags^=FGT_NDFT; printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0)); my_plan.flags^=FGT_NDFT; printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0)); printf("$%1.1e$\t \\\\ \n", X(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N)); fflush(stdout); nfft_free(swap_dgt); fgt_finalize(&my_plan); fftw_cleanup(); } }
/** * The main program. * * \param argc The number of arguments * \param argv An array containing the arguments as C-strings * * \return Exit code * * \author Jens Keiner */ int main (int argc, char **argv) { int T; int N; int M; int M2; int t; /* Index variable for testcases */ nfsft_plan plan; /* NFSFT plan */ nfsft_plan plan2; /* NFSFT plan */ solver_plan_complex iplan; /* NFSFT plan */ int j; /* */ int k; /* */ int m; /* */ int use_nfsft; /* */ int use_nfft; /* */ int use_fpt; /* */ int cutoff; /**< The current NFFT cut-off parameter */ double threshold; /**< The current NFSFT threshold parameter */ double re; double im; double a; double *scratch; double xs; double *ys; double *temp; double _Complex *temp2; int qlength; double *qweights; fftw_plan fplan; fpt_set set; int npt; int npt_exp; double *alpha, *beta, *gamma; /* Read the number of testcases. */ fscanf(stdin,"testcases=%d\n",&T); fprintf(stderr,"%d\n",T); /* Process each testcase. */ for (t = 0; t < T; t++) { /* Check if the fast transform shall be used. */ fscanf(stdin,"nfsft=%d\n",&use_nfsft); fprintf(stderr,"%d\n",use_nfsft); if (use_nfsft != NO) { /* Check if the NFFT shall be used. */ fscanf(stdin,"nfft=%d\n",&use_nfft); fprintf(stderr,"%d\n",use_nfsft); if (use_nfft != NO) { /* Read the cut-off parameter. */ fscanf(stdin,"cutoff=%d\n",&cutoff); fprintf(stderr,"%d\n",cutoff); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ cutoff = 1; } /* Check if the fast polynomial transform shall be used. */ fscanf(stdin,"fpt=%d\n",&use_fpt); fprintf(stderr,"%d\n",use_fpt); if (use_fpt != NO) { /* Read the NFSFT threshold parameter. */ fscanf(stdin,"threshold=%lf\n",&threshold); fprintf(stderr,"%lf\n",threshold); } else { /* TODO remove this */ /* Initialize unused variable with dummy value. */ threshold = 1000.0; } } else { /* TODO remove this */ /* Set dummy values. */ use_nfft = NO; use_fpt = NO; cutoff = 3; threshold = 1000.0; } /* Read the bandwidth. 
*/ fscanf(stdin,"bandwidth=%d\n",&N); fprintf(stderr,"%d\n",N); /* Do precomputation. */ nfsft_precompute(N,threshold, ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U); /* Read the number of nodes. */ fscanf(stdin,"nodes=%d\n",&M); fprintf(stderr,"%d\n",M); /* */ if ((N+1)*(N+1) > M) { X(next_power_of_2_exp)(N, &npt, &npt_exp); fprintf(stderr, "npt = %d, npt_exp = %d\n", npt, npt_exp); fprintf(stderr,"Optimal interpolation!\n"); scratch = (double*) nfft_malloc(4*sizeof(double)); ys = (double*) nfft_malloc((N+1)*sizeof(double)); temp = (double*) nfft_malloc((2*N+1)*sizeof(double)); temp2 = (double _Complex*) nfft_malloc((N+1)*sizeof(double _Complex)); a = 0.0; for (j = 0; j <= N; j++) { xs = 2.0 + (2.0*j)/(N+1); ys[j] = (2.0-((j == 0)?(1.0):(0.0)))*4.0*nfft_bspline(4,xs,scratch); //fprintf(stdout,"%3d: g(%le) = %le\n",j,xs,ys[j]); a += ys[j]; } //fprintf(stdout,"a = %le\n",a); for (j = 0; j <= N; j++) { ys[j] *= 1.0/a; } qlength = 2*N+1; qweights = (double*) nfft_malloc(qlength*sizeof(double)); fplan = fftw_plan_r2r_1d(N+1, qweights, qweights, FFTW_REDFT00, 0U); for (j = 0; j < N+1; j++) { qweights[j] = -2.0/(4*j*j-1); } fftw_execute(fplan); qweights[0] *= 0.5; for (j = 0; j < N+1; j++) { qweights[j] *= 1.0/(2.0*N+1.0); qweights[2*N+1-1-j] = qweights[j]; } fplan = fftw_plan_r2r_1d(2*N+1, temp, temp, FFTW_REDFT00, 0U); for (j = 0; j <= N; j++) { temp[j] = ((j==0 || j == 2*N)?(1.0):(0.5))*ys[j]; } for (j = N+1; j < 2*N+1; j++) { temp[j] = 0.0; } fftw_execute(fplan); for (j = 0; j < 2*N+1; j++) { temp[j] *= qweights[j]; } fftw_execute(fplan); for (j = 0; j < 2*N+1; j++) { temp[j] *= ((j==0 || j == 2*N)?(1.0):(0.5)); if (j <= N) { temp2[j] = temp[j]; } } set = fpt_init(1, npt_exp, 0U); alpha = (double*) nfft_malloc((N+2)*sizeof(double)); beta = (double*) nfft_malloc((N+2)*sizeof(double)); gamma = (double*) nfft_malloc((N+2)*sizeof(double)); alpha_al_row(alpha, N, 0); beta_al_row(beta, N, 0); gamma_al_row(gamma, N, 0); 
fpt_precompute(set, 0, alpha, beta, gamma, 0, 1000.0); fpt_transposed(set,0, temp2, temp2, N, 0U); fpt_finalize(set); nfft_free(alpha); nfft_free(beta); nfft_free(gamma); fftw_destroy_plan(fplan); nfft_free(scratch); nfft_free(qweights); nfft_free(ys); nfft_free(temp); } /* Init transform plans. */ nfsft_init_guru(&plan, N, M, ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)) | NFSFT_MALLOC_F | NFSFT_MALLOC_X | NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_ZERO_F_HAT, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); if ((N+1)*(N+1) > M) { solver_init_advanced_complex(&iplan, (nfft_mv_plan_complex*)(&plan), CGNE | PRECOMPUTE_DAMP); } else { solver_init_advanced_complex(&iplan, (nfft_mv_plan_complex*)(&plan), CGNR | PRECOMPUTE_WEIGHT | PRECOMPUTE_DAMP); } /* Read the nodes and function values. */ for (j = 0; j < M; j++) { fscanf(stdin,"%le %le %le %le\n",&plan.x[2*j+1],&plan.x[2*j],&re,&im); plan.x[2*j+1] = plan.x[2*j+1]/(2.0*PI); plan.x[2*j] = plan.x[2*j]/(2.0*PI); if (plan.x[2*j] >= 0.5) { plan.x[2*j] = plan.x[2*j] - 1; } iplan.y[j] = re + _Complex_I * im; fprintf(stderr,"%le %le %le %le\n",plan.x[2*j+1],plan.x[2*j], creal(iplan.y[j]),cimag(iplan.y[j])); } /* Read the number of nodes. */ fscanf(stdin,"nodes_eval=%d\n",&M2); fprintf(stderr,"%d\n",M2); /* Init transform plans. */ nfsft_init_guru(&plan2, N, M2, ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) | ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)) | NFSFT_MALLOC_F | NFSFT_MALLOC_X | NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_ZERO_F_HAT, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, cutoff); /* Read the nodes and function values. 
*/ for (j = 0; j < M2; j++) { fscanf(stdin,"%le %le\n",&plan2.x[2*j+1],&plan2.x[2*j]); plan2.x[2*j+1] = plan2.x[2*j+1]/(2.0*PI); plan2.x[2*j] = plan2.x[2*j]/(2.0*PI); if (plan2.x[2*j] >= 0.5) { plan2.x[2*j] = plan2.x[2*j] - 1; } fprintf(stderr,"%le %le\n",plan2.x[2*j+1],plan2.x[2*j]); } nfsft_precompute_x(&plan); nfsft_precompute_x(&plan2); /* Frequency weights. */ if ((N+1)*(N+1) > M) { /* Compute Voronoi weights. */ //nfft_voronoi_weights_S2(iplan.w, plan.x, M); /* Print out Voronoi weights. */ /*a = 0.0; for (j = 0; j < plan.M_total; j++) { fprintf(stderr,"%le\n",iplan.w[j]); a += iplan.w[j]; } fprintf(stderr,"sum = %le\n",a);*/ for (j = 0; j < plan.N_total; j++) { iplan.w_hat[j] = 0.0; } for (k = 0; k <= N; k++) { for (j = -k; j <= k; j++) { iplan.w_hat[NFSFT_INDEX(k,j,&plan)] = 1.0/(pow(k+1.0,2.0)); /*temp2[j]*/; } } } else { for (j = 0; j < plan.N_total; j++) { iplan.w_hat[j] = 0.0; } for (k = 0; k <= N; k++) { for (j = -k; j <= k; j++) { iplan.w_hat[NFSFT_INDEX(k,j,&plan)] = 1/(pow(k+1.0,2.5)); } } /* Compute Voronoi weights. */ nfft_voronoi_weights_S2(iplan.w, plan.x, M); /* Print out Voronoi weights. 
*/ a = 0.0; for (j = 0; j < plan.M_total; j++) { fprintf(stderr,"%le\n",iplan.w[j]); a += iplan.w[j]; } fprintf(stderr,"sum = %le\n",a); } fprintf(stderr, "N_total = %d\n", plan.N_total); fprintf(stderr, "M_total = %d\n", plan.M_total); /* init some guess */ for (k = 0; k < plan.N_total; k++) { iplan.f_hat_iter[k] = 0.0; } /* inverse trafo */ solver_before_loop_complex(&iplan); /*for (k = 0; k < plan.M_total; k++) { printf("%le %le\n",creal(iplan.r_iter[k]),cimag(iplan.r_iter[k])); }*/ for (m = 0; m < 29; m++) { fprintf(stderr,"Residual ||r||=%e,\n",sqrt(iplan.dot_r_iter)); solver_loop_one_step_complex(&iplan); } /*NFFT_SWAP_complex(iplan.f_hat_iter, plan.f_hat); nfsft_trafo(&plan); NFFT_SWAP_complex(iplan.f_hat_iter, plan.f_hat); a = 0.0; b = 0.0; for (k = 0; k < plan.M_total; k++) { printf("%le %le %le\n",cabs(iplan.y[k]),cabs(plan.f[k]), cabs(iplan.y[k]-plan.f[k])); a += cabs(iplan.y[k]-plan.f[k])*cabs(iplan.y[k]-plan.f[k]); b += cabs(iplan.y[k])*cabs(iplan.y[k]); } fprintf(stderr,"relative error in 2-norm: %le\n",a/b);*/ NFFT_SWAP_complex(iplan.f_hat_iter, plan2.f_hat); nfsft_trafo(&plan2); NFFT_SWAP_complex(iplan.f_hat_iter, plan2.f_hat); for (k = 0; k < plan2.M_total; k++) { fprintf(stdout,"%le\n",cabs(plan2.f[k])); } solver_finalize_complex(&iplan); nfsft_finalize(&plan); nfsft_finalize(&plan2); /* Delete precomputed data. */ nfsft_forget(); if ((N+1)*(N+1) > M) { nfft_free(temp2); } } /* Process each testcase. */ /* Return exit code for successful run. */ return EXIT_SUCCESS; }
/** Comparison of the FFTW, mpolar FFT, and inverse mpolar FFT */ static int comparison_fft(FILE *fp, int N, int T, int R) { ticks t0, t1; fftw_plan my_fftw_plan; fftw_complex *f_hat,*f; int m,k; double t_fft, t_dft_mpolar; f_hat = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*N*N); f = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*(T*R/4)*5); my_fftw_plan = fftw_plan_dft_2d(N,N,f_hat,f,FFTW_BACKWARD,FFTW_MEASURE); for(k=0; k<N*N; k++) f_hat[k] = (((double)rand())/RAND_MAX) + _Complex_I* (((double)rand())/RAND_MAX); t0 = getticks(); for(m=0;m<65536/N;m++) { fftw_execute(my_fftw_plan); /* touch */ f_hat[2]=2*f_hat[0]; } t1 = getticks(); GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0); t_fft=N*GLOBAL_elapsed_time/65536; if(N<256) { mpolar_dft(f_hat,N,f,T,R,1); t_dft_mpolar=GLOBAL_elapsed_time; } for (m=3; m<=9; m+=3) { if((m==3)&&(N<256)) fprintf(fp,"%d\t&\t&\t%1.1e&\t%1.1e&\t%d\t",N,t_fft,t_dft_mpolar,m); else if(m==3) fprintf(fp,"%d\t&\t&\t%1.1e&\t &\t%d\t",N,t_fft,m); else fprintf(fp," \t&\t&\t &\t &\t%d\t",m); printf("N=%d\tt_fft=%1.1e\tt_dft_mpolar=%1.1e\tm=%d\t",N,t_fft,t_dft_mpolar,m); mpolar_fft(f_hat,N,f,T,R,m); fprintf(fp,"%1.1e&\t",GLOBAL_elapsed_time); printf("t_mpolar=%1.1e\t",GLOBAL_elapsed_time); inverse_mpolar_fft(f,T,R,f_hat,N,2*m,m); if(m==9) fprintf(fp,"%1.1e\\\\\\hline\n",GLOBAL_elapsed_time); else fprintf(fp,"%1.1e\\\\\n",GLOBAL_elapsed_time); printf("t_impolar=%1.1e\n",GLOBAL_elapsed_time); } fflush(fp); nfft_free(f); nfft_free(f_hat); return EXIT_SUCCESS; }
void fastsum_benchomp_createdataset(unsigned int d, int L, int M) { int t, j, k; R *x; R *y; C *alpha; x = (R*) nfft_malloc(d*L*sizeof(R)); y = (R*) nfft_malloc(d*L*sizeof(R)); alpha = (C*) nfft_malloc(L*sizeof(C)); /** init source knots in a d-ball with radius 1 */ k = 0; while (k < L) { double r_max = 1.0; double r2 = 0.0; for (j=0; j<d; j++) x[k*d+j] = 2.0 * r_max * (double)rand()/(double)RAND_MAX - r_max; for (j=0; j<d; j++) r2 += x[k*d+j] * x[k*d+j]; if (r2 >= r_max * r_max) continue; k++; } nfft_vrand_unit_complex(alpha,L); /** init target knots in a d-ball with radius 1 */ k = 0; while (k < M) { double r_max = 1.0; double r2 = 0.0; for (j=0; j<d; j++) y[k*d+j] = 2.0 * r_max * (double)rand()/(double)RAND_MAX - r_max; for (j=0; j<d; j++) r2 += y[k*d+j] * y[k*d+j]; if (r2 >= r_max * r_max) continue; k++; } printf("%d %d %d\n", d, L, M); for (j=0; j < L; j++) { for (t=0; t < d; t++) printf("%.16e ", x[d*j+t]); printf("\n"); } for (j=0; j < L; j++) printf("%.16e %.16e\n", creal(alpha[j]), cimag(alpha[j])); for (j=0; j < M; j++) { for (t=0; t < d; t++) printf("%.16e ", y[d*j+t]); printf("\n"); } nfft_free(x); nfft_free(y); nfft_free(alpha); }
/** test program for various parameters */ int main(int argc,char **argv) { int N; /**< mpolar FFT size NxN */ int T, R; /**< number of directions/offsets */ int M; /**< number of knots of mpolar grid */ double *x, *w; /**< knots and associated weights */ fftw_complex *f_hat, *f, *f_direct, *f_tilde; int k; int max_i; /**< number of iterations */ int m; double temp1, temp2, E_max=0.0; FILE *fp1, *fp2; char filename[30]; int logN; if( argc!=4 ) { printf("mpolar_fft_test N T R \n"); printf("\n"); printf("N mpolar FFT of size NxN \n"); printf("T number of slopes \n"); printf("R number of offsets \n"); /** Hence, comparison of the FFTW, mpolar FFT, and inverse mpolar FFT */ printf("\nHence, comparison FFTW, mpolar FFT and inverse mpolar FFT\n"); fp1=fopen("mpolar_comparison_fft.dat","w"); if (fp1==NULL) return(-1); for (logN=4; logN<=8; logN++) comparison_fft(fp1,(1U<< logN), 3*(1U<< logN), 3*(1U<< (logN-1))); fclose(fp1); exit(-1); } N = atoi(argv[1]); T = atoi(argv[2]); R = atoi(argv[3]); printf("N=%d, modified polar grid with T=%d, R=%d => ",N,T,R); x = (double *)nfft_malloc(5*T*R/2*(sizeof(double))); w = (double *)nfft_malloc(5*T*R/4*(sizeof(double))); f_hat = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*N*N); f = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*1.25*T*R); /* 4/pi*log(1+sqrt(2)) = 1.122... 
< 1.25 */ f_direct = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*1.25*T*R); f_tilde = (fftw_complex *)nfft_malloc(sizeof(fftw_complex)*N*N); /** generate knots of mpolar grid */ M=mpolar_grid(T,R,x,w); printf("M=%d.\n",M); /** load data */ fp1=fopen("input_data_r.dat","r"); fp2=fopen("input_data_i.dat","r"); if ((fp1==NULL) || (fp2==NULL)) return(-1); for(k=0;k<N*N;k++) { fscanf(fp1,"%le ",&temp1); fscanf(fp2,"%le ",&temp2); f_hat[k]=temp1+ _Complex_I*temp2; } fclose(fp1); fclose(fp2); /** direct mpolar FFT */ mpolar_dft(f_hat,N,f_direct,T,R,1); // mpolar_fft(f_hat,N,f_direct,T,R,12); /** Test of the mpolar FFT with different m */ printf("\nTest of the mpolar FFT: \n"); fp1=fopen("mpolar_fft_error.dat","w+"); for (m=1; m<=12; m++) { /** fast mpolar FFT */ mpolar_fft(f_hat,N,f,T,R,m); /** compute error of fast mpolar FFT */ E_max=X(error_l_infty_complex)(f_direct,f,M); printf("m=%2d: E_max = %e\n",m,E_max); fprintf(fp1,"%e\n",E_max); } fclose(fp1); /** Test of the inverse mpolar FFT for different m in dependece of the iteration number*/ for (m=3; m<=9; m+=3) { printf("\nTest of the inverse mpolar FFT for m=%d: \n",m); sprintf(filename,"mpolar_ifft_error%d.dat",m); fp1=fopen(filename,"w+"); for (max_i=0; max_i<=20; max_i+=2) { /** inverse mpolar FFT */ inverse_mpolar_fft(f_direct,T,R,f_tilde,N,max_i,m); /** compute maximum relativ error */ E_max=X(error_l_infty_complex)(f_hat,f_tilde,N*N); printf("%3d iterations: E_max = %e\n",max_i,E_max); fprintf(fp1,"%e\n",E_max); } fclose(fp1); } /** free the variables */ nfft_free(x); nfft_free(w); nfft_free(f_hat); nfft_free(f); nfft_free(f_direct); nfft_free(f_tilde); return 0; }