static void construct(char * file, int N, int M, int Z) { int j,k,l; /* some variables */ double real; nfft_plan my_plan; /* plan for the three dimensional nfft */ FILE* fp,*fk; int my_N[3],my_n[3]; /* to init the nfft */ /* initialise my_plan */ //nfft_init_3d(&my_plan,Z,N,N,M); my_N[0]=Z; my_n[0]=ceil(Z*1.2); my_N[1]=N; my_n[1]=ceil(N*1.2); my_N[2]=N; my_n[2]=ceil(N*1.2); nfft_init_guru(&my_plan, 3, my_N, M, my_n, 6, PRE_PHI_HUT| PRE_PSI |MALLOC_X| MALLOC_F_HAT| MALLOC_F| FFTW_INIT| FFT_OUT_OF_PLACE, FFTW_MEASURE| FFTW_DESTROY_INPUT); fp=fopen("knots.dat","r"); for(j=0;j<M;j++) fscanf(fp,"%le %le %le",&my_plan.x[3*(j)+1], &my_plan.x[3*(j)+2],&my_plan.x[3*(j)+0]); fclose(fp); fp=fopen("input_f.dat","r"); fk=fopen(file,"w"); for(l=0;l<Z;l++) { for(j=0;j<N;j++) { for(k=0;k<N;k++) { //fscanf(fp,"%le ",&my_plan.f_hat[(N*N*(Z-l)+N*j+k+N*N*Z/2)%(N*N*Z)][0]); fscanf(fp,"%le ",&real); my_plan.f_hat[(N*N*l+N*j+k)] = real; } } } if(my_plan.nfft_flags & PRE_PSI) nfft_precompute_psi(&my_plan); nfft_trafo(&my_plan); for(j=0;j<my_plan.M_total;j++) fprintf(fk,"%le %le %le %le %le\n",my_plan.x[3*j+1], my_plan.x[3*j+2],my_plan.x[3*j+0],creal(my_plan.f[j]),cimag(my_plan.f[j])); fclose(fk); fclose(fp); nfft_finalize(&my_plan); }
static void simple_test_nfft_1d(void) { nfft_plan p; double t; int N=14; int M=19; ticks t0, t1; /** init an one dimensional plan */ nfft_init_1d(&p,N,M); /** init pseudo random nodes */ nfft_vrand_shifted_unit_double(p.x,p.M_total); /** precompute psi, the entries of the matrix B */ if(p.nfft_flags & PRE_ONE_PSI) nfft_precompute_one_psi(&p); /** init pseudo random Fourier coefficients and show them */ nfft_vrand_unit_complex(p.f_hat,p.N_total); nfft_vpr_complex(p.f_hat,p.N_total,"given Fourier coefficients, vector f_hat"); /** direct trafo and show the result */ t0 = getticks(); nfft_trafo_direct(&p); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f,p.M_total,"ndft, vector f"); printf(" took %e seconds.\n",t); /** approx. trafo and show the result */ nfft_trafo(&p); nfft_vpr_complex(p.f,p.M_total,"nfft, vector f"); /** approx. adjoint and show the result */ nfft_adjoint_direct(&p); nfft_vpr_complex(p.f_hat,p.N_total,"adjoint ndft, vector f_hat"); /** approx. adjoint and show the result */ nfft_adjoint(&p); nfft_vpr_complex(p.f_hat,p.N_total,"adjoint nfft, vector f_hat"); /** finalise the one dimensional plan */ nfft_finalize(&p); }
/**
 * construct computes a 2d-nfft for every slice.
 *
 * The first M/Z rows of "knots.dat" supply the two dimensional nodes (the
 * third column of each row is read and discarded).  For each of the Z slices
 * the N*N coefficients are taken from mem with the slice index shifted by
 * Z/2 (modulo Z), transformed, and written to the output file as rows of
 * "x0 x1 z/Z-0.5 real imag".
 *
 * If a file cannot be opened, a message is printed to stderr and the
 * function returns without producing results.
 *
 * \arg file path of the output file (opened for writing)
 * \arg N    bandwidth in both dimensions of a slice
 * \arg M    total number of nodes; M/Z nodes are used per slice
 * \arg Z    number of slices
 * \arg mem  the N*N*Z Fourier coefficients
 */
static void construct(char * file, int N, int M, int Z, fftw_complex *mem)
{
  int j,z;                   /* loop indices */
  double tmp;                /* placeholder for the unused third knot column */
  double slice;              /* slice index as a double */
  nfft_plan my_plan;         /* plan for the two dimensional nfft */
  FILE* fp;

  /* initialise my_plan */
  nfft_init_2d(&my_plan,N,N,M/Z);

  fp=fopen("knots.dat","r");
  if(fp==NULL)
  {
    fprintf(stderr,"construct: cannot open knots.dat\n");
    nfft_finalize(&my_plan);
    return;
  }

  for(j=0;j<my_plan.M_total;j++)
  {
    fscanf(fp,"%le %le %le",&my_plan.x[2*j+0],&my_plan.x[2*j+1],&tmp);
  }
  fclose(fp);

  fp=fopen(file,"w");
  if(fp==NULL)
  {
    fprintf(stderr,"construct: cannot open output file %s\n",file);
    nfft_finalize(&my_plan);
    return;
  }

  /* the nodes are the same for every slice, so psi is precomputed once */
  if(my_plan.flags & PRE_PSI)
    nfft_precompute_psi(&my_plan);

  for(z=0;z<Z;z++)
  {
    slice = (double) z;

    for(j=0;j<N*N;j++)
      my_plan.f_hat[j] = mem[(z*N*N+N*N*Z/2+j)%(N*N*Z)];

    nfft_trafo(&my_plan);

    for(j=0;j<my_plan.M_total;j++)
    {
      fprintf(fp,"%le %le %le %le %le\n",my_plan.x[2*j+0],my_plan.x[2*j+1],slice/Z-0.5,
              creal(my_plan.f[j]),cimag(my_plan.f[j]));
    }
  }
  fclose(fp);

  nfft_finalize(&my_plan);
}
/**
 * Executes the fast Gauss transform.
 *
 * Three steps: the adjoint transform of nplan1, an entrywise multiplication
 * of the first n entries of nplan1->f_hat with b, and the forward transform
 * of nplan2.  When FGT_NDFT is set in the plan flags the direct routines are
 * used for both transforms, otherwise the fast ones.
 *
 * \arg ths The pointer to a fgt plan
 *
 * \author Stefan Kunis
 */
void fgt_trafo(fgt_plan *ths)
{
  const int use_direct = (ths->flags & FGT_NDFT) != 0;
  int idx;

  /* step 1: adjoint transform */
  if(use_direct)
    nfft_adjoint_direct(ths->nplan1);
  else
    nfft_adjoint(ths->nplan1);

  /* step 2: scale the Fourier coefficients */
  for(idx=0; idx<ths->n; idx++)
    ths->nplan1->f_hat[idx] *= ths->b[idx];

  /* step 3: forward transform */
  if(use_direct)
    nfft_trafo_direct(ths->nplan2);
  else
    nfft_trafo(ths->nplan2);
}
void // frequency to space mad_cmat_infft (const cnum_t x[], const num_t r_node[], cnum_t r[], ssz_t m, ssz_t n, ssz_t nx) { assert( x && r ); int precomp = 0; if (m != p_n1 || n != p_n2 || nx != p_m) { nfft_finalize(&p); nfft_init_2d (&p, m, n, nx); p_n1 = m, p_n2 = n, p_m = nx, precomp = 1; } if (r_node || precomp) { for (ssz_t i=0; i < m*n; i++) // forward transform needs -r_node p.x[i] = r_node[i] == -0.5 ? 0.4999999999999999 : -r_node[i]; if(p.flags & PRE_ONE_PSI) nfft_precompute_one_psi(&p); } // mad_cvec_copy(x, p.f_hat, nx); mad_cvec_copy(x+nx/2, p.f_hat, nx/2); // for compatibility with FFTW ?? (TBC) mad_cvec_copy(x, p.f_hat+nx/2, nx/2); const char *error_str = nfft_check(&p); if (error_str) error("%s", error_str); nfft_trafo(&p); // nfft_trafo_direct(&p); mad_cvec_copy(p.f, r, m*n); mad_cvec_muln(r, 1.0/(m*n), r, m*n); }
/** Reconstruction routine with cross validation */ static void glacier_cv(int N,int M,int M_cv,unsigned solver_flags) { int j,k,k0,k1,l,my_N[2],my_n[2]; double tmp_y,r; nfft_plan p,cp; solver_plan_complex ip; double _Complex* cp_y; FILE* fp; int M_re=M-M_cv; /* initialise p for reconstruction */ my_N[0]=N; my_n[0]=X(next_power_of_2)(N); my_N[1]=N; my_n[1]=X(next_power_of_2)(N); nfft_init_guru(&p, 2, my_N, M_re, my_n, 6, PRE_PHI_HUT| PRE_FULL_PSI| MALLOC_X| MALLOC_F_HAT| MALLOC_F| FFTW_INIT| FFT_OUT_OF_PLACE, FFTW_MEASURE| FFTW_DESTROY_INPUT); /* initialise ip, specific */ solver_init_advanced_complex(&ip,(nfft_mv_plan_complex*)(&p), solver_flags); /* initialise cp for validation */ cp_y = (double _Complex*) nfft_malloc(M*sizeof(double _Complex)); nfft_init_guru(&cp, 2, my_N, M, my_n, 6, PRE_PHI_HUT| PRE_FULL_PSI| MALLOC_X| MALLOC_F| FFTW_INIT| FFT_OUT_OF_PLACE, FFTW_MEASURE| FFTW_DESTROY_INPUT); cp.f_hat=ip.f_hat_iter; /* set up data in cp and cp_y */ fp=fopen("input_data.dat","r"); for(j=0;j<cp.M_total;j++) { fscanf(fp,"%le %le %le",&cp.x[2*j+0],&cp.x[2*j+1],&tmp_y); cp_y[j]=tmp_y; } fclose(fp); /* copy part of the data to p and ip */ for(j=0;j<p.M_total;j++) { p.x[2*j+0]=cp.x[2*j+0]; p.x[2*j+1]=cp.x[2*j+1]; ip.y[j]=tmp_y; } /* precompute psi */ if(p.nfft_flags & PRE_ONE_PSI) nfft_precompute_one_psi(&p); /* precompute psi */ if(cp.nfft_flags & PRE_ONE_PSI) nfft_precompute_one_psi(&cp); /* initialise damping factors */ if(ip.flags & PRECOMPUTE_DAMP) for(k0=0;k0<p.N[0];k0++) for(k1=0;k1<p.N[1];k1++) ip.w_hat[k0*p.N[1]+k1]= my_weight(((double)(k0-p.N[0]/2))/p.N[0],0.5,3,0.001)* my_weight(((double)(k1-p.N[1]/2))/p.N[1],0.5,3,0.001); /* init some guess */ for(k=0;k<p.N_total;k++) ip.f_hat_iter[k]=0; /* inverse trafo */ solver_before_loop_complex(&ip); // fprintf(stderr,"iteration starts,\t"); for(l=0;l<40;l++) solver_loop_one_step_complex(&ip); //fprintf(stderr,"r=%1.2e, ",sqrt(ip.dot_r_iter)/M_re); NFFT_SWAP_complex(p.f_hat,ip.f_hat_iter); nfft_trafo(&p); 
NFFT_SWAP_complex(p.f_hat,ip.f_hat_iter); nfft_upd_axpy_complex(p.f,-1,ip.y,M_re); r=sqrt(nfft_dot_complex(p.f,M_re)/nfft_dot_complex(cp_y,M)); fprintf(stderr,"r=%1.2e, ",r); printf("$%1.1e$ & ",r); nfft_trafo(&cp); nfft_upd_axpy_complex(&cp.f[M_re],-1,&cp_y[M_re],M_cv); r=sqrt(nfft_dot_complex(&cp.f[M_re],M_cv)/nfft_dot_complex(cp_y,M)); fprintf(stderr,"r_1=%1.2e\t",r); printf("$%1.1e$ & ",r); nfft_finalize(&cp); solver_finalize_complex(&ip); nfft_finalize(&p); }
/**
 * fast NFFT-based summation
 *
 * Runs the adjoint transform of mv1, multiplies the result entrywise with b
 * into mv2.f_hat, runs the transform of mv2, and finally adds the near field
 * contribution for every target node (via SearchBox when NEARFIELD_BOXES is
 * set, via SearchTree otherwise).  With MEASURE_TIME defined the run times of
 * the four steps are stored in MEASURE_TIME_t[4..7]; otherwise those entries
 * are left at zero.
 */
void fastsum_trafo(fastsum_plan *ths)
{
  int j,k,t;
  ticks t0, t1;

  /* reset the four timers of this routine */
  for (k=4; k<=7; k++)
    ths->MEASURE_TIME_t[k] = 0.0;

#ifdef MEASURE_TIME
  t0 = getticks();
#endif
  /* step 1: adjoint transform on the source nodes */
  nfft_adjoint(&(ths->mv1));
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[4] += nfft_elapsed_seconds(t1,t0);
  t0 = getticks();
#endif

  /* step 2: multiplication with the coefficients b */
  #pragma omp parallel for default(shared) private(k)
  for (k=0; k<ths->mv2.N_total; k++)
    ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k];
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[5] += nfft_elapsed_seconds(t1,t0);
  t0 = getticks();
#endif

  /* step 3: transform to the target nodes */
  nfft_trafo(&(ths->mv2));
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[6] += nfft_elapsed_seconds(t1,t0);
  t0 = getticks();
#endif

  /* step 4: add the near field */
  #pragma omp parallel for default(shared) private(j,k,t)
  for (j=0; j<ths->M_total; j++)
  {
    /* limits of the d-dimensional near field box around target j */
    double ymin[ths->d], ymax[ths->d];

    if (!(ths->flags & NEARFIELD_BOXES))
    {
      for (t=0; t<ths->d; t++)
      {
        ymin[t] = ths->y[ths->d*j+t] - ths->eps_I;
        ymax[t] = ths->y[ths->d*j+t] + ths->eps_I;
      }
      ths->f[j] = ths->mv2.f[j]
                + SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax,
                             ths->N_total, ths->k, ths->kernel_param,
                             ths->Ad, ths->Add, ths->p, ths->flags);
    }
    else
    {
      ths->f[j] = ths->mv2.f[j] + SearchBox(ths->y + ths->d*j, ths);
    }
  }
#ifdef MEASURE_TIME
  t1 = getticks();
  ths->MEASURE_TIME_t[7] += nfft_elapsed_seconds(t1,t0);
#endif
}
/* Wrapper that forwards the given plan pointer unchanged to nfft_trafo(). The function name is produced by the FC_FUNC macro from the lower and upper case spellings; presumably this is the Fortran name mangling of the build system -- confirm against the configure setup. */
void FC_FUNC(oct_nfft_trafo,OCT_NFFT_TRAFO) (nfft_plan *ths) { nfft_trafo (ths); }
static void simple_test_nfft_2d(void) { int K,N[2],n[2],M; double t; ticks t0, t1; nfft_plan p; N[0]=32; n[0]=64; N[1]=14; n[1]=32; M=N[0]*N[1]; K=16; t0 = getticks(); /** init a two dimensional plan */ nfft_init_guru(&p, 2, N, M, n, 7, PRE_PHI_HUT| PRE_FULL_PSI| MALLOC_F_HAT| MALLOC_X| MALLOC_F | FFTW_INIT| FFT_OUT_OF_PLACE, FFTW_ESTIMATE| FFTW_DESTROY_INPUT); /** init pseudo random nodes */ nfft_vrand_shifted_unit_double(p.x,p.d*p.M_total); /** precompute psi, the entries of the matrix B */ if(p.nfft_flags & PRE_ONE_PSI) nfft_precompute_one_psi(&p); /** init pseudo random Fourier coefficients and show them */ nfft_vrand_unit_complex(p.f_hat,p.N_total); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f_hat,K, "given Fourier coefficients, vector f_hat (first few entries)"); printf(" ... initialisation took %e seconds.\n",t); /** direct trafo and show the result */ t0 = getticks(); nfft_trafo_direct(&p); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f,K,"ndft, vector f (first few entries)"); printf(" took %e seconds.\n",t); /** approx. trafo and show the result */ t0 = getticks(); nfft_trafo(&p); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f,K,"nfft, vector f (first few entries)"); printf(" took %e seconds.\n",t); /** direct adjoint and show the result */ t0 = getticks(); nfft_adjoint_direct(&p); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f_hat,K,"adjoint ndft, vector f_hat (first few entries)"); printf(" took %e seconds.\n",t); /** approx. adjoint and show the result */ t0 = getticks(); nfft_adjoint(&p); t1 = getticks(); t = nfft_elapsed_seconds(t1,t0); nfft_vpr_complex(p.f_hat,K,"adjoint nfft, vector f_hat (first few entries)"); printf(" took %e seconds.\n",t); /** finalise the two dimensional plan */ nfft_finalize(&p); }
/**
 * NFFT-based mpolar FFT
 *
 * Builds the grid with mpolar_grid(T,R,...), evaluates the NN x NN Fourier
 * coefficients f_hat at its M nodes with a two dimensional nfft
 * (oversampling factor 2, cut-off m) and stores the M results in f.  The run
 * time of the transform alone is stored in GLOBAL_elapsed_time.
 *
 * \arg f_hat the NN*NN Fourier coefficients
 * \arg NN    bandwidth in both dimensions
 * \arg f     output, one value per grid node
 * \arg T     first grid parameter passed to mpolar_grid
 * \arg R     second grid parameter passed to mpolar_grid
 * \arg m     cut-off parameter of the nfft
 *
 * \return EXIT_SUCCESS, or -1 if a work array could not be allocated
 */
static int mpolar_fft(fftw_complex *f_hat, int NN, fftw_complex *f, int T, int R, int m)
{
  ticks t0, t1;
  int j,k;                              /**< index for nodes and freqencies   */
  nfft_plan my_nfft_plan;               /**< plan for the nfft-2D             */

  double *x, *w;                        /**< knots and associated weights     */

  int N[2],n[2];
  int M;                                /**< number of knots                  */

  N[0]=NN; n[0]=2*N[0];                 /**< oversampling factor sigma=2      */
  N[1]=NN; n[1]=2*N[1];                 /**< oversampling factor sigma=2      */

  x = (double *)nfft_malloc(5*T*R/2*(sizeof(double)));
  if (x==NULL)
    return -1;

  w = (double *)nfft_malloc(5*T*R/4*(sizeof(double)));
  if (w==NULL)
  {
    /* do not leak the knots when the weights cannot be allocated */
    nfft_free(x);
    return -1;
  }

  /** init two dimensional NFFT plan */
  M=mpolar_grid(T,R,x,w);
  nfft_init_guru(&my_nfft_plan, 2, N, M, n, m,
                 PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
                 FFTW_MEASURE| FFTW_DESTROY_INPUT);

  /** init nodes from mpolar grid*/
  for(j=0;j<my_nfft_plan.M_total;j++)
  {
    my_nfft_plan.x[2*j+0] = x[2*j+0];
    my_nfft_plan.x[2*j+1] = x[2*j+1];
  }

  /** precompute psi, the entries of the matrix B */
  if(my_nfft_plan.nfft_flags & PRE_LIN_PSI)
    nfft_precompute_lin_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_PSI)
    nfft_precompute_psi(&my_nfft_plan);

  if(my_nfft_plan.nfft_flags & PRE_FULL_PSI)
    nfft_precompute_full_psi(&my_nfft_plan);

  /** init Fourier coefficients from given image */
  for(k=0;k<my_nfft_plan.N_total;k++)
    my_nfft_plan.f_hat[k] = f_hat[k];

  t0 = getticks();

  /** NFFT-2D */
  nfft_trafo(&my_nfft_plan);

  t1 = getticks();
  GLOBAL_elapsed_time = nfft_elapsed_seconds(t1,t0);

  /** copy result */
  for(j=0;j<my_nfft_plan.M_total;j++)
    f[j] = my_nfft_plan.f[j];

  /** finalise the plans and free the variables */
  nfft_finalize(&my_nfft_plan);
  nfft_free(x);
  nfft_free(w);

  return EXIT_SUCCESS;
}
/*
 * Python entry point: nfft(real_space, coordinates).
 *
 * Converts real_space to a complex128 array and coordinates to a double
 * array, initialises an nfft plan with the shape of real_space and one node
 * per row of coordinates, runs the transform, and returns a new one
 * dimensional complex128 array with one value per node.
 *
 * coordinates must have shape [points, dimensions], or [points] for a one
 * dimensional input.  On error a Python exception is set, all temporary
 * references are released and NULL is returned.
 */
static PyObject *nfft(PyObject *self, PyObject *args, PyObject *kwargs)
{
  PyObject *in_obj, *coord_obj;

  static char *kwlist[] = {"real_space", "coordinates", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO", kwlist, &in_obj, &coord_obj)) {
    return NULL;
  }

  PyObject *coord_array = PyArray_FROM_OTF(coord_obj, NPY_DOUBLE, NPY_IN_ARRAY);
  PyObject *in_array = PyArray_FROM_OTF(in_obj, NPY_COMPLEX128, NPY_IN_ARRAY);
  if (coord_array == NULL || in_array == NULL) {
    Py_XDECREF(coord_array);
    Py_XDECREF(in_array);
    return NULL;
  }

  int ndim = PyArray_NDIM(in_array);
  if (ndim <= 0) {
    PyErr_SetString(PyExc_ValueError, "Input array can't be 0 dimensional\n");
    /* release the converted arrays on this error path as well */
    Py_XDECREF(coord_array);
    Py_XDECREF(in_array);
    return NULL;
  }

  if ((PyArray_NDIM(coord_array) != 2 || PyArray_DIM(coord_array, 1) != ndim) && (ndim != 1 || PyArray_NDIM(coord_array) != 1)) {
    PyErr_SetString(PyExc_ValueError, "Coordinates must be given as array of dimensions [NUMBER_OF_POINTS, NUMBER_OF_DIMENSIONS] of [NUMBER_OF_POINTS for 1D transforms.\n");
    Py_XDECREF(coord_array);
    Py_XDECREF(in_array);
    return NULL;
  }
  int number_of_points = (int) PyArray_DIM(coord_array, 0);

  nfft_plan my_plan;
  int total_number_of_pixels = 1;
  int dims[ndim];
  int dim;
  for (dim = 0; dim < ndim; ++dim) {
    dims[dim] = (int)PyArray_DIM(in_array, dim);
    total_number_of_pixels *= dims[dim];
  }

#if defined(ENABLE_THREADS)
  printf("OMP_NUM_THREADS=%s\n",getenv("OMP_NUM_THREADS"));
  printf("nthreads = %d\n", nfft_get_num_threads());
  fftw_init_threads();
#endif

  nfft_init(&my_plan, ndim, dims, number_of_points);
  memcpy(my_plan.f_hat, PyArray_DATA(in_array), total_number_of_pixels*sizeof(fftw_complex));
  memcpy(my_plan.x, PyArray_DATA(coord_array), ndim*number_of_points*sizeof(double));

  if (my_plan.nfft_flags &PRE_PSI) {
    nfft_precompute_one_psi(&my_plan);
  }

  nfft_trafo(&my_plan);

  int out_dim[] = {number_of_points};
  PyObject *out_array = (PyObject *)PyArray_FromDims(1, out_dim, NPY_COMPLEX128);
  /* on allocation failure the exception is already set; skip the copy and
     fall through to the clean-up so that NULL is returned */
  if (out_array != NULL) {
    memcpy(PyArray_DATA(out_array), my_plan.f, number_of_points*sizeof(fftw_complex));
  }

  // Clean up memory
  nfft_finalize(&my_plan);

#if defined(ENABLE_THREADS)
  fftw_cleanup_threads();
#endif

  Py_XDECREF(coord_array);
  Py_XDECREF(in_array);
  return out_array;
}
void bench_openmp(FILE *infile, int m, int psi_flag) { nfft_plan p; int *N; int *n; int M, d, trafo_adjoint; int t, j; double re,im; ticks t0, t1; double tt_total, tt_preonepsi; fscanf(infile, "%d %d", &d, &trafo_adjoint); N = malloc(d*sizeof(int)); n = malloc(d*sizeof(int)); for (t=0; t<d; t++) fscanf(infile, "%d", N+t); for (t=0; t<d; t++) fscanf(infile, "%d", n+t); fscanf(infile, "%d", &M); #ifdef _OPENMP fftw_import_wisdom_from_filename("nfft_benchomp_detail_threads.plan"); #else fftw_import_wisdom_from_filename("nfft_benchomp_detail_single.plan"); #endif /** init an d-dimensional plan */ nfft_init_guru(&p, d, N, M, n, m, PRE_PHI_HUT| psi_flag | MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE, FFTW_MEASURE| FFTW_DESTROY_INPUT); #ifdef _OPENMP fftw_export_wisdom_to_filename("nfft_benchomp_detail_threads.plan"); #else fftw_export_wisdom_to_filename("nfft_benchomp_detail_single.plan"); #endif for (j=0; j < p.M_total; j++) { for (t=0; t < p.d; t++) fscanf(infile, "%lg", p.x+p.d*j+t); } if (trafo_adjoint==0) { for (j=0; j < p.N_total; j++) { fscanf(infile, "%lg %lg", &re, &im); p.f_hat[j] = re + _Complex_I * im; } } else { for (j=0; j < p.M_total; j++) { fscanf(infile, "%lg %lg", &re, &im); p.f[j] = re + _Complex_I * im; } } t0 = getticks(); /** precompute psi, the entries of the matrix B */ if(p.nfft_flags & PRE_ONE_PSI) nfft_precompute_one_psi(&p); t1 = getticks(); tt_preonepsi = nfft_elapsed_seconds(t1,t0); if (trafo_adjoint==0) nfft_trafo(&p); else nfft_adjoint(&p); t1 = getticks(); tt_total = nfft_elapsed_seconds(t1,t0); #ifndef MEASURE_TIME p.MEASURE_TIME_t[0] = 0.0; p.MEASURE_TIME_t[2] = 0.0; #endif #ifndef MEASURE_TIME_FFTW p.MEASURE_TIME_t[1] = 0.0; #endif printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_preonepsi, p.MEASURE_TIME_t[0], p.MEASURE_TIME_t[1], p.MEASURE_TIME_t[2], tt_total-tt_preonepsi-p.MEASURE_TIME_t[0]-p.MEASURE_TIME_t[1]-p.MEASURE_TIME_t[2], tt_total); // printf("%.6e\n", tt); free(N); free(n); /** finalise the one dimensional 
plan */ nfft_finalize(&p); }