/**
 * Switch on multithreaded FFTW for both precisions.
 *
 * Thread support is initialised in the double-precision (fftw_) and then
 * the single-precision (fftwf_) library; each result is handed to
 * require() as "init returned non-zero", tagged with this function's
 * signature.  Both planners are then told to use getNumProcessors()
 * threads, and the thread count is reported on std::cout.
 */
void cSystem::startNthreadsFFTW(void)
{
    // Thread support has to be initialised before the planners are configured.
    require(0 != fftw_init_threads(),
            "void cSystem::startNthreadsFFTW(void)");
    require(0 != fftwf_init_threads(),
            "void cSystem::startNthreadsFFTW(void)");

    // Applies to every plan created after this point.
    fftw_plan_with_nthreads(getNumProcessors());
    fftwf_plan_with_nthreads(getNumProcessors());

    std::cout << "FFTW multithreading is turned on: ";
    std::cout << getNumProcessors();
    std::cout << " threads\n\n";
}
int main(){ int nthreads = 4; omp_set_num_threads(nthreads); #pragma omp parallel fprintf(stderr,"nthreads %d \n", omp_get_num_threads()); int n3 = 128; int n2 = 128; int n1 = 128; // float ***array = sf_floatalloc3(n1,n2,n3); float *array = fftwf_alloc_real(n3*n2*n1); fftwf_complex* cout = fftwf_alloc_complex(n3*n2*n1); int err = fftwf_init_threads(); if (err == 0) { fprintf(stderr,"something went wrong with fftw\n"); } fprintf(stderr,"Got here\n"); double start,end; start = omp_get_wtime()*omp_get_wtick(); fftwf_plan_with_nthreads(nthreads); fftwf_plan plan = fftwf_plan_dft_r2c_3d( n1,n2,n3, array,cout, FFTW_MEASURE); end = omp_get_wtime()*omp_get_wtick(); fprintf(stderr,"elapsed time: %f %f %f\n",end,start,end-start); for(int i = 0; i < n3*n2*n1; ++i) array[i] = rand()/RAND_MAX; //float start = clock()/CLOCKS_PER_SEC; start = omp_get_wtime(); for(int i=0; i < 1001; ++i) fftwf_execute(plan); //float end = clock()/CLOCKS_PER_SEC; end = omp_get_wtime(); fprintf(stderr,"elapsed time: %f time/calc %f\n", end-start,(end-start)/100.0); fftwf_cleanup_threads(); fftwf_cleanup(); fftwf_destroy_plan(plan); fftwf_free(cout); fftwf_free(array); //free(**array); free(*array); free(array); return 0; }
int mcfft3_init(int pad1 /* padding on the first axis */, int nx, int ny, int nz /* input data size */, int *nx2, int *ny2, int *nz2 /* padded data size */, int *n_local, int *o_local /* local size & start */) /*< initialize >*/ { int cpuid; MPI_Comm_rank(MPI_COMM_WORLD, &cpuid); if (threads_ok) threads_ok = fftwf_init_threads(); fftwf_mpi_init(); if (false) sf_warning("Using threaded FFTW3! \n"); if (threads_ok) fftwf_plan_with_nthreads(omp_get_max_threads()); /* axis 1 */ nk = n1 = kiss_fft_next_fast_size(nx*pad1); /* axis 2 */ n2 = kiss_fft_next_fast_size(ny); /* axis 3 */ n3 = kiss_fft_next_fast_size(nz); alloc_local = fftwf_mpi_local_size_3d(n3, n2, n1, MPI_COMM_WORLD, &local_n0, &local_0_start); //cc = sf_complexalloc3(n1,n2,n3); cc = sf_complexalloc(alloc_local); cfg = fftwf_mpi_plan_dft_3d(n3,n2,n1, (fftwf_complex *) cc, (fftwf_complex *) cc, MPI_COMM_WORLD, FFTW_FORWARD, FFTW_MEASURE); icfg = fftwf_mpi_plan_dft_3d(n3,n2,n1, (fftwf_complex *) cc, (fftwf_complex *) cc, MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_MEASURE); if (NULL == cfg || NULL == icfg) sf_error("FFTW failure."); *nx2 = n1; *ny2 = n2; *nz2 = n3; *n_local = (int) local_n0; *o_local = (int) local_0_start; wt = 1.0/(n3*n2*n1); return (nk*n2*n3); }
int cfft2_init(int pad1 /* padding on the first axis */, int nx, int ny /* input data size */, int *nx2, int *ny2 /* padded data size */) /*< initialize >*/ { #ifdef SF_HAS_FFTW #ifdef _OPENMP fftwf_init_threads(); sf_warning("Using threaded FFTW3! \n"); fftwf_plan_with_nthreads(omp_get_max_threads()); #endif #endif #ifndef SF_HAS_FFTW int i2; #endif nk = n1 = kiss_fft_next_fast_size(nx*pad1); #ifndef SF_HAS_FFTW cfg1 = kiss_fft_alloc(n1,0,NULL,NULL); icfg1 = kiss_fft_alloc(n1,1,NULL,NULL); #endif n2 = kiss_fft_next_fast_size(ny); cc = sf_complexalloc2(n1,n2); dd = sf_complexalloc2(nk,n2); #ifndef SF_HAS_FFTW cfg2 = kiss_fft_alloc(n2,0,NULL,NULL); icfg2 = kiss_fft_alloc(n2,1,NULL,NULL); tmp = (kiss_fft_cpx **) sf_alloc(n2,sizeof(*tmp)); tmp[0] = (kiss_fft_cpx *) sf_alloc(nk*n2,sizeof(kiss_fft_cpx)); for (i2=0; i2 < n2; i2++) { tmp[i2] = tmp[0]+i2*nk; } trace2 = sf_complexalloc(n2); ctrace2 = (kiss_fft_cpx *) trace2; #endif *nx2 = n1; *ny2 = n2; wt = 1.0/(n1*n2); return (nk*n2); }
int cfft2_init(int pad1           /* padding on the first axis */,
               int nx,   int ny   /* input data size */,
               int *nx2, int *ny2 /* padded data size */,
               int *n_local, int *o_local /* local size & start */,
               MPI_Comm comm)
/*< initialize >*/
{
    /*
     * MPI variant of the 2-D complex FFT setup.  After choosing padded
     * sizes it asks FFTW-MPI for this rank's share of the n2 axis
     * (local_n0 rows starting at local_0_start), allocates cc and dd with
     * alloc_local elements each, and plans cc -> dd (forward) and
     * dd -> cc (backward) on the given communicator.  Returns nk*n2.
     */

    /* Thread support is attempted only while threads_ok is still set. */
    if (threads_ok) {
        threads_ok = fftwf_init_threads();
    }

    fftwf_mpi_init();

    if (false) {
        sf_warning("Using threaded FFTW3! \n");
    }

    if (threads_ok) {
        fftwf_plan_with_nthreads(omp_get_max_threads());
    }

    /* padded axis lengths */
    n1 = kiss_fft_next_fast_size(nx*pad1);
    nk = n1;
    n2 = kiss_fft_next_fast_size(ny);

    /* local slab of the distributed array */
    alloc_local = fftwf_mpi_local_size_2d(n2, n1, comm,
                                          &local_n0, &local_0_start);

    cc = sf_complexalloc(alloc_local);
    dd = sf_complexalloc(alloc_local);

    cfg  = fftwf_mpi_plan_dft_2d(n2,n1,
                                 (fftwf_complex *) cc,
                                 (fftwf_complex *) dd,
                                 comm,
                                 FFTW_FORWARD, FFTW_MEASURE);

    icfg = fftwf_mpi_plan_dft_2d(n2,n1,
                                 (fftwf_complex *) dd,
                                 (fftwf_complex *) cc,
                                 comm,
                                 FFTW_BACKWARD, FFTW_MEASURE);

    if (cfg == NULL || icfg == NULL) {
        sf_error("FFTW failure.");
    }

    /* report padded sizes and this rank's slab to the caller */
    *nx2     = n1;
    *ny2     = n2;
    *n_local = (int) local_n0;
    *o_local = (int) local_0_start;

    wt = 1.0/(n1*n2);

    return (nk*n2);
}
/*
 * Set the number of threads FFTW uses for plans created from now on.
 *
 * When built with FFTWTHREADS, FFTW's thread support is initialised on
 * first use and the planner is configured for n threads.  Without
 * FFTWTHREADS the call does nothing.
 *
 * fftwf_init_threads() returns zero on failure.  In that case the
 * "initialised" flag is left clear, so a later call retries, and the
 * planner is not touched because configuring threads without successful
 * initialisation is not supported by FFTW.
 */
void fft_set_num_threads(unsigned int n)
{
#ifdef FFTWTHREADS
    /* Deliberately an unnamed critical section, as in the original code:
       it shares its lock with every other unnamed critical section in the
       program.  Initialisation and configuration now happen under one
       acquisition instead of two. */
    #pragma omp critical
    {
        if (!fft_threads_init && fftwf_init_threads() != 0) {
            fft_threads_init = true;
        }

        if (fft_threads_init) {
            fftwf_plan_with_nthreads((int) n);
        }
    }
#else
    UNUSED(n);
#endif
}
/**
 * Initialise FFTW's thread support and make subsequently created plans
 * use FFT_THREADS threads.
 *
 * fftwf_init_threads() returns zero when thread support could not be set
 * up; in that case the planner is left in its default configuration
 * instead of being configured on top of a failed initialisation.
 */
void Fft::prepareFft(){
    if (fftwf_init_threads() == 0) {
        return;  // thread support unavailable: keep the default planner setup
    }
    fftwf_plan_with_nthreads(FFT_THREADS);
}
int fft2_init(bool cmplx1 /* if complex transform */, int pad1 /* padding on the first axis */, int nx, int ny /* input data size */, int *nx2, int *ny2 /* padded data size */) /*< initialize >*/ { #ifdef SF_HAS_FFTW #ifdef _OPENMP fftwf_init_threads(); sf_warning("Using threaded FFTW3!\n"); fftwf_plan_with_nthreads(omp_get_max_threads()); #endif #else int i2; #endif cmplx = cmplx1; if (cmplx) { nk = n1 = kiss_fft_next_fast_size(nx*pad1); #ifndef SF_HAS_FFTW cfg1 = kiss_fft_alloc(n1,0,NULL,NULL); icfg1 = kiss_fft_alloc(n1,1,NULL,NULL); #endif } else { nk = kiss_fft_next_fast_size(pad1*(nx+1)/2)+1; n1 = 2*(nk-1); #ifndef SF_HAS_FFTW cfg = kiss_fftr_alloc(n1,0,NULL,NULL); icfg = kiss_fftr_alloc(n1,1,NULL,NULL); #endif } n2 = kiss_fft_next_fast_size(ny); if (cmplx) { cc = sf_complexalloc2(n1,n2); } else { ff = sf_floatalloc2(n1,n2); } dd = sf_complexalloc(nk*n2); #ifndef SF_HAS_FFTW cfg2 = kiss_fft_alloc(n2,0,NULL,NULL); icfg2 = kiss_fft_alloc(n2,1,NULL,NULL); tmp = (kiss_fft_cpx **) sf_alloc(n2,sizeof(*tmp)); tmp[0] = (kiss_fft_cpx *) sf_alloc(nk*n2,sizeof(kiss_fft_cpx)); for (i2=0; i2 < n2; i2++) { tmp[i2] = tmp[0]+i2*nk; } trace2 = sf_complexalloc(n2); ctrace2 = (kiss_fft_cpx *) trace2; #endif *nx2 = n1; *ny2 = n2; wt = 1.0/(n1*n2); return (nk*n2); }
/*
 * Two-dimensional pseudo-spectral time stepping driver.
 *
 * Command-line parameters read through sf_get*: verb, snap, adj (all default
 * false), nb (boundary width, default 4), and the optional file names "sou"
 * and "rec".  Giving "sou" and/or "rec" selects point input/output:
 *   forward (adj=false): sou -> point input  (ipt), rec -> point output (opt)
 *   adjoint (adj=true):  sou -> point output (opt), rec -> point input  (ipt)
 * Otherwise full nz0 x nx0 wavefield slices are read from "in" / written to
 * "out".
 *
 * Setup, in order:
 *   - axes: time from input axis 2 (point input) or 3 (wavefield input);
 *     depth/lateral axes from "model"; the grid is extended by nb cells on
 *     every side (nz = nz0 + 2*nb, nx = nx0 + 2*nb).
 *   - three consecutive nz0*nx0 float arrays are read from "model" into vp,
 *     eps and del (via expand2d), then vp is squared in place and
 *     vn = vp*(1+2*del), vh = vp*(1+2*eps), eta = (eps-del)/(1+2*del),
 *     lin_eta = eta*(1+2*del) are derived.
 *   - kx and kz hold squared angular wavenumbers (2*pi*k)^2.
 *   - sinc interpolation coefficients are built for the source and receiver
 *     coordinates when given.
 *   - real-to-complex and complex-to-real 2-D FFTW plans of size
 *     nxpad x nzpad are created (threaded when _OPENMP and SF_HAS_FFTW_OMP
 *     are defined).
 *
 * Time loop (it runs 0..nt-1 forward, nt-1..0 when adj): read one input
 * record and scale it by dt*dt, damp u0, advance with fft_stepforward,
 * inject the input into u0, damp again, swap u0/u1, then extract and write
 * one output record.  In adjoint mode the output file is first filled with
 * nt placeholder records and rewound, so records can be written in reverse
 * order with sf_seek.
 *
 * NOTE(review): snap and abc are assigned but not otherwise used in this
 * function.
 * NOTE(review): ns / nr are only assigned when "sou" / "rec" is given; all
 * visible uses are guarded by the same flags.
 * NOTE(review): the placeholder records written in adjoint mode come from
 * wo / ucut before anything is stored in them; whether sf_floatalloc
 * zero-fills is not visible here.
 * NOTE(review): the return value of fftwf_init_threads() is ignored, and
 * neither the plans nor any buffer is released before returning.
 */
int main (int argc, char *argv[]) { bool verb, snap; bool abc, adj; int nz, nx, nt, ns, nr; float dz, dx, dt, oz, ox; int nz0, nx0, nb; float oz0, ox0; int nkz, nkx; int nzpad, nxpad; float **u1, **u0; float *ws, *wr; sf_file file_src = NULL, file_rec = NULL; sf_file file_inp = NULL, file_out = NULL; sf_file file_mdl = NULL; sf_axis az = NULL, ax = NULL, at = NULL, as = NULL, ar = NULL; pt2d *src2d = NULL; pt2d *rec2d = NULL; scoef2d cssinc = NULL; scoef2d crsinc = NULL; float *wi = NULL, *wo = NULL; sf_axis ai = NULL, ao = NULL; scoef2d cisinc = NULL, cosinc = NULL; bool spt = false, rpt = false; bool ipt = false, opt = false; sf_init(argc, argv); if (!sf_getbool("verb", &verb)) verb = false; if (!sf_getbool("snap", &snap)) snap = false; if (!sf_getbool("adj", &adj)) adj = false; if (!sf_getint("nb", &nb)) nb = 4; if (sf_getstring("sou") != NULL) { spt = true; if (adj) opt = true; else ipt = true; } if (sf_getstring("rec") != NULL) { rpt = true; if (adj) ipt = true; else opt = true; } file_inp = sf_input("in"); file_mdl = sf_input("model"); if (spt) file_src = sf_input("sou"); if (rpt) file_rec = sf_input("rec"); file_out = sf_output("out"); if (ipt) at = sf_iaxa(file_inp, 2); else at = sf_iaxa(file_inp, 3); if (spt) as = sf_iaxa(file_src, 2); if (rpt) ar = sf_iaxa(file_rec, 2); az = sf_iaxa(file_mdl, 1); ax = sf_iaxa(file_mdl, 2); nt = sf_n(at); dt = sf_d(at); //ot = sf_o(at); nz0 = sf_n(az); dz = sf_d(az); oz0 = sf_o(az); nx0 = sf_n(ax); dx = sf_d(ax); ox0 = sf_o(ax); if (spt) ns = sf_n(as); if (rpt) nr = sf_n(ar); nz = nz0 + 2 * nb; nx = nx0 + 2 * nb; oz = oz0 - nb * dz; ox = ox0 - nb * dx; abc = nb ? 
true : false; // sf_error("ox=%f ox0=%f oz=%f oz0=%f",ox,ox0,oz,oz0); nzpad = kiss_fft_next_fast_size( ((nz+1)>>1)<<1 ); nkx = nxpad = kiss_fft_next_fast_size(nx); nkz = nzpad / 2 + 1; /* float okx = - 0.5f / dx; */ float okx = 0.f; float okz = 0.f; float dkx = 1.f / (nxpad * dx); float dkz = 1.f / (nzpad * dz); float **vp, **eps, **del; vp = sf_floatalloc2(nz, nx); eps = sf_floatalloc2(nz, nx); del = sf_floatalloc2(nz, nx); float **tmparray = sf_floatalloc2(nz0, nx0); sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(vp[0], tmparray[0], nz, nx, nz0, nx0); sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(eps[0], tmparray[0], nz, nx, nz0, nx0); sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(del[0], tmparray[0], nz, nx, nz0, nx0); float **vn, **vh; float **eta, **lin_eta; lin_eta = NULL, vh = NULL; vn = sf_floatalloc2(nz, nx); vh = sf_floatalloc2(nz, nx); eta = sf_floatalloc2(nz, nx); lin_eta = sf_floatalloc2(nz, nx); for (int ix=0; ix<nx; ix++) { for (int iz=0; iz<nz; iz++){ vp[ix][iz] *= vp[ix][iz]; vn[ix][iz] = vp[ix][iz] * (1.f + 2.f * del[ix][iz]); vh[ix][iz] = vp[ix][iz] * (1.f + 2.f * eps[ix][iz]); eta[ix][iz] = (eps[ix][iz] - del[ix][iz]) / (1.f + 2.f * del[ix][iz]); lin_eta[ix][iz] = eta[ix][iz] * (1.f + 2.f * del[ix][iz]); } } float *kx = sf_floatalloc(nkx); float *kz = sf_floatalloc(nkz); for (int ikx=0; ikx<nkx; ++ikx) { kx[ikx] = okx + ikx * dkx; /* if (ikx >= nkx/2) kx[ikx] = (nkx - ikx) * dkx; */ if (ikx >= nkx/2) kx[ikx] = (ikx - nkx) * dkx; kx[ikx] *= 2 * SF_PI; kx[ikx] *= kx[ikx]; } for (int ikz=0; ikz<nkz; ++ikz) { kz[ikz] = okz + ikz * dkz; kz[ikz] *= 2 * SF_PI; kz[ikz] *= kz[ikz]; } if (adj) { ai = ar; ao = as; } else { ai = as; ao = ar; } if (opt) { sf_oaxa(file_out, ao, 1); sf_oaxa(file_out, at, 2); } else { sf_oaxa(file_out, az, 1); sf_oaxa(file_out, ax, 2); sf_oaxa(file_out, at, 3); } sf_fileflush(file_out, NULL); if (spt) { src2d = pt2dalloc1(ns); pt2dread1(file_src, src2d, ns, 2); cssinc = sinc2d_make(ns, src2d, nz, nx, 
dz, dx, oz, ox); ws = sf_floatalloc(ns); if (adj) { cosinc = cssinc; wo = ws; } else { cisinc = cssinc; wi = ws; } } if (rpt) { rec2d = pt2dalloc1(nr); pt2dread1(file_rec, rec2d, nr, 2); crsinc = sinc2d_make(nr, rec2d, nz, nx, dz, dx, oz, ox); wr = sf_floatalloc(nr); if (adj) { cisinc = crsinc; wi = wr; } else { cosinc = crsinc; wo = wr; } } u0 = sf_floatalloc2(nz, nx); u1 = sf_floatalloc2(nz, nx); float *rwave = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float)); float *rwavem = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float)); fftwf_complex *cwave = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex)); fftwf_complex *cwavem = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex)); /* float *rwavem = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float)); fftwf_complex *cwave = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex)); fftwf_complex *cwavem = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex)); */ /* boundary conditions */ float **ucut = NULL; float *damp = NULL; if (!(ipt &&opt)) ucut = sf_floatalloc2(nz0, nx0); damp = damp_make(nb); float wt = 1./(nxpad * nzpad); wt *= dt * dt; fftwf_plan forward_plan; fftwf_plan inverse_plan; #ifdef _OPENMP #ifdef SF_HAS_FFTW_OMP fftwf_init_threads(); fftwf_plan_with_nthreads(omp_get_max_threads()); #endif #endif forward_plan = fftwf_plan_dft_r2c_2d(nxpad, nzpad, rwave, cwave, FFTW_MEASURE); #ifdef _OPENMP #ifdef SF_HAS_FFTW_OMP fftwf_plan_with_nthreads(omp_get_max_threads()); #endif #endif inverse_plan = fftwf_plan_dft_c2r_2d(nxpad, nzpad, cwavem, rwavem, FFTW_MEASURE); int itb, ite, itc; if (adj) { itb = nt -1; ite = -1; itc = -1; } else { itb = 0; ite = nt; itc = 1; } if (adj) { for (int it=0; it<nt; it++) { if (opt) sf_floatwrite(wo, sf_n(ao), file_out); else sf_floatwrite(ucut[0], nz0*nx0, file_out); } sf_seek(file_out, 0, SEEK_SET); } float **ptrtmp = NULL; memset(u0[0], 0, sizeof(float)*nz*nx); memset(u1[0], 0, sizeof(float)*nz*nx); memset(rwave, 0, sizeof(float)*nzpad*nxpad); 
memset(rwavem, 0, sizeof(float)*nzpad*nxpad); memset(cwave, 0, sizeof(float)*nkz*nkx*2); memset(cwavem, 0, sizeof(float)*nkz*nkx*2); for (int it=itb; it!=ite; it+=itc) { if (verb) sf_warning("it = %d;",it); #ifdef _OPENMP double tic = omp_get_wtime(); #endif if (ipt) { if (adj) sf_seek(file_inp, (off_t)(it)*sizeof(float)*sf_n(ai), SEEK_SET); sf_floatread(wi, sf_n(ai), file_inp); for (int i=0; i<sf_n(ai); i++) wi[i] *= dt* dt; } else { if (adj) sf_seek(file_inp, (off_t)(it)*sizeof(float)*nz0*nx0, SEEK_SET); sf_floatread(ucut[0], nz0*nx0, file_inp); for (int j=0; j<nx0; j++) for (int i=0; i<nz0; i++) ucut[j][i] *= dt * dt; } /* apply absorbing boundary condition: E \times u@n-1 */ damp2d_apply(u0, damp, nz, nx, nb); fft_stepforward(u0, u1, rwave, rwavem, cwave, cwavem, vp, vn, eta, vh, eps, lin_eta, kz, kx, forward_plan, inverse_plan, nz, nx, nzpad, nxpad, nkz, nkx, wt, adj); // sinc2d_inject1(u0, ws[it][s_idx], cssinc[s_idx]); if (ipt) sinc2d_inject(u0, wi, cisinc); else wfld2d_inject(u0, ucut, nz0, nx0, nb); /* apply absorbing boundary condition: E \times u@n+1 */ damp2d_apply(u0, damp, nz, nx, nb); /* loop over pointers */ ptrtmp = u0; u0 = u1; u1 = ptrtmp; if (opt) { if (adj) sf_seek(file_out, (off_t)(it)*sizeof(float)*sf_n(ao),SEEK_SET); sinc2d_extract(u0, wo, cosinc); sf_floatwrite(wo, sf_n(ao), file_out); } else { if (adj) sf_seek(file_out, (off_t)(it)*sizeof(float)*nz0*nx0,SEEK_SET); wwin2d(ucut, u0, nz0, nx0, nb); sf_floatwrite(ucut[0], nz0*nx0, file_out); } #ifdef _OPENMP double toc = omp_get_wtime(); if (verb) fprintf(stderr," clock = %lf;", toc-tic); #endif } /* END OF TIME LOOP */ return 0; }
int cfft3_init(int pad1 /* padding on the first axis */, int nx, int ny, int nz /* input data size */, int *nx2, int *ny2, int *nz2 /* padded data size */) /*< initialize >*/ { #ifdef SF_HAS_FFTW #ifdef _OPENMP fftwf_init_threads(); sf_warning("Using threaded FFTW3! %d\n",omp_get_max_threads()); fftwf_plan_with_nthreads(omp_get_max_threads()); #endif #else int i2, i3; #endif /* axis 1 */ nk = n1 = kiss_fft_next_fast_size(nx*pad1); #ifndef SF_HAS_FFTW cfg1 = kiss_fft_alloc(n1,0,NULL,NULL); icfg1 = kiss_fft_alloc(n1,1,NULL,NULL); #endif /* axis 2 */ n2 = kiss_fft_next_fast_size(ny); #ifndef SF_HAS_FFTW cfg2 = kiss_fft_alloc(n2,0,NULL,NULL); icfg2 = kiss_fft_alloc(n2,1,NULL,NULL); trace2 = sf_complexalloc(n2); ctrace2 = (kiss_fft_cpx *) trace2; #endif /* axis 3 */ n3 = kiss_fft_next_fast_size(nz); #ifndef SF_HAS_FFTW cfg3 = kiss_fft_alloc(n3,0,NULL,NULL); icfg3 = kiss_fft_alloc(n3,1,NULL,NULL); trace3 = sf_complexalloc(n3); ctrace3 = (kiss_fft_cpx *) trace3; /* --- */ tmp = (kiss_fft_cpx***) sf_alloc (n3,sizeof(kiss_fft_cpx**)); tmp[0] = (kiss_fft_cpx**) sf_alloc (n2*n3,sizeof(kiss_fft_cpx*)); tmp[0][0] = (kiss_fft_cpx*) sf_alloc (nk*n2*n3,sizeof(kiss_fft_cpx)); for (i2=1; i2 < n2*n3; i2++) { tmp[0][i2] = tmp[0][0]+i2*nk; } for (i3=1; i3 < n3; i3++) { tmp[i3] = tmp[0]+i3*n2; } #endif cc = sf_complexalloc3(n1,n2,n3); *nx2 = n1; *ny2 = n2; *nz2 = n3; wt = 1.0/(n3*n2*n1); return (nk*n2*n3); }
/*
 * Interference-cleaning pass over an 8-bit filterbank file.
 *
 * argv[1] is opened, its header is parsed with read_header(), and the
 * header bytes are copied unchanged to "clean.fil".  The data are then
 * processed in blocks of 2^TSIZE (2^18) time samples by nchan channels, one
 * byte per sample.  FFTW wisdom is read from and written back to
 * "wisdom.txt" around planning.
 *
 * For every complete block:
 *   1. transpose the bytes into per-channel float rows (data[chan][sample]);
 *   2. overwrite channels within zapE of either band edge with the constant
 *      ZAP;
 *   3. on the first block only, estimate a bandpass (channel mean minus ZAP)
 *      and, for every block, subtract it;
 *   4. per channel: compute the mean, overwrite the channel with ZAP if the
 *      mean is outside ZAP +/- 5, compute the variance, and replace samples
 *      more than 3 sigma from the mean with Gaussian noise;
 *   5. sum all channels into tseries and plot it and the block;
 *   6. FFT tseries and every channel; build a mask of bins in the summed
 *      spectrum (index > 32) whose amplitude exceeds T = 12*sqrt(fftXo*tvar),
 *      or T/2 above bin 512;
 *   7. per channel: count bins above 3*sqrt(var*fftXo) and replace masked
 *      bins with complex Gaussian noise of scale sqrt(var*fftXo/2);
 *   8. inverse FFT, divide by fftX, round back into the byte buffer and
 *      append it to "clean.fil"; plot the cleaned block;
 *   9. update the running min/max of the per-channel statistics.
 * The loop ends at the first short read.  Afterwards the timers are
 * logged and the per-block channel statistics are plotted to /xs devices
 * and to mean.ps, var.ps, fft_n.ps and fft_p.ps.
 *
 * NOTE(review): the results of fopen() for argv[1], "clean.fil" and the
 * wisdom file opened for writing are used without a NULL check, and the
 * return values of fread()/fwrite() on the header are ignored.
 * NOTE(review): mjk_rand_gauss(random) is called on one shared generator
 * from inside "omp parallel for" loops; whether it is thread-safe is not
 * visible here.
 * NOTE(review): "float min=1e99" is outside the range of float.
 * NOTE(review): logmsg("masked=%d ...", nmask, ...) passes a uint64_t for %d.
 * NOTE(review): the j==512 diagnostic plot runs inside a parallel loop and
 * indexes ch_mean[j] / ch_var[j] without adding `offset`.
 * NOTE(review): the transpose loops step j by BLK and index j+k, so they
 * presumably expect nchan to be a multiple of BLK - confirm.
 * NOTE(review): only fft_plan is checked for NULL; only buffer and data are
 * freed at the end.
 */
int main(int argc, char** argv){ float tr[6]; const float ZAP=32; const uint64_t TSIZE=18; const uint64_t zapE=64; fftwf_init_threads(); fftwf_plan_with_nthreads(omp_get_max_threads()); logmsg("Open file '%s'",argv[1]); FILE* f = fopen(argv[1],"r"); int hdr_bytes = read_header(f); const uint64_t nskip = hdr_bytes; const uint64_t nchan = nchans; logmsg("Nchan=%"PRIu64", tsamp=%f",nchan,tsamp); mjk_rand_t *random = mjk_rand_init(12345); rewind(f); FILE* of = fopen("clean.fil","w"); uint8_t hdr[nskip]; fread(hdr,1,nskip,f); fwrite(hdr,1,nskip,of); const uint64_t nsamp_per_block=round(pow(2,TSIZE)); logmsg("Tblock = %f",nsamp_per_block*tsamp); mjk_clock_t *t_all = init_clock(); start_clock(t_all); mjk_clock_t *t_read = init_clock(); mjk_clock_t *t_trns= init_clock(); mjk_clock_t *t_rms = init_clock(); mjk_clock_t *t_fft = init_clock(); mjk_clock_t *t_spec = init_clock(); const uint64_t bytes_per_block = nchan*nsamp_per_block; uint8_t *buffer = calloc(bytes_per_block,1); float **data = malloc_2df(nchan,nsamp_per_block); float **clean = malloc_2df(nchan,nsamp_per_block); float *bpass = calloc(nchan,sizeof(float)); float *ch_var=NULL; float *ch_mean=NULL; float *ch_fft_n=NULL; float *ch_fft_p=NULL; logmsg("Planning FFT - this will take a long time the first time it is run!"); start_clock(t_fft); FILE * wisfile; if(wisfile=fopen("wisdom.txt","r")){ fftwf_import_wisdom_from_file(wisfile); fclose(wisfile); } const int fftX=nsamp_per_block; const int fftY=nchan; const int fftXo=nsamp_per_block/2+1; float *X = fftwf_malloc(sizeof(float)*fftX); for (uint64_t i = 0; i < nsamp_per_block ; i++){ X[i]=i; } float *tseries = fftwf_malloc(sizeof(float)*fftX); float complex *fseries = fftwf_malloc(sizeof(float complex)*fftXo); float *pseries = fftwf_malloc(sizeof(float)*fftXo); uint8_t *mask = malloc(sizeof(uint8_t)*fftXo); fftwf_plan fft_1d = fftwf_plan_dft_r2c_1d(fftX,tseries,fseries,FFTW_MEASURE|FFTW_DESTROY_INPUT); complex float * fftd = fftwf_malloc(sizeof(complex 
float)*(fftXo*fftY)); fftwf_plan fft_plan = fftwf_plan_many_dft_r2c( 1,&fftX,fftY, data[0] ,&fftX,1,fftX, fftd ,&fftXo,1,fftXo, FFTW_MEASURE|FFTW_PRESERVE_INPUT); logmsg("Planning iFFT - this will take a long time the first time it is run!"); fftwf_plan ifft_plan = fftwf_plan_many_dft_c2r( 1,&fftX,fftY, fftd ,&fftXo,1,fftXo, clean[0] ,&fftX,1,fftX, FFTW_MEASURE|FFTW_PRESERVE_INPUT); if(!fft_plan){ logmsg("Error - could not do FFT plan"); exit(2); } wisfile=fopen("wisdom.txt","w"); fftwf_export_wisdom_to_file(wisfile); fclose(wisfile); stop_clock(t_fft); logmsg("T(planFFT)= %.2lfs",read_clock(t_fft)); reset_clock(t_fft); float min_var=1e9; float max_var=0; float min_fft_n=1e9; float max_fft_n=0; float min_fft_p=1e9; float max_fft_p=0; float min_mean=1e9; float max_mean=0; uint64_t nblocks=0; uint64_t totread=0; while(!feof(f)){ nblocks++; ch_var = realloc(ch_var,nchan*nblocks*sizeof(float)); ch_mean = realloc(ch_mean,nchan*nblocks*sizeof(float)); ch_fft_n = realloc(ch_fft_n,nchan*nblocks*sizeof(float)); ch_fft_p = realloc(ch_fft_p,nchan*nblocks*sizeof(float)); start_clock(t_read); uint64_t read = fread(buffer,1,bytes_per_block,f); stop_clock(t_read); if (read!=bytes_per_block){ nblocks--; break; } totread+=read; logmsg("read=%"PRIu64" bytes. T=%fs",read,totread*tsamp/(float)nchan); uint64_t offset = (nblocks-1)*nchan; start_clock(t_trns); // transpose with small blocks in order to increase cache efficiency. 
#define BLK 8 #pragma omp parallel for schedule(static,2) shared(buffer,data) for (uint64_t j = 0; j < nchan ; j+=BLK){ for (uint64_t i = 0; i < nsamp_per_block ; i++){ for (uint64_t k = 0; k < BLK ; k++){ data[j+k][i] = buffer[i*nchan+j+k]; } } } #pragma omp parallel for shared(data) for (uint64_t j = 0; j < nchan ; j++){ if(j<zapE || (nchan-j) < zapE){ for (uint64_t i = 0; i < nsamp_per_block ; i++){ data[j][i]=ZAP; } } } if(nblocks==1){ #pragma omp parallel for shared(data,bpass) for (uint64_t j = 0; j < nchan ; j++){ for (uint64_t i = 0; i < nsamp_per_block ; i++){ bpass[j]+=data[j][i]; } bpass[j]/=(float)nsamp_per_block; bpass[j]-=ZAP; } } #pragma omp parallel for shared(data,bpass) for (uint64_t j = 0; j < nchan ; j++){ for (uint64_t i = 0; i < nsamp_per_block ; i++){ data[j][i]-=bpass[j]; } } stop_clock(t_trns); start_clock(t_rms); #pragma omp parallel for shared(data,ch_mean,ch_var) for (uint64_t j = 0; j < nchan ; j++){ float mean=0; for (uint64_t i = 0; i < nsamp_per_block ; i++){ mean+=data[j][i]; } mean/=(float)nsamp_per_block; if(mean > ZAP+5 || mean < ZAP-5){ logmsg("ZAP ch=%"PRIu64,j); for (uint64_t i = 0; i < nsamp_per_block ; i++){ data[j][i]=ZAP; } } float ss=0; float x=0; for (uint64_t i = 0; i < nsamp_per_block ; i++){ x = data[j][i]-mean; ss+=x*x; } float var=ss/(float)nsamp_per_block; if (var > 0){ for (uint64_t i = 0; i < nsamp_per_block ; i++){ float v = (data[j][i]-mean)/sqrt(var); if(v > 3 || v < -3){ data[j][i]=mjk_rand_gauss(random)*sqrt(var)+mean; } } } ch_var[offset+j] = var; ch_mean[offset+j] = mean; } stop_clock(t_rms); for (uint64_t i = 0; i < nsamp_per_block ; i++){ tseries[i]=0; } float tmean=0; float tvar=0; float max=0; float min=1e99; //#pragma omp parallel for shared(data,tseries) // NOT THREAD SAFE for (uint64_t j = 0; j < nchan ; j++){ tmean+=ch_mean[offset+j]; tvar+=ch_var[offset+j]; for (uint64_t i = 0; i < nsamp_per_block ; i++){ tseries[i]+=data[j][i]; if(data[j][i]>max)max=data[j][i]; if(data[j][i]<min)min=data[j][i]; } 
} float ss=0; float mm=0; for (uint64_t i = 0; i < nsamp_per_block ; i++){ float x=tseries[i]-tmean; mm+=tseries[i]; ss+=x*x; } float rvar=ss/(float)nsamp_per_block; logmsg("var=%g tvar=%g",ss/(float)nsamp_per_block,tvar); logmsg("mean=%g tmean=%g",mm/(float)nsamp_per_block,tmean); cpgopen("3/xs"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,fftX,tmean-sqrt(tvar)*30,tmean+sqrt(tvar)*30); cpgbox("ABN",0,0,"ABN",0,0); cpgline(fftX,X,tseries); cpgsci(2); cpgclos(); tr[0] = 0.0 ; tr[1] = 1; tr[2] = 0; tr[3] = 0.5; tr[4] = 0; tr[5] = 1; logmsg("max=%g min=%g",max,min); cpgopen("4/xs"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,nsamp_per_block,0,nchan); cpgbox("ABN",0,0,"ABN",0,0); cpggray(*data,nsamp_per_block,nchan,1,nsamp_per_block,1,nchan,tmean/(float)nchan+sqrt(rvar/(float)nchan),tmean/(float)nchan-sqrt(rvar/(float)nchan),tr); cpgclos(); start_clock(t_fft); fftwf_execute(fft_1d); fftwf_execute(fft_plan); stop_clock(t_fft); { float T = sqrt(fftXo*tvar)*12; logmsg("Zap T=%.2e",T); float fx[fftXo]; float fT[fftXo]; #pragma omp parallel for shared(fseries,pseries,mask) for (uint64_t i = 0; i < fftXo ; i++){ mask[i]=1; } #pragma omp parallel for shared(fseries,pseries,mask) for (uint64_t i = 0; i < fftXo ; i++){ pseries[i]=camp(fseries[i]); fx[i]=i; float TT = T; if (i>512)TT=T/2.0; if(i>32){ fT[i]=TT; if (pseries[i] > TT) { mask[i]=0; } } else fT[i]=0; } uint64_t nmask=0; for (uint64_t i = 0; i < fftXo ; i++){ if (mask[i]==0){ nmask++; } } logmsg("masked=%d (%.2f%%)",nmask,100*nmask/(float)fftXo); cpgopen("1/xs"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,fftXo,0,T*10); cpgbox("ABN",0,0,"ABN",0,0); cpgline(fftXo,fx,pseries); cpgsci(2); cpgline(fftXo,fx,fT); cpgclos(); } // exit(1); start_clock(t_spec); //FILE* ff=fopen("plot","w"); #pragma omp parallel for shared(fftd,ch_mean,ch_fft_n,ch_fft_p) for (uint64_t j = 0; j < nchan ; j++){ float var = ch_var[offset+j]; float m=sqrt(var*fftXo/2.0); float T = sqrt(var*fftXo)*3; uint64_t n=0; float p=0; float complex *fftch = fftd + fftXo*j; 
for(uint64_t i = 1; i < fftXo; i++){ if (camp(fftch[i]) > T) { n++; p+=camp(fftch[i]); } // if(j==512)fprintf(ff,"%f ",camp(fftch[i])); if(mask[i]==0){ fftch[i]=m*(mjk_rand_gauss(random) + I*mjk_rand_gauss(random)); } // if(j==512)fprintf(ff,"%f\n",camp(fftch[i])); } ch_fft_n[offset+j]=n; ch_fft_p[offset+j]=p; } // fclose(ff); logmsg("iFFT"); fftwf_execute(ifft_plan); #pragma omp parallel for schedule(static,2) shared(buffer,clean) for (uint64_t j = 0; j < nchan ; j+=BLK){ for (uint64_t i = 0; i < nsamp_per_block ; i++){ for (uint64_t k = 0; k < BLK ; k++){ clean[j+k][i]/=(float)fftX; buffer[i*nchan+j+k] = round(clean[j+k][i]); } } if(j==512){ cpgopen("2/xs"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,fftX,ch_mean[j]-sqrt(ch_var[j])*10,ch_mean[j]+sqrt(ch_var[j])*10); cpgbox("ABN",0,0,"ABN",0,0); cpgline(fftX,X,data[j]); cpgsci(2); cpgline(fftX,X,clean[j]); cpgclos(); } } fwrite(buffer,1,bytes_per_block,of); for (uint64_t i = 0; i < nsamp_per_block ; i++){ tseries[i]=0; } tmean=0; tvar=0; max=0; min=1e99; //#pragma omp parallel for shared(clean,tseries) // NOT THREAD SAFE for (uint64_t j = 0; j < nchan ; j++){ tmean+=ch_mean[offset+j]; tvar+=ch_var[offset+j]; for (uint64_t i = 0; i < nsamp_per_block ; i++){ tseries[i]+=clean[j][i]; if(clean[j][i]>max)max=clean[j][i]; if(clean[j][i]<min)min=clean[j][i]; } } ss=0; mm=0; for (uint64_t i = 0; i < nsamp_per_block ; i++){ float x=tseries[i]-tmean; mm+=tseries[i]; ss+=x*x; } rvar=ss/(float)nsamp_per_block; logmsg("var=%g tvar=%g",ss/(float)nsamp_per_block,tvar); logmsg("mean=%g tmean=%g",mm/(float)nsamp_per_block,tmean); cpgopen("5/xs"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,fftX,tmean-sqrt(tvar)*30,tmean+sqrt(tvar)*30); cpgbox("ABN",0,0,"ABN",0,0); cpgline(fftX,X,tseries); cpgsci(2); cpgclos(); tr[0] = 0.0 ; tr[1] = 1; tr[2] = 0; tr[3] = 0.5; tr[4] = 0; tr[5] = 1; logmsg("max=%g min=%g",max,min); cpgopen("6/xs"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,nsamp_per_block,0,nchan); cpgbox("ABN",0,0,"ABN",0,0); 
cpggray(*clean,nsamp_per_block,nchan,1,nsamp_per_block,1,nchan,tmean/(float)nchan+sqrt(rvar/(float)nchan),tmean/(float)nchan-sqrt(rvar/(float)nchan),tr); cpgclos(); stop_clock(t_spec); for (uint64_t j = 0; j < nchan ; j++){ float mean=ch_mean[offset+j]; if (mean > max_mean)max_mean=mean; if (mean < min_mean)min_mean=mean; float var=ch_var[offset+j]; if (var > max_var)max_var=var; if (var < min_var)min_var=var; float fft_n=ch_fft_n[offset+j]; if (fft_n > max_fft_n)max_fft_n=fft_n; if (fft_n < min_fft_n)min_fft_n=fft_n; float fft_p=ch_fft_p[offset+j]; if (fft_p > max_fft_p)max_fft_p=fft_p; if (fft_p < min_fft_p)min_fft_p=fft_p; } } stop_clock(t_all); fclose(of); logmsg("T(all) = %.2lfs",read_clock(t_all)); logmsg("T(read) = %.2lfs",read_clock(t_read)); logmsg("T(trans)= %.2lfs",read_clock(t_trns)); logmsg("T(fft) = %.2lfs",read_clock(t_fft)); logmsg("T(fan) = %.2lfs",read_clock(t_spec)); logmsg("T(rms) = %.2lfs",read_clock(t_rms)); logmsg("T(rest) = %.2lfs",read_clock(t_all)-read_clock(t_read)-read_clock(t_trns)-read_clock(t_rms)-read_clock(t_fft)-read_clock(t_spec)); tr[0] = -tsamp*nsamp_per_block*0.5; tr[2] = tsamp*nsamp_per_block; tr[1] = 0; tr[3] = 0.5; tr[5] = 0; tr[4] = 1; cpgopen("1/xs"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan); cpgbox("ABN",600,10,"ABN",100,1); cpggray(ch_mean,nchan,nblocks,1,nchan,1,nblocks,max_mean,min_mean,tr); cpgclos(); cpgopen("2/xs"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan); cpgbox("ABN",600,10,"ABN",100,1); cpggray(ch_var,nchan,nblocks,1,nchan,1,nblocks,max_var,min_var,tr); cpgclos(); cpgopen("3/xs"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan); cpgbox("ABN",600,10,"ABN",100,1); cpggray(ch_fft_n,nchan,nblocks,1,nchan,1,nblocks,max_fft_n,min_fft_n,tr); cpgclos(); cpgopen("4/xs"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan); cpgbox("ABN",600,10,"ABN",100,1); 
cpggray(ch_fft_p,nchan,nblocks,1,nchan,1,nblocks,max_fft_p,min_fft_p,tr); cpgclos(); cpgopen("mean.ps/vcps"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan); cpgbox("ABN",600,10,"ABN",100,1); cpggray(ch_mean,nchan,nblocks,1,nchan,1,nblocks,max_mean,min_mean,tr); cpgclos(); cpgopen("var.ps/vcps"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan); cpgbox("ABN",600,10,"ABN",100,1); cpggray(ch_var,nchan,nblocks,1,nchan,1,nblocks,max_var,min_var,tr); cpgclos(); cpgopen("fft_n.ps/vcps"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan); cpgbox("ABN",600,10,"ABN",100,1); cpggray(ch_fft_n,nchan,nblocks,1,nchan,1,nblocks,max_fft_n,min_fft_n,tr); cpgclos(); cpgopen("fft_p.ps/vcps"); cpgsvp(0.1,0.9,0.1,0.9); cpgswin(0,nblocks*tsamp*nsamp_per_block,0,nchan); cpgbox("ABN",600,10,"ABN",100,1); cpggray(ch_fft_p,nchan,nblocks,1,nchan,1,nblocks,max_fft_p,min_fft_p,tr); cpgclos(); fclose(f); free(buffer); free_2df(data); return 0; }
int main(int argc, char *argv[]) { int opt=0, verb=0; int max_harm = 64, max_lag=0; int isub = 1; while ((opt=getopt(argc,argv,"hvI:H:L:"))!=-1) { switch (opt) { case 'v': verb++; break; case 'I': isub = atoi(optarg); break; case 'H': max_harm = atoi(optarg); break; case 'L': max_lag = atoi(optarg); break; case 'h': usage(); exit(0); break; } } if (optind==argc) { usage(); exit(1); } int i, rv; /* Open file */ fitsfile *f; int status; fits_open_file(&f, argv[optind], READONLY, &status); fits_error_check_fatal(); /* Get basic dims */ struct cyclic_work w; cyclic_load_params(f, &w, &status); fits_error_check_fatal(); if (verb) { printf("Read nphase=%d npol=%d nchan=%d\n", w.nphase, w.npol, w.nchan); fflush(stdout); } int orig_npol = w.npol; w.npol = 1; /* Init FFTs */ fftwf_init_threads(); fftwf_plan_with_nthreads(4); if (verb) { printf("Planning FFTs\n"); fflush(stdout); } #define WF "/home/pdemores/share/cyclic_wisdom.dat" FILE *wf = fopen(WF,"r"); if (wf!=NULL) { fftwf_import_wisdom_from_file(wf); fclose(wf); } rv = cyclic_init_ffts(&w); if (rv) { fprintf(stderr, "Error planning ffts (rv=%d)\n", rv); exit(1); } wf = fopen(WF,"w"); if (wf!=NULL) { fftwf_export_wisdom_to_file(wf); fclose(wf); } /* Alloc some stuff */ struct periodic_spectrum raw; struct cyclic_spectrum cs, model_cs; struct filter_time ht; struct filter_freq hf; struct profile_phase pp; struct profile_harm ph; raw.nphase = pp.nphase = w.nphase; raw.nchan = cs.nchan = hf.nchan = w.nchan; cs.nharm = ph.nharm = w.nharm; ht.nlag = w.nlag; raw.npol = orig_npol; cs.npol = 1; model_cs.nchan = cs.nchan; model_cs.nharm = cs.nharm; model_cs.npol = cs.npol; cyclic_alloc_ps(&raw); cyclic_alloc_cs(&cs); cyclic_alloc_cs(&model_cs); filter_alloc_time(&ht); filter_alloc_freq(&hf); profile_alloc_phase(&pp); profile_alloc_harm(&ph); #if 0 // XXX not implemented yet /* Check bounds */ if (max_harm > w.nharm) { max_harm = w.nharm; } if (max_lag > w.nlag/2) { max_lag = w.nlag/2; } if (verb) { printf("Using max of %d 
harmonics and %d lags\n", max_harm, max_lag); } #endif /* Load data */ cyclic_load_ps(f, &raw, isub, &status); fits_error_check_fatal(); /* Add polns w/o calibration */ cyclic_pscrunch_ps(&raw, 1.0, 1.0); /* Initialize H, profile guesses */ cyclic_fscrunch_ps(&pp, &raw); profile_phase2harm(&pp, &ph, &w); ht.data[0] = 1.0; for (i=1; i<ht.nlag; i++) { ht.data[i] = 0.0; } filter_profile_norm(&ht, &ph, w.nharm); profile_harm2phase(&ph, &pp, &w); /* convert input data to cyclic spectrum */ cyclic_ps2cs(&raw, &cs, &w); /* could output initial profile */ /* Fill in data struct for nlopt */ struct cyclic_data cdata; cdata.cs = &cs; cdata.s0 = &ph; cdata.ht = &ht; cdata.model_cs = &model_cs; cdata.w = &w; /* Set up minimizer */ const int dim = 2*(w.nharm-1) + 2*w.nlag; /* number of free params */ printf("number of fit params = %d\n", dim); nlopt_opt op; op = nlopt_create(NLOPT_LN_COBYLA, dim); nlopt_set_min_objective(op, cyclic_ms_difference_nlopt, &cdata); nlopt_set_xtol_rel(op, 1e-4); /* Set up initial params */ double *x = (double *)malloc(sizeof(double) * dim); double *xtmp = x; for (i=1; i<ph.nharm; i++) { xtmp[0] = creal(ph.data[i]); xtmp[1] = cimag(ph.data[i]); xtmp += 2; } for (i=0; i<ht.nlag; i++) { xtmp[0] = creal(ht.data[i]); xtmp[1] = cimag(ht.data[i]); xtmp += 2; } /* Run optimization */ double min; if (nlopt_optimize(op, x, &min)) { fprintf(stderr, "nlopt_optimize failed\n"); exit(1); } /* TODO: some kind of output */ /* All done :) */ nlopt_destroy(op); exit(0); }
int main(int argc, char *argv[]) { uint16_t *addr_in; float *addr_out; int fd_in; int fd_out; struct stat sb; size_t length; uint64_t fftlen; size_t num_ffts; size_t index; if (argc < 4|| argc > 5) { fprintf(stderr, "%s filein fileout fftlength [nthreads]\n", argv[0]); exit(EXIT_FAILURE); } fd_in = open(argv[1], O_RDONLY|O_LARGEFILE); if (fd_in == -1) handle_error("in open"); fd_out = open(argv[2], O_LARGEFILE|O_CREAT|O_TRUNC|O_RDWR, 0666); if (fd_out == -1) handle_error("out open"); fftlen = atoll(argv[3]); int nthreads = (argc == 5) ? 4 : atoi(argv[4]); posix_fallocate(fd_out, 0, fftlen* sizeof(float)); if (fstat(fd_in, &sb) == -1) /* To obtain file size */ handle_error("fstat"); length = sb.st_size; //two bytes per short, two shorts per complex value num_ffts = length/fftlen; //map the input into memory. addr_in = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd_in, 0); if (addr_in == MAP_FAILED) handle_error("input mmap"); //map the output into memory. addr_out = mmap(NULL, fftlen*sizeof(float), PROT_WRITE, MAP_SHARED, fd_out, 0); if (addr_out == MAP_FAILED) handle_error("output mmap"); //fftwf stuff fftwf_init_threads(); fftwf_plan_with_nthreads(nthreads); fftwf_complex *in, *out; fftwf_plan my_plan; in = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*fftlen); out = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*fftlen); my_plan = fftwf_plan_dft_1d(fftlen, in, out, FFTW_FORWARD, FFTW_ESTIMATE); float *fin = (float*)in; float *fout = (float*)out; for(index = 0; index < fftlen*2; index++) { float tmp = (float) addr_in[index]; fin[index] =tmp; } addr_in += 2 * fftlen; //run that FFT fftwf_execute(my_plan); //calculate Power size_t currindex=0; float real, imag; for(index = 0; index < fftlen; index++) { real = fout[currindex++]; imag = fout[currindex++]; addr_out[index] = 10.0f*log10f( real * real + imag * imag ); } fftwf_destroy_plan(my_plan); fftwf_free(in); fftwf_free(out); msync(addr_out, fftlen * sizeof(float), MS_SYNC); exit(EXIT_SUCCESS); }
int main(int argc, char *argv[]) { int opt=0, verb=0; int max_harm = 64, max_lag=0; int causal_filter = 0; while ((opt=getopt(argc,argv,"hvH:L:C"))!=-1) { switch (opt) { case 'v': verb++; break; case 'H': max_harm = atoi(optarg); break; case 'L': max_lag = atoi(optarg); break; case 'C': causal_filter = 1; break; case 'h': usage(); exit(0); break; } } if (optind==argc) { usage(); exit(1); } int i, rv; /* Open file */ fitsfile *f; int status; fits_open_file(&f, argv[optind], READONLY, &status); fits_error_check_fatal(); /* Get basic dims */ struct cyclic_work w; cyclic_load_params(f, &w, &status); fits_error_check_fatal(); if (verb) { printf("Read nphase=%d npol=%d nchan=%d\n", w.nphase, w.npol, w.nchan); fflush(stdout); } int orig_npol = w.npol; w.npol = 1; /* Init FFTs */ fftwf_init_threads(); fftwf_plan_with_nthreads(4); if (verb) { printf("Planning FFTs\n"); fflush(stdout); } #define WF "/home/pdemores/share/cyclic_wisdom.dat" FILE *wf = fopen(WF,"r"); if (wf!=NULL) { fftwf_import_wisdom_from_file(wf); fclose(wf); } rv = cyclic_init_ffts(&w); if (rv) { fprintf(stderr, "Error planning ffts (rv=%d)\n", rv); exit(1); } wf = fopen(WF,"w"); if (wf!=NULL) { fftwf_export_wisdom_to_file(wf); fclose(wf); } /* Alloc some stuff */ struct periodic_spectrum raw; struct cyclic_spectrum cs, cs_neg; struct filter_time ht, ht_new; struct filter_freq hf, hf_new; struct filter_freq *hf_shift_pos, *hf_shift_neg; hf_shift_pos = (struct filter_freq *)malloc( sizeof(struct filter_freq)*w.nharm); hf_shift_neg = (struct filter_freq *)malloc( sizeof(struct filter_freq)*w.nharm); struct profile_phase pp, pp_new; struct profile_harm ph, ph_new; raw.nphase = pp.nphase = pp_new.nphase = w.nphase; raw.nchan = cs.nchan = hf.nchan = hf_new.nchan = w.nchan; cs.nharm = ph.nharm = ph_new.nharm = w.nharm; ht.nlag = ht_new.nlag = w.nlag; for (i=0; i<w.nharm; i++) { hf_shift_pos[i].nchan = w.nchan; } for (i=0; i<w.nharm; i++) { hf_shift_neg[i].nchan = w.nchan; } raw.npol = orig_npol; cs.npol = 1; 
cs_neg.nchan = cs.nchan; cs_neg.nharm = cs.nharm; cs_neg.npol = cs.npol; cyclic_alloc_ps(&raw); cyclic_alloc_cs(&cs); cyclic_alloc_cs(&cs_neg); filter_alloc_time(&ht); filter_alloc_time(&ht_new); filter_alloc_freq(&hf); filter_alloc_freq(&hf_new); for (i=0; i<w.nharm; i++) { filter_alloc_freq(&hf_shift_pos[i]); filter_alloc_freq(&hf_shift_neg[i]); } profile_alloc_phase(&pp); profile_alloc_phase(&pp_new); profile_alloc_harm(&ph); profile_alloc_harm(&ph_new); /* Check bounds */ if (max_harm > w.nharm) { max_harm = w.nharm; } if (max_lag > w.nlag/2) { max_lag = w.nlag/2; } if (verb) { printf("Using max of %d harmonics and %d lags\n", max_harm, max_lag); } /* Run procedure on subint 0 */ int isub = 1; /* Load data */ cyclic_load_ps(f, &raw, isub, &status); fits_error_check_fatal(); /* Add polns w/o calibration */ cyclic_pscrunch_ps(&raw, 1.0, 1.0); /* Initialize H, profile guesses */ cyclic_fscrunch_ps(&pp, &raw); profile_phase2harm(&pp, &ph, &w); ht.data[0] = 1.0; for (i=1; i<ht.nlag; i++) { ht.data[i] = 0.0; } filter_profile_norm(&ht, &ph, max_harm); profile_harm2phase(&ph, &pp, &w); /* convert to CS, produce shifted version */ cyclic_ps2cs(&raw, &cs, &w); cyclic_ps2cs(&raw, &cs_neg, &w); cyclic_shift_cs(&cs, +1, &w); cyclic_shift_cs(&cs_neg, -1, &w); /* TODO output initial profile */ /* Remove old files */ #define FILT "filters.dat" #define TFILT "tfilters.dat" #define PROF "profs.dat" #define FPROF "fprofs.dat" unlink(FILT); unlink(TFILT); unlink(PROF); unlink(FPROF); FILE *it = fopen("iter.dat", "w"); /* iterate */ int nit=0; double mse=0.0, last_mse=0.0; signal(SIGINT, cc); do { if (verb) { printf("iter %d\n", nit); fflush(stdout); } /* Make freq domain filter */ filter_time2freq(&ht, &hf, &w); write_filter(TFILT, &ht); write_filter_freq(FILT, &hf); /* Make shifted filter array */ filter_shift(hf_shift_pos, &ht, w.nharm, raw.ref_freq/(raw.bw*1e6), &w); filter_shift(hf_shift_neg, &ht, w.nharm, -1.0*raw.ref_freq/(raw.bw*1e6), &w); mse = cyclic_mse(&cs, &cs_neg, 
&ph, hf_shift_pos, hf_shift_neg, max_harm); /* Update filter, prof */ cyclic_update_filter(&hf_new, &cs, &cs_neg, &ph, hf_shift_pos, hf_shift_neg, max_harm); cyclic_update_profile(&ph_new, &cs, &cs_neg, hf_shift_pos, hf_shift_neg); /* Back to time domain filter */ filter_freq2time(&hf_new, &ht_new, &w); /* Fix filter normalization */ for (i=0; i<ht_new.nlag; i++) ht_new.data[i] /= (float)ht_new.nlag; /* Zero out negative lags */ if (causal_filter) { for (i=ht_new.nlag/2; i<ht_new.nlag; i++) ht_new.data[i] = 0.0; } /* Zero out large lags */ if (max_lag>0) { for (i=max_lag; i<ht_new.nlag-max_lag; i++) ht_new.data[i] = 0.0; } /* Kill nyquist point?? */ ht_new.data[ht_new.nlag/2] = 0.0; /* Normalize prof and filter */ filter_profile_norm(&ht_new, &ph_new, max_harm); /* TODO some kind of convergence test */ double prof_diff = profile_ms_difference(&ph, &ph_new, max_harm); double filt_diff = filter_ms_difference(&ht, &ht_new); /* TODO zero out high harmonics ?? */ /* Step halfway to new versions, except first time */ if (nit==0) { for (i=0; i<w.nharm; i++) ph.data[i] = ph_new.data[i]; for (i=0; i<w.nlag; i++) ht.data[i] = ht_new.data[i]; } else { //double fac = (mse<last_mse) ? 1.0 : 0.5*sqrt(mse/last_mse); double fac=0.25; for (i=0; i<w.nharm; i++) ph.data[i] = (1.0-fac)*ph.data[i] + fac*ph_new.data[i]; for (i=0; i<w.nlag; i++) ht.data[i] = (1.0-fac)*ht.data[i] + fac*ht_new.data[i]; } /* Back to phase domain profile */ ph.data[0] = 0.0; profile_harm2phase(&ph, &pp_new, &w); /* Write out current profiles */ write_profile(PROF, &pp_new); write_fprofile(FPROF, &ph); /* Print convergence params */ if (verb) { fprintf(it,"%.3e %.3e %.8e %.8e\n", prof_diff, filt_diff, mse, mse - last_mse); } last_mse = mse; /* Update iter count */ nit++; } while (run); fclose(it); exit(0); }
int cfft2_init(int pad1 /* padding on the first axis */, int nx, int ny /* input data size */, int *nx2, int *ny2 /* padded data size */) /*< initialize >*/ { #ifdef SF_HAS_FFTW #ifdef _OPENMP fftwf_init_threads(); if (false) sf_warning("Using threaded FFTW3! \n"); fftwf_plan_with_nthreads(omp_get_max_threads()); #else if (false) sf_warning("Using FFTW3! \n"); #endif #else if (false) sf_warning("Using KissFFT! \n"); #endif #ifndef SF_HAS_FFTW int i2; #endif nk = n1 = kiss_fft_next_fast_size(nx*pad1); n2 = kiss_fft_next_fast_size(ny); cc = sf_complexalloc2(n1,n2); #ifdef SF_HAS_FFTW dd = sf_complexalloc2(nk,n2); cfg = fftwf_plan_dft_2d(n2,n1, (fftwf_complex *) cc[0], (fftwf_complex *) dd[0], FFTW_FORWARD, FFTW_MEASURE); icfg = fftwf_plan_dft_2d(n2,n1, (fftwf_complex *) dd[0], (fftwf_complex *) cc[0], FFTW_BACKWARD, FFTW_MEASURE); if (NULL == cfg || NULL == icfg) sf_error("FFTW failure."); #else cfg1 = kiss_fft_alloc(n1,0,NULL,NULL); icfg1 = kiss_fft_alloc(n1,1,NULL,NULL); cfg2 = kiss_fft_alloc(n2,0,NULL,NULL); icfg2 = kiss_fft_alloc(n2,1,NULL,NULL); tmp = (kiss_fft_cpx **) sf_alloc(n2,sizeof(*tmp)); tmp[0] = (kiss_fft_cpx *) sf_alloc(nk*n2,sizeof(kiss_fft_cpx)); #ifdef _OPENMP #pragma omp parallel for private(i2) default(shared) #endif for (i2=0; i2 < n2; i2++) { tmp[i2] = tmp[0]+i2*nk; } trace2 = sf_complexalloc(n2); ctrace2 = (kiss_fft_cpx *) trace2; #endif *nx2 = n1; *ny2 = n2; wt = 1.0/(n1*n2); return (nk*n2); }
/* Initialise FFTW's single-precision thread support and hand the status
   code from fftwf_init_threads straight back to the caller. */
static int init_threads()
{
    return fftwf_init_threads();
}
float* CalcFFT(GDALDataset *srcDS1, GDALDataset *srcDS2, GDALDataset *dstDS) { fftwf_plan plan1, plan2, planI; fftwf_complex *img1, *img2; unsigned char *out; int band; const size_t px_count = dstDS->GetRasterXSize() * dstDS->GetRasterYSize(); const size_t buffer_len = sizeof(fftwf_complex) * px_count; img1 = (fftwf_complex*) fftwf_malloc(buffer_len); img2 = (fftwf_complex*) fftwf_malloc(buffer_len); out = (unsigned char*) fftwf_malloc(sizeof(unsigned char) * px_count); /* ^ not used in fft, but aligned is good anyway */ if(img1 == NULL || img2 == NULL || out == NULL) error("Could not allocate memory\n"); if(fftwf_init_threads()) fftwf_plan_with_nthreads(CORES); plan1 = fftwf_plan_dft_2d(dstDS->GetRasterYSize(), dstDS->GetRasterXSize(), img1, img1, FFTW_FORWARD, FFTW_ESTIMATE); plan2 = fftwf_plan_dft_2d(dstDS->GetRasterYSize(), dstDS->GetRasterXSize(), img2, img2, FFTW_FORWARD, FFTW_ESTIMATE); planI = fftwf_plan_dft_2d(dstDS->GetRasterYSize(), dstDS->GetRasterXSize(), img2, img2, FFTW_BACKWARD, FFTW_ESTIMATE); if(plan1 == NULL || plan2 == NULL || planI == NULL) error("Could not plan FFT\n"); for(band = 1; band <= dstDS->GetRasterCount(); band++) { printf("FFT 1 band %d\n", band); runFFT( plan1, srcDS1, img1, band, dstDS ); printf("FFT 2 band %d\n", band); runFFT( plan2, srcDS2, img2, band, dstDS ); printf("Complex Conj band %d\n", band); /* mult img1 and conj of img2 */ for(int px = 0; px < px_count; px++) { img2[px] = img1[px] * conj(img2[px]); } /* IFFT of result */ printf("IFFT band %d\n", band); fftwf_execute(planI); printf("normalize band %d\n", band); complex float norm = csqrt(px_count + 0I); float max = cabs(img2[0] / norm); float min = cabs(img2[0] / norm); for(int i = 0; i < px_count; i++) { img2[i] = img2[i] / norm; if(cabs(img2[i]) < min) min = cabs(img2[i]); if(cabs(img2[i]) > max) max = cabs(img2[i]); } /* img2 should now be real - normalize 0-255 and -- write output */ printf("Save band %d; min = %f max = %f\n", band, min, max); for(int i = 0; i < 
px_count; i++) { out[i] = floor( ((cabs(img2[i]) - min) / (max-min) ) * 255.0 ); } fft2shift(out, dstDS->GetRasterYSize(), dstDS->GetRasterXSize()); dstDS->GetRasterBand(band)->RasterIO( GF_Write, 0, 0, dstDS->GetRasterXSize(), dstDS->GetRasterYSize(), out, dstDS->GetRasterXSize(), dstDS->GetRasterYSize(), GDT_Byte, 0, 0); } fftwf_destroy_plan(plan1); fftwf_destroy_plan(plan2); fftwf_destroy_plan(planI); fftwf_free(img1); fftwf_free(img2); fftwf_free(out); }
int main(int argc, char *argv[]) { char fname[300]; FILE *fid; DIR* dir; size_t elem; long int ii,ij,ik, ii_c, ij_c, ik_c, a, b, c; long int ncells_1D; long int i,j,p,indi,indj,ind; int flag_bub,iz; double redshift,tmp; double kk; double bfactor; /* value by which to divide bubble size R */ double neutral,*xHI; float *halo_map, *top_hat_r, *density_map,*bubblef, *bubble; fftwf_complex *halo_map_c, *top_hat_c, *collapsed_mass_c, *density_map_c, *total_mass_c, *bubble_c; fftwf_plan pr2c1,pr2c2,pr2c3,pr2c4,pc2r1,pc2r2,pc2r3; double zmin,zmax,dz; double R; if(argc != 2) { printf("Generates boxes with ionization fraction for a range of redshifts\n"); printf("usage: get_HIIbubbles base_dir\n"); printf("base_dir contains simfast21.ini and directory structure\n"); exit(1); } get_Simfast21_params(argv[1]); zmin=global_Zminsim; zmax=global_Zmaxsim; dz=global_Dzsim; bfactor=pow(10.0,log10(global_bubble_Rmax/global_dx_smooth)/global_bubble_Nbins); printf("Bubble radius ratio (bfactor): %f\n", bfactor); fflush(0); #ifdef _OMPTHREAD_ omp_set_num_threads(global_nthreads); fftwf_init_threads(); fftwf_plan_with_nthreads(global_nthreads); printf("Using %d threads\n",global_nthreads);fflush(0); #endif /* Create directory Ionization */ sprintf(fname,"%s/Ionization",argv[1]); if((dir=opendir(fname))==NULL) { printf("Creating Ionization directory\n"); if(mkdir(fname,(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH))!=0) { printf("Error creating directory!\n"); exit(1); } } sprintf(fname,"%s/Output_text_files",argv[1]); if((dir=opendir(fname))==NULL) { printf("Creating Output_text_files directory\n"); if(mkdir(fname,(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH))!=0) { printf("Error creating directory!\n"); exit(1); } } /* Memory allocation - we could do some of the FFTs inline... 
*/ /*************************************************************/ /* density_map mass */ if(!(density_map=(float *) fftwf_malloc(global_N3_smooth*sizeof(float)))) { printf("Problem1...\n"); exit(1); } if(!(density_map_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) { printf("Problem2...\n"); exit(1); } if(!(pr2c1=fftwf_plan_dft_r2c_3d(global_N_smooth, global_N_smooth, global_N_smooth, density_map, density_map_c, FFTWflag))) { printf("Problem3...\n"); exit(1); } /* halo_map mass */ if(!(halo_map=(float *) fftwf_malloc(global_N3_smooth*sizeof(float)))) { printf("Problem4...\n"); exit(1); } if(!(halo_map_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) { printf("Problem5...\n"); exit(1); } if(!(pr2c2=fftwf_plan_dft_r2c_3d(global_N_smooth, global_N_smooth, global_N_smooth, halo_map, halo_map_c, FFTWflag))) { printf("Problem6...\n"); exit(1); } /* total mass */ if(!(total_mass_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) { printf("Problem7...\n"); exit(1); } if(!(pc2r1=fftwf_plan_dft_c2r_3d(global_N_smooth, global_N_smooth, global_N_smooth, total_mass_c, density_map, FFTWflag))) { printf("Problem8...\n"); exit(1); } /* collapsed mass */ if(!(collapsed_mass_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) { printf("Problem9...\n"); exit(1); } if(!(pc2r2=fftwf_plan_dft_c2r_3d(global_N_smooth, global_N_smooth, global_N_smooth, collapsed_mass_c, halo_map, FFTWflag))) { printf("Problem10...\n"); exit(1); } /* top hat window */ if(!(top_hat_r=(float *) fftwf_malloc(global_N3_smooth*sizeof(float)))) { printf("Problem11...\n"); exit(1); } if(!(top_hat_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) { printf("Problem12...\n"); exit(1); } 
if(!(pr2c3=fftwf_plan_dft_r2c_3d(global_N_smooth, global_N_smooth, global_N_smooth, top_hat_r, top_hat_c, FFTWflag))) { printf("Problem13...\n"); exit(1); } /* bubble boxes */ if(!(bubble=(float *) fftwf_malloc(global_N3_smooth*sizeof(float)))) { printf("Problem14...\n"); exit(1); } if(!(bubble_c=(fftwf_complex *) fftwf_malloc(global_N_smooth*global_N_smooth*(global_N_smooth/2+1)*sizeof(fftwf_complex)))) { printf("Problem15...\n"); exit(1); } if(!(bubblef=(float *) malloc(global_N3_smooth*sizeof(float)))) { printf("Problem16...\n"); exit(1); } if(!(pr2c4=fftwf_plan_dft_r2c_3d(global_N_smooth, global_N_smooth, global_N_smooth, bubble, bubble_c, FFTWflag))) { printf("Problem17...\n"); exit(1); } if(!(pc2r3=fftwf_plan_dft_c2r_3d(global_N_smooth, global_N_smooth, global_N_smooth, bubble_c, bubble, FFTWflag))) { printf("Problem18...\n"); exit(1); } if(!(xHI=(double *) malloc((int)((zmax-zmin)/dz+2)*sizeof(double)))) { printf("Problem19...\n"); exit(1); } /****************************************************/ /***************** Redshift cycle *******************/ printf("Number of bubble sizes: %d\n",(int)((log(global_bubble_Rmax)-log(2.*global_dx_smooth))/log(bfactor))); printf("Redshift cycle...\n");fflush(0); iz=0; neutral=0.; for(redshift=zmin;redshift<(zmax+dz/10) && (neutral < xHlim);redshift+=dz){ printf("z = %f\n",redshift);fflush(0); sprintf(fname, "%s/delta/deltanl_z%.3f_N%ld_L%.1f.dat",argv[1],redshift,global_N_smooth,global_L/global_hubble); fid=fopen(fname,"rb"); if (fid==NULL) {printf("\nError reading deltanl file... 
Check path or if the file exists..."); exit (1);} elem=fread(density_map,sizeof(float),global_N3_smooth,fid); fclose(fid); #ifdef _OMPTHREAD_ #pragma omp parallel for shared(global_N3_smooth, density_map,global_rho_m, global_dx_smooth,bubblef) private(i) #endif for(i=0;i<(global_N3_smooth);i++){ density_map[i]=(1.0+density_map[i])*global_rho_m*global_dx_smooth*global_dx_smooth*global_dx_smooth; /* total mass in 1 cell */ bubblef[i]=0.0; } sprintf(fname, "%s/Halos/masscoll_z%.3f_N%ld_L%.1f.dat",argv[1],redshift,global_N_smooth,global_L/global_hubble); fid=fopen(fname,"rb"); if (fid==NULL) {printf("\nError reading %s file... Check path or if the file exists...",fname); exit (1);} elem=fread(halo_map,sizeof(float),global_N3_smooth,fid); fclose(fid); /* Quick fill of single cells before going to bubble cycle */ #ifdef _OMPTHREAD_ #pragma omp parallel for shared(global_N3_smooth,halo_map,density_map,global_eff,bubblef) private(i,tmp) #endif for(i=0;i<global_N3_smooth;i++) { if(halo_map[i]>0.) { if(density_map[i]>0.) 
tmp=(double)halo_map[i]*global_eff/density_map[i]; else tmp=1.0; }else tmp=0.; if(tmp>=1.0) bubblef[i]=1.0; else bubblef[i]=tmp; } /* FFT density and halos */ fftwf_execute(pr2c1); fftwf_execute(pr2c2); /************** going over the bubble sizes ****************/ R=global_bubble_Rmax; /* Maximum bubble size...*/ while(R>=2*global_dx_smooth){ printf("bubble radius R= %lf\n", R);fflush(0); // printf("Filtering halo and density boxes...\n");fflush(0); #ifdef _OMPTHREAD_ #pragma omp parallel for shared(collapsed_mass_c,halo_map_c,total_mass_c,density_map_c,global_N_smooth,global_dk,R) private(i,j,p,indi,indj,kk) #endif for(i=0;i<global_N_smooth;i++) { if(i>global_N_smooth/2) { indi=-(global_N_smooth-i); }else indi=i; for(j=0;j<global_N_smooth;j++) { if(j>global_N_smooth/2) { indj=-(global_N_smooth-j); }else indj=j; for(p=0;p<=global_N_smooth/2;p++) { kk=global_dk*sqrt(indi*indi+indj*indj+p*p); total_mass_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]=density_map_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]*W_filter(kk*R); collapsed_mass_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]=halo_map_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]*W_filter(kk*R); } } } fftwf_execute(pc2r1); fftwf_execute(pc2r2); flag_bub=0; // printf("Starting to find and fill bubbles...\n");fflush(0); /* signal center of bubbles */ #ifdef _OMPTHREAD_ #pragma omp parallel for shared(halo_map,density_map,bubble,global_N_smooth,global_eff,flag_bub) private(ii,ij,ik,ind) #endif for(ii=0;ii<global_N_smooth;ii++){ for(ij=0;ij<global_N_smooth;ij++){ for(ik=0;ik<global_N_smooth;ik++){ ind=ii*global_N_smooth*global_N_smooth+ij*global_N_smooth+ik; if(halo_map[ind]>0.) { if(density_map[ind]>0.) 
{ if((double)halo_map[ind]/density_map[ind]>=1.0/global_eff) { flag_bub=1; bubble[ind]=1.0; }else bubble[ind]=0; }else { flag_bub=1; bubble[ind]=1.0; } }else bubble[ind]=0; } } } /* generate spherical window in real space for a given R */ if(flag_bub>0){ printf("Found bubble...\n");fflush(0); #ifdef _OMPTHREAD_ #pragma omp parallel for shared(top_hat_r,R,global_dx_smooth,global_N_smooth) private(i,j,p) #endif for(i=0;i<global_N_smooth;i++){ for(j=0;j<global_N_smooth;j++){ for(p=0;p<global_N_smooth;p++){ if(sqrt(i*i+j*j+p*p)*global_dx_smooth<=R || sqrt(i*i+(j-global_N_smooth)*(j-global_N_smooth)+p*p)*global_dx_smooth<=R || sqrt(i*i+(j-global_N_smooth)*(j-global_N_smooth)+(p-global_N_smooth)*(p-global_N_smooth))*global_dx_smooth <=R || sqrt(i*i+(p-global_N_smooth)*(p-global_N_smooth)+j*j)*global_dx_smooth<=R || sqrt((i-global_N_smooth)*(i-global_N_smooth)+j*j+p*p)*global_dx_smooth<=R || sqrt((i-global_N_smooth)*(i-global_N_smooth)+(j-global_N_smooth)*(j-global_N_smooth)+p*p)*global_dx_smooth<=R || sqrt((i-global_N_smooth)*(i-global_N_smooth)+(j-global_N_smooth)*(j-global_N_smooth)+(p-global_N_smooth)*(p-global_N_smooth))*global_dx_smooth<=R || sqrt((i-global_N_smooth)*(i-global_N_smooth)+j*j+(p-global_N_smooth)*(p-global_N_smooth))*global_dx_smooth<=R ) { top_hat_r[i*global_N_smooth*global_N_smooth+j*global_N_smooth+p]=1.0; }else top_hat_r[i*global_N_smooth*global_N_smooth+j*global_N_smooth+p]=0.0; } } } /* FFT bubble centers and window */ fftwf_execute(pr2c3); fftwf_execute(pr2c4); /* Make convolution */ #ifdef _OMPTHREAD_ #pragma omp parallel for shared(bubble_c,top_hat_c,global_N_smooth) private(i) #endif for(i=0;i<global_N_smooth*global_N_smooth*(global_N_smooth/2+1);i++) { bubble_c[i]*=top_hat_c[i]; } fftwf_execute(pc2r3); /* after dividing by global_N3_smooth, values in bubble are between 0 (neutral)and global_N3_smooth */ #ifdef _OMPTHREAD_ #pragma omp parallel for shared(bubble,bubblef,global_N3_smooth) private(i) #endif for (i=0; i<global_N3_smooth; i++){ 
bubble[i]/=global_N3_smooth; if (bubble[i]>0.2) bubblef[i]=1.0; /* neutral should be zero */ } } /* ends filling out bubbles in box for R */ R/=bfactor; } /* ends R cycle */ /* just to check smallest bubbles through older method */ printf("Going to smaller R cycle...\n"); fflush(0); while(R>=global_dx_smooth){ printf("bubble radius R= %lf\n", R);fflush(0); flag_bub=0; #ifdef _OMPTHREAD_ #pragma omp parallel for shared(collapsed_mass_c,halo_map_c,total_mass_c,density_map_c,global_N_smooth,global_dx_smooth,global_dk,R) private(i,j,p,indi,indj,kk) #endif for(i=0;i<global_N_smooth;i++) { if(i>global_N_smooth/2) { indi=-(global_N_smooth-i); }else indi=i; for(j=0;j<global_N_smooth;j++) { if(j>global_N_smooth/2) { indj=-(global_N_smooth-j); }else indj=j; for(p=0;p<=global_N_smooth/2;p++) { kk=global_dk*sqrt(indi*indi+indj*indj+p*p); total_mass_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]=density_map_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]*W_filter(kk*R); collapsed_mass_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]=halo_map_c[i*global_N_smooth*(global_N_smooth/2+1)+j*(global_N_smooth/2+1)+p]*W_filter(kk*R); } } } fftwf_execute(pc2r1); fftwf_execute(pc2r2); /* fill smaller bubbles in box */ ncells_1D=(long int)(R/global_dx_smooth); // printf("Starting to find and fill bubbles...\n");fflush(0); #ifdef _OMPTHREAD_ #pragma omp parallel for shared(halo_map,density_map,global_eff,global_N_smooth,global_dx_smooth,R,ncells_1D,bubblef,flag_bub) private(ii_c,ij_c,ik_c,ii,ij,ik,a,b,c,ind) #endif for(ii_c=0;ii_c<global_N_smooth;ii_c++){ for(ij_c=0;ij_c<global_N_smooth;ij_c++){ for(ik_c=0;ik_c<global_N_smooth;ik_c++){ ind=ii_c*global_N_smooth*global_N_smooth+ij_c*global_N_smooth+ik_c; if(halo_map[ind]>0.) { if(!(density_map[ind]>0.) 
|| ((double)halo_map[ind]/density_map[ind]>=1.0/global_eff)) { flag_bub=1; for(ii=-(ncells_1D+1);ii<=ncells_1D+1;ii++){ a=check_borders(ii_c+ii,global_N_smooth); for(ij=-(ncells_1D+1);ij<=ncells_1D+1;ij++){ if(sqrt(ii*ii+ij*ij)*global_dx_smooth <= R){ b=check_borders(ij_c+ij,global_N_smooth); for(ik=-(ncells_1D+1);ik<=ncells_1D+1;ik++){ c=check_borders(ik_c+ik,global_N_smooth); if(sqrt(ii*ii+ij*ij+ik*ik)*global_dx_smooth <= R){ bubblef[a*global_N_smooth*global_N_smooth+b*global_N_smooth+c]=1.0; } } } } } } } } } } if(flag_bub>0){printf("Found bubble...\n");fflush(0);} R/=bfactor; } /* ends small bubbles R cycle */ neutral=0.; for (i=0; i<global_N3_smooth; i++){ neutral+=1.0-bubblef[i]; } neutral/=global_N3_smooth; printf("neutral fraction=%lf\n",neutral);fflush(0); xHI[iz]=neutral; sprintf(fname, "%s/Ionization/xHII_z%.3f_eff%.2lf_N%ld_L%.1f.dat",argv[1],redshift,global_eff,global_N_smooth,global_L/global_hubble); if((fid = fopen(fname,"wb"))==NULL) { printf("Error opening file:%s\n",fname); exit(1); } elem=fwrite(bubblef,sizeof(float),global_N3_smooth,fid); fclose(fid); iz++; } /* ends redshift cycle */ /* z cycle for neutral>=xHlim */ while(redshift<(zmax+dz/10)) { printf("z(>%f) = %f\n",xHlim,redshift);fflush(0); xHI[iz]=1.0; sprintf(fname, "%s/Ionization/xHII_z%.3f_eff%.2lf_N%ld_L%.1f.dat",argv[1],redshift,global_eff,global_N_smooth,global_L/global_hubble); if((fid = fopen(fname,"wb"))==NULL) { printf("Error opening file:%s\n",fname); exit(1); } #ifdef _OMPTHREAD_ #pragma omp parallel for shared(bubblef,global_N3_smooth) private(i) #endif for(i=0;i<global_N3_smooth;i++) bubblef[i]=0.0; elem=fwrite(bubblef,sizeof(float),global_N3_smooth,fid); fclose(fid); iz++; redshift+=dz; } sprintf(fname, "%s/Output_text_files/zsim.txt",argv[1]); if((fid = fopen(fname,"a"))==NULL) { printf("Error opening file:%s\n",fname); exit(1); } for(redshift=zmax;redshift>(zmin-dz/10);redshift-=dz) fprintf(fid,"%f\n",redshift); /* first line should be highest redshift */ fclose(fid); 
sprintf(fname, "%s/Output_text_files/x_HI_eff%.2lf_N%ld_L%.1f.dat",argv[1],global_eff,global_N_smooth,global_L/global_hubble); if((fid = fopen(fname,"a"))==NULL) { printf("Error opening file:%s\n",fname); exit(1); } for(i=iz-1;i>=0;i--) fprintf(fid,"%lf\n",xHI[i]); /* first line should be highest redshift */ fclose(fid); free(xHI); free(bubblef); fftwf_free(top_hat_r); fftwf_free(top_hat_c); fftwf_free(collapsed_mass_c); fftwf_free(density_map); fftwf_free(density_map_c); fftwf_free(halo_map); fftwf_free(halo_map_c); fftwf_free(total_mass_c); fftwf_free(bubble); fftwf_free(bubble_c); exit(0); }
GLFFTWater::GLFFTWater(GLFFTWaterParams ¶ms) { #ifdef _WIN32 m_h = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_dx = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_dz = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_w = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N)*(params.N)), 4); #else posix_memalign((void **)&m_h,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_dx,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_dz,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_w,4,sizeof(float)*(params.N)*(params.N)); #endif m_htilde0 = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex)*(params.N)*(params.N)); m_heightmap = new float3[(params.N)*(params.N)]; m_params = params; std::tr1::mt19937 prng(1337); std::tr1::normal_distribution<float> normal; std::tr1::uniform_real<float> uniform; std::tr1::variate_generator<std::tr1::mt19937, std::tr1::normal_distribution<float> > randn(prng,normal); std::tr1::variate_generator<std::tr1::mt19937, std::tr1::uniform_real<float> > randu(prng,uniform); for(int i=0, k=0; i<params.N; i++) { float k_x = (-(params.N-1)*0.5f+i)*(2.f*3.141592654f / params.L); for(int j=0; j<params.N; j++, k++) { float k_y = (-(params.N-1)*0.5f+j)*(2.f*3.141592654f / params.L); float A = randn(); float theta = randu()*2.f*3.141592654f; float P = (k_x==0.f && k_y==0.0f) ? 0.f : sqrtf(phillips(k_x,k_y,m_w[k])); m_htilde0[k][0] = m_htilde0[k][1] = P*A*sinf(theta); } } m_kz = new float[params.N*(params.N / 2 + 1)]; m_kx = new float[params.N*(params.N / 2 + 1)]; const int hN = m_params.N / 2; for(int y=0; y<m_params.N; y++) { float kz = (float) (y - hN); for(int x=0; x<=hN; x++) { float kx = (float) (x - hN); float k = 1.f/sqrtf(kx*kx+kz*kz); m_kz[y*(hN+1)+x] = kz*k; m_kx[y*(hN+1)+x] = kx*k; } } if(!fftwf_init_threads()) { cerr << "Error initializing multithreaded fft." 
<< endl; } else { fftwf_plan_with_nthreads(2); } m_fftplan = fftwf_plan_dft_c2r_2d(m_params.N, m_params.N, (fftwf_complex *)m_h, m_h, FFTW_ESTIMATE); glGenTextures(1, &m_texId); glBindTexture(GL_TEXTURE_2D, m_texId); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, params.N, params.N, 0, GL_RGB, GL_FLOAT, 0); glBindTexture(GL_TEXTURE_2D, 0); }
int main(int argc, char **argv){ FILE *fid; size_t elem; long int i,j,p; long int indi, indj; double kk; fftwf_complex *map_vel_c; float *map; fftwf_complex *map_in_c; fftwf_plan pc2r; fftwf_plan pr2c; char fname[256]; DIR* dir; if(argc != 2) { printf("Usage: get_velocityfield work_dir\n"); printf("work_dir - directory containing simfast21.ini \n"); exit(1); } get_Simfast21_params(argv[1]); #ifdef _OMPTHREAD_ omp_set_num_threads(global_nthreads); fftwf_init_threads(); fftwf_plan_with_nthreads(global_nthreads); printf("Using %d threads\n",global_nthreads); #endif if(!(map=(float *) fftwf_malloc(global_N_halo*global_N_halo*global_N_halo*sizeof(float)))) { printf("Problem...\n"); exit(1); } if(!(map_in_c = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * global_N_halo*global_N_halo*(global_N_halo/2+1)))) { printf(" Out of memory...\n"); exit(1); } if(!(map_vel_c = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * global_N_halo*global_N_halo*(global_N_halo/2+1)))) { printf("Problem allocating memory for x velocity field in k-space...\n"); exit(1); } /* Tansformacoes de fourier para calcular as caixas vx(x) vx(y) e vx(z) */ if(!(pc2r=fftwf_plan_dft_c2r_3d(global_N_halo, global_N_halo, global_N_halo, map_vel_c, map, FFTW_ESTIMATE))) { printf("Problem...\n"); exit(1); } /* FFT para map */ if(!(pr2c=fftwf_plan_dft_r2c_3d(global_N_halo, global_N_halo, global_N_halo, map , map_in_c, FFTW_ESTIMATE))) { printf("Problem...\n"); exit(1); } sprintf(fname, "%s/delta/delta_z0_N%ld_L%d.dat", argv[1],global_N_halo,(int)(global_L)); /*Leitura do campo de densidades no espaco real*/ fid=fopen(fname,"rb"); /* second argument contains name of input file */ if (fid==NULL) { printf("\n Density file path is not correct or the file does not exit...\n"); exit (1); } elem=fread(map,sizeof(float),global_N_halo*global_N_halo*global_N_halo,fid); fclose(fid); /***********************************************************************************/ // Conversao do mapa de densidades de real 
para complexo fftwf_execute(pr2c); /********************************************************************/ /********************************************************************/ /********************************************************************/ /* Computing velocity fields */ /* Create directory Velocity */ sprintf(fname,"%s/Velocity",argv[1]); if((dir=opendir(fname))==NULL) { printf("Creating Velocity directory\n"); if(mkdir(fname,(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH))!=0) { printf("Error creating directory!\n"); exit(1); } } /********************************************************************/ printf("\nComputing v_x field...\n");fflush(0); #ifdef _OMPTHREAD_ #pragma omp parallel for shared(global_N_halo,global_dk,map_vel_c,map_in_c,global_dx_halo) private(i,indi,j,indj,p,kk) #endif for(i=0;i<global_N_halo;i++) { if(i>global_N_halo/2) { /* Large frequencies are equivalent to smaller negative ones */ indi=-(global_N_halo-i); }else indi=i; for(j=0;j<global_N_halo;j++) { if(j>global_N_halo/2) { indj=-(global_N_halo-j); }else indj=j; for(p=0;p<=global_N_halo/2;p++) { kk=global_dk*sqrt(indi*indi+indj*indj+p*p); if(kk>0){ //Normalizacao pois a biblioteca fftw3 não tem dx nem global_dk nos integrais map_in_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]=I*(global_dk)*(1/(kk*kk))*map_in_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]*global_dx_halo*global_dx_halo*global_dx_halo/global_L3; map_vel_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]=indi*map_in_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]; }else{ map_vel_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]=0; } } } } box_symmetriesf(map_vel_c,global_N_halo); /* Executes FFT */ fftwf_execute(pc2r); printf("\nWriting v_x field to file...\n");fflush(0); sprintf(fname, "%s/Velocity/vel_x_z0_N%ld_L%d.dat", argv[1],global_N_halo,(int)(global_L)); if((fid=fopen(fname,"wb"))==NULL){ printf("\nThe file cannot be open\n"); return 0; 
} elem=fwrite(map,sizeof(float),global_N_halo*global_N_halo*global_N_halo,fid); fclose(fid); /********************************************************************/ printf("\nComputing v_y field...\n");fflush(0); #ifdef _OMPTHREAD_ #pragma omp parallel for shared(global_N_halo,global_dk,map_vel_c,map_in_c,global_dx_halo) private(i,indi,j,indj,p,kk) #endif for(i=0;i<global_N_halo;i++) { if(i>global_N_halo/2) { /* Large frequencies are equivalent to smaller negative ones */ indi=-(global_N_halo-i); }else indi=i; for(j=0;j<global_N_halo;j++) { if(j>global_N_halo/2) { indj=-(global_N_halo-j); }else indj=j; for(p=0;p<=global_N_halo/2;p++) { kk=global_dk*sqrt(indi*indi+indj*indj+p*p); if(kk>0){ //Normalizacao pois a biblioteca fftw3 não tem dx nem global_dk nos integrais map_vel_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]=indj*map_in_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]; }else{ map_vel_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]=0; } } } } box_symmetriesf(map_vel_c,global_N_halo); /* Executes FFT */ fftwf_execute(pc2r); printf("\nWriting v_y field to file...\n");fflush(0); sprintf(fname, "%s/Velocity/vel_y_z0_N%ld_L%d.dat", argv[1],global_N_halo,(int)(global_L)); if((fid=fopen(fname,"wb"))==NULL){ printf("\nThe file cannot be open\n"); return 0; } elem=fwrite(map,sizeof(float),global_N_halo*global_N_halo*global_N_halo,fid); fclose(fid); /********************************************************************/ printf("\nComputing v_z field...\n");fflush(0); #ifdef _OMPTHREAD_ #pragma omp parallel for shared(global_N_halo,global_dk,map_vel_c,map_in_c,global_dx_halo) private(i,indi,j,indj,p,kk) #endif for(i=0;i<global_N_halo;i++) { if(i>global_N_halo/2) { /* Large frequencies are equivalent to smaller negative ones */ indi=-(global_N_halo-i); }else indi=i; for(j=0;j<global_N_halo;j++) { if(j>global_N_halo/2) { indj=-(global_N_halo-j); }else indj=j; for(p=0;p<=global_N_halo/2;p++) { 
kk=global_dk*sqrt(indi*indi+indj*indj+p*p); if(kk>0){ //Normalizacao pois a biblioteca fftw3 não tem dx nem global_dk nos integrais map_vel_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]=p*map_in_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]; }else{ map_vel_c[i*global_N_halo*(global_N_halo/2+1)+j*(global_N_halo/2+1)+p]=0; } } } } box_symmetriesf(map_vel_c,global_N_halo); /* Executes FFT */ fftwf_execute(pc2r); printf("\nWriting v_z field to file...\n");fflush(0); sprintf(fname, "%s/Velocity/vel_z_z0_N%ld_L%d.dat", argv[1],global_N_halo,(int)(global_L)); if((fid=fopen(fname,"wb"))==NULL){ printf("\nThe file cannot be open\n"); return 0; } elem=fwrite(map,sizeof(float),global_N_halo*global_N_halo*global_N_halo,fid); fclose(fid); fftwf_free(map); fftwf_free(map_in_c); fftwf_free(map_vel_c); fftwf_destroy_plan(pc2r); fftwf_destroy_plan(pr2c); exit(0); }