void grav_fft_init() { int xblock2 = XRES/CELL*2; int yblock2 = YRES/CELL*2; int x, y, fft_tsize = (xblock2/2+1)*yblock2; float distance, scaleFactor; fftwf_plan plan_ptgravx, plan_ptgravy; if (grav_fft_status) return; //use fftw malloc function to ensure arrays are aligned, to get better performance th_ptgravx = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float)); th_ptgravy = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float)); th_ptgravxt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex)); th_ptgravyt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex)); th_gravmapbig = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float)); th_gravmapbigt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex)); th_gravxbig = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float)); th_gravybig = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float)); th_gravxbigt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex)); th_gravybigt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex)); //select best algorithm, could use FFTW_PATIENT or FFTW_EXHAUSTIVE but that increases the time taken to plan, and I don't see much increase in execution speed plan_ptgravx = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravx, th_ptgravxt, FFTW_MEASURE); plan_ptgravy = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravy, th_ptgravyt, FFTW_MEASURE); plan_gravmap = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_gravmapbig, th_gravmapbigt, FFTW_MEASURE); plan_gravx_inverse = fftwf_plan_dft_c2r_2d(yblock2, xblock2, th_gravxbigt, th_gravxbig, FFTW_MEASURE); plan_gravy_inverse = fftwf_plan_dft_c2r_2d(yblock2, xblock2, th_gravybigt, th_gravybig, FFTW_MEASURE); //(XRES/CELL)*(YRES/CELL)*4 is size of data array, scaling needed because FFTW calculates an unnormalized DFT scaleFactor = -M_GRAV/((XRES/CELL)*(YRES/CELL)*4); //calculate velocity map caused by a point mass for (y=0; y<yblock2; y++) { for (x=0; x<xblock2; x++) { if (x==XRES/CELL && y==YRES/CELL) continue; 
distance = sqrtf(pow(x-(XRES/CELL), 2) + pow(y-(YRES/CELL), 2)); th_ptgravx[y*xblock2+x] = scaleFactor*(x-(XRES/CELL)) / pow(distance, 3); th_ptgravy[y*xblock2+x] = scaleFactor*(y-(YRES/CELL)) / pow(distance, 3); } } th_ptgravx[yblock2*xblock2/2+xblock2/2] = 0.0f; th_ptgravy[yblock2*xblock2/2+xblock2/2] = 0.0f; //transform point mass velocity maps fftwf_execute(plan_ptgravx); fftwf_execute(plan_ptgravy); fftwf_destroy_plan(plan_ptgravx); fftwf_destroy_plan(plan_ptgravy); fftwf_free(th_ptgravx); fftwf_free(th_ptgravy); //clear padded gravmap memset(th_gravmapbig,0,xblock2*yblock2*sizeof(float)); grav_fft_status = 1; }
void HDRImage3c::rfftplanUpdate () { uint2 total = getTotalSize()-uint2(0,2); m_rfftplanR = fftwf_plan_dft_c2r_2d (total.y, total.x, m_red, m_hdriRFFT->getRedBuffer(), FFTW_MEASURE); m_rfftplanG = fftwf_plan_dft_c2r_2d (total.y, total.x, m_green, m_hdriRFFT->getGreenBuffer(), FFTW_MEASURE); m_rfftplanB = fftwf_plan_dft_c2r_2d (total.y, total.x, m_blue, m_hdriRFFT->getBlueBuffer(), FFTW_MEASURE); }
/// Sets up an n x n FFT gravity solver: unit system and cosmological
/// parameters, the padded in-place work arrays, and the forward/inverse plans.
gravity_solver::gravity_solver( unsigned n )
: n_( n )
{
	//fftwf_init_threads();
	//fftwf_plan_with_nthreads(omp_get_max_threads());

	// --- unit system ---
	UnitLength_in_cm = 3.08568025e24f;   // 1.0 Mpc
	UnitMass_in_g = 1.989e43f;           // 1.0e10 solar masses
	UnitVelocity_in_cm_per_s = 1e5f;     // 1 km/sec
	UnitTime_in_s = UnitLength_in_cm / UnitVelocity_in_cm_per_s;
	GRAVITY = 6.672e-8f;
	G = GRAVITY / pow(UnitLength_in_cm, 3) * UnitMass_in_g * pow(UnitTime_in_s, 2);

	// --- cosmology and stepping state ---
	Omega_m = 1.0; //0.276;
	Omega_L = 0.0; //0.724;
	aforce = 0.0;
	stepno = 0;

	// --- box geometry ---
	box_ = boxlength;
	box05_ = 0.5f * boxlength;

	// --- work arrays: n_ rows of n_+2 reals; cdata views data as complex ---
	data = new fftwf_real[ n_ * (n_+2) ];
	force = new fftwf_real[ n_ * (n_+2) ];
	cdata = reinterpret_cast<fftwf_complex*>(data);

	// --- in-place forward and inverse transforms on data/cdata ---
	plan = fftwf_plan_dft_r2c_2d( n_, n_, data, cdata, FFTW_MEASURE );
	iplan = fftwf_plan_dft_c2r_2d( n_, n_, cdata, data, FFTW_MEASURE );
}
void ifft2(float *out      /* [n1*n2] */,
	   sf_complex *inp /* [nk*n2] */)
/*< 2-D inverse FFT >*/
{
    int i1, i2;

#ifdef SF_HAS_FFTW
    /* The backward plan is created on first use and then kept in icfg. */
    if (NULL == icfg) {
	if (cmplx) {
	    icfg = fftwf_plan_dft_2d(n2,n1,
				     (fftwf_complex *) dd,
				     (fftwf_complex *) cc[0],
				     FFTW_BACKWARD, FFTW_MEASURE);
	} else {
	    icfg = fftwf_plan_dft_c2r_2d(n2,n1,
					 (fftwf_complex *) dd, ff[0],
					 FFTW_MEASURE);
	}
	if (NULL == icfg) sf_error("FFTW failure.");
    }

    /* The plan reads from dd, so stage the input there before executing. */
    for (i1=0; i1 < nk*n2; i1++) {
	dd[i1] = inp[i1];
    }
    fftwf_execute(icfg);
#else
    /* KISS FFT: first transform along the second axis, one column at a time... */
    for (i1=0; i1 < nk; i1++) {
	kiss_fft_stride(icfg2,(kiss_fft_cpx *) (inp+i1),ctrace2,nk);
	for (i2=0; i2<n2; i2++) {
	    tmp[i2][i1] = ctrace2[i2];
	}
    }
    /* ...then along the first axis, one row at a time. */
    for (i2=0; i2 < n2; i2++) {
	if (cmplx) {
	    kiss_fft_stride(icfg1,tmp[i2],(kiss_fft_cpx *) cc[i2],1);
	} else {
	    kiss_fftri(icfg,tmp[i2],ff[i2]);
	}
    }
#endif

    /* FFT centering and normalization */
    if (cmplx) {
	/* sign alternates in a checkerboard pattern over (i1,i2) */
	for (i2=0; i2<n2; i2++) {
	    for (i1=0; i1<n1; i1++) {
		out[i2*n1+i1] = (((i2%2==0)==(i1%2==0))? wt:-wt) * crealf(cc[i2][i1]);
	    }
	}
    } else {
	/* sign alternates from one i2 row to the next */
	for (i2=0; i2<n2; i2++) {
	    for (i1=0; i1<n1; i1++) {
		out[i2*n1+i1] = (i2%2? -wt: wt)*ff[i2][i1];
	    }
	}
    }
}
/**
 * Allocate the per-channel buffers of a plugin run and fill them.
 *
 * For every channel this allocates a spatial float array (w*h) and a
 * frequency array ((w/2+1)*h complex values), reads the drawable's pixels
 * into pd->img_pixels, stores the forward FFT of the channel (input scaled
 * by 1/(w*h)) in pd->image_freq[channel], and finally leaves the raw,
 * unscaled pixel values in pd->image[channel] for the preview.
 *
 * pd->plan receives an inverse (c2r) plan built on channel 0's buffers.
 * The forward plan is local and destroyed before returning.
 *
 * @param pd  plugin state; image_width, image_height, channel_count and
 *            drawable are read, the buffers/plan/region fields are written.
 */
void fft_prepare(PluginData *pd)
{
    gint w = pd->image_width, h = pd->image_height;
    gint channel_count = pd->channel_count;
    float **image;
    guchar *img_pixels;
    float norm;

    image = pd->image = (float**) malloc(sizeof(float*) * channel_count);
    pd->image_freq = (fftwf_complex**) malloc(sizeof(fftwf_complex*) * channel_count);
    img_pixels = pd->img_pixels = g_new (guchar, w * h * channel_count);

    // allocate an array for each channel
    for (int channel = 0; channel < channel_count; channel ++){
        image[channel] = (float*) fftwf_malloc(sizeof(float) * w * h);
        pd->image_freq[channel] = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * (w/2+1) * h);
    }
    // printf("Image data occupies %lu MB.\n", (sizeof(float) * w * h * channel_count) >> 20);
    // printf("Frequency data occupies %lu MB.\n", (sizeof(fftwf_complex) * (w/2+1) * h * channel_count) >> 20);

    // forward plan: built on channel 0, applied to every channel below
    fftwf_plan plan = fftwf_plan_dft_r2c_2d(pd->image_height, pd->image_width, *image, *pd->image_freq, FFTW_ESTIMATE);
    // inverse plan (to be reused)
    pd->plan = fftwf_plan_dft_c2r_2d(pd->image_height, pd->image_width, *pd->image_freq, *image, FFTW_ESTIMATE);

    // set image region to reading mode
    gimp_pixel_rgn_init (&pd->region, pd->drawable, 0, 0, w, h, FALSE, FALSE);
    gimp_pixel_rgn_get_rect(&pd->region, img_pixels, 0, 0, w, h);

    // execute forward FFT once per channel
    norm = 1.0/(w*h);
    for(int channel=0; channel<channel_count; channel++)
    {
        // convert one color channel to float[], pre-scaled by 1/(w*h)
        for(int i=0; i < w*h; i ++)
        {
            image[channel][i] = (float) img_pixels[(i)*channel_count + channel] * norm;
        }
        // transform the channel
        fftwf_execute_dft_r2c(plan, image[channel], pd->image_freq[channel]);
        // copy the channel again (unscaled this time), for preview; this
        // overwrites every element, so no intermediate refill is needed
        for(int i=0; i < w*h; i ++)
        {
            image[channel][i] = (float) img_pixels[(i)*channel_count + channel];
        }
    }
    fftwf_destroy_plan(plan);
}
void ifft2_allocate(sf_complex *inp /* [nk*n2] */)
/*< allocate inverse transform >*/
{
#ifdef SF_HAS_FFTW
    /* Build the backward plan directly on the caller's input array. */
    if (cmplx) {
	icfg = fftwf_plan_dft_2d(n2,n1,
				 (fftwf_complex *) inp,
				 (fftwf_complex *) cc[0],
				 FFTW_BACKWARD, FFTW_MEASURE);
    } else {
	icfg = fftwf_plan_dft_c2r_2d(n2,n1,
				     (fftwf_complex *) inp, ff[0],
				     FFTW_MEASURE);
    }
    if (NULL == icfg) sf_error("FFTW failure.");
#endif
}
/****** fft_conv ************************************************************ PROTO void fft_conv(float *data1, float *fdata2, int *size) PURPOSE Optimized 2-dimensional FFT convolution using the FFTW library. INPUT ptr to the first image, ptr to the Fourier transform of the second image, image size vector. OUTPUT -. NOTES For data1 and fdata2, memory must be allocated for size[0]* ... * 2*(size[naxis-1]/2+1) floats (padding required). AUTHOR E. Bertin (IAP) VERSION 29/03/2013 ***/ void fft_conv(float *data1, float *fdata2, int *size) { float *fdata1p,*fdata2p, real,imag, fac; int i, npix,npix2; /* Convert axis indexing to that of FFTW */ npix = size[0]*size[1]; npix2 = ((size[0]/2) + 1) * size[1]; /* Forward FFT "in place" for data1 */ if (!fplan) { QFFTWF_MALLOC(fdata1, fftwf_complex, npix2); fplan = fftwf_plan_dft_r2c_2d(size[1], size[0], data1, (fftwf_complex *)fdata1, FFTW_ESTIMATE); } fftwf_execute_dft_r2c(fplan, data1, fdata1); /* Actual convolution (Fourier product) */ fac = 1.0/npix; fdata1p = (float *)fdata1; fdata2p = fdata2; #pragma ivdep for (i=npix2; i--;) { real = *fdata1p **fdata2p - *(fdata1p+1)**(fdata2p+1); imag = *(fdata1p+1)**fdata2p + *fdata1p**(fdata2p+1); *(fdata1p) = fac*real; *(fdata1p+1) = fac*imag; fdata1p+=2; fdata2p+=2; } /* Reverse FFT */ if (!bplan) bplan = fftwf_plan_dft_c2r_2d(size[1], size[0], (fftwf_complex *)fdata1, data1, FFTW_ESTIMATE); fftwf_execute_dft_c2r(bplan, fdata1, data1); // fftwf_execute(plan); return; }
GLFFTWater::GLFFTWater(GLFFTWaterParams ¶ms) { #ifdef _WIN32 m_h = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_dx = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_dz = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4); m_w = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N)*(params.N)), 4); #else posix_memalign((void **)&m_h,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_dx,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_dz,4,sizeof(float)*(params.N+2)*(params.N)); posix_memalign((void **)&m_w,4,sizeof(float)*(params.N)*(params.N)); #endif m_htilde0 = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex)*(params.N)*(params.N)); m_heightmap = new float3[(params.N)*(params.N)]; m_params = params; std::tr1::mt19937 prng(1337); std::tr1::normal_distribution<float> normal; std::tr1::uniform_real<float> uniform; std::tr1::variate_generator<std::tr1::mt19937, std::tr1::normal_distribution<float> > randn(prng,normal); std::tr1::variate_generator<std::tr1::mt19937, std::tr1::uniform_real<float> > randu(prng,uniform); for(int i=0, k=0; i<params.N; i++) { float k_x = (-(params.N-1)*0.5f+i)*(2.f*3.141592654f / params.L); for(int j=0; j<params.N; j++, k++) { float k_y = (-(params.N-1)*0.5f+j)*(2.f*3.141592654f / params.L); float A = randn(); float theta = randu()*2.f*3.141592654f; float P = (k_x==0.f && k_y==0.0f) ? 0.f : sqrtf(phillips(k_x,k_y,m_w[k])); m_htilde0[k][0] = m_htilde0[k][1] = P*A*sinf(theta); } } m_kz = new float[params.N*(params.N / 2 + 1)]; m_kx = new float[params.N*(params.N / 2 + 1)]; const int hN = m_params.N / 2; for(int y=0; y<m_params.N; y++) { float kz = (float) (y - hN); for(int x=0; x<=hN; x++) { float kx = (float) (x - hN); float k = 1.f/sqrtf(kx*kx+kz*kz); m_kz[y*(hN+1)+x] = kz*k; m_kx[y*(hN+1)+x] = kx*k; } } if(!fftwf_init_threads()) { cerr << "Error initializing multithreaded fft." 
<< endl; } else { fftwf_plan_with_nthreads(2); } m_fftplan = fftwf_plan_dft_c2r_2d(m_params.N, m_params.N, (fftwf_complex *)m_h, m_h, FFTW_ESTIMATE); glGenTextures(1, &m_texId); glBindTexture(GL_TEXTURE_2D, m_texId); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, params.N, params.N, 0, GL_RGB, GL_FLOAT, 0); glBindTexture(GL_TEXTURE_2D, 0); }
void cosmo_init_particles( unsigned seed ) { fftwf_real *data = new fftwf_real[ nres * (nres+2) ]; fftwf_complex *cdata = reinterpret_cast<fftwf_complex*>(data); fftwf_real *data2 = new fftwf_real[ nres * (nres+2) ]; fftwf_complex *cdata2 = reinterpret_cast<fftwf_complex*>(data2); gsl_rng *RNG = gsl_rng_alloc( gsl_rng_mt19937 ); gsl_rng_set( RNG, seed ); fftwf_plan plan, iplan, plan2, iplan2; plan = fftwf_plan_dft_r2c_2d( nres, nres, data, cdata, FFTW_MEASURE ), iplan = fftwf_plan_dft_c2r_2d( nres, nres, cdata, data, FFTW_MEASURE ); plan2 = fftwf_plan_dft_r2c_2d( nres, nres, data2, cdata2, FFTW_MEASURE ), iplan2 = fftwf_plan_dft_c2r_2d( nres, nres, cdata2, data2, FFTW_MEASURE ); ///////////////////////////////// int nresp = nres/2+1; float kfac = 2.0*M_PI/boxlength; float gaussran1, gaussran2; float fftnorm = 1.0f / (float)nres * (2.0f*M_PI/boxlength); for( int i=0; i<nres; ++i ) for( int j=0; j<nres; ++j ) { int idx = i*(nres+2)+j; data[idx] = gsl_ran_ugaussian_ratio_method( RNG ) / nres; } fftwf_execute( plan ); ///////////////////////////////// for( int i=0; i<nres; ++i ) for( int j=0; j<nresp; ++j ) { float kx = i>=nresp? (float)(i-nres)*kfac : (float)i*kfac; float ky = (float)j*kfac; float kk = sqrtf(kx*kx+ky*ky); int idx = i*nresp+j; float ampk = cosmo_get_amp_k( kk ); //*sqrtf(kk); if( kk >= nresp*kfac ) ampk = 0.0; cdata[idx][0] *= ampk * fftnorm; cdata[idx][1] *= ampk * fftnorm; } // insert code to make random numbers independent of resolution (have rectangle outliens) float dx = boxlength / nres; float vfact = ComputeVFact( 1.0f/(1.0f+g_zstart)); ///////////////////////////////// // generate x-component for( int i=0; i<nres; ++i ) for( int j=0; j<nresp; ++j ) { float kx = i>=nresp? 
(float)(i-nres)*kfac : (float)i*kfac; float ky = (float)j*kfac; float kk = sqrtf(kx*kx+ky*ky); int idx = i*nresp+j; // (a+ib) * ik = iak -bk cdata2[idx][0] = kx/kk/kk * cdata[idx][1]; cdata2[idx][1] = -kx/kk/kk * cdata[idx][0]; } cdata2[0][0] = 0.0f; cdata2[0][1] = 0.0f; fftwf_execute( iplan2 ); for( int i=0; i<nres; ++i ) for( int j=0; j<nres; ++j ) { int idx = i*(nres+2)+j; int ii = i*nres+j; P[ii].x = (float)i*dx + data2[idx]; P[ii].vx = data2[idx] * vfact; P[ii].id = ii; P[ii].acc[0] = 0.0f; } ///////////////////////////////// // generate y-component for( int i=0; i<nres; ++i ) for( int j=0; j<nresp; ++j ) { float kx = i>=nresp? (float)(i-nres)*kfac : (float)i*kfac; float ky = (float)j*kfac; float kk = sqrtf(kx*kx+ky*ky); int idx = i*nresp+j; cdata2[idx][0] = ky/kk/kk * cdata[idx][1]; cdata2[idx][1] = -ky/kk/kk * cdata[idx][0]; } cdata2[0][0] = 0.0f; cdata2[0][1] = 0.0f; fftwf_execute( iplan2 ); for( int i=0; i<nres; ++i ) for( int j=0; j<nres; ++j ) { int idx = i*(nres+2)+j; int ii = i*nres+j; P[ii].y = (float)j*dx + data2[idx]; P[ii].vy = data2[idx] * vfact; P[ii].acc[1] = 0.0f; } ///////////////////////////////// delete[] data; delete[] data2; fftwf_destroy_plan(plan); fftwf_destroy_plan(iplan); fftwf_destroy_plan(plan2); fftwf_destroy_plan(iplan2); gsl_rng_free( RNG ); }
void SetFastFFT(float *buf, DIM nsam) { plan_fft_fast=fftwf_plan_dft_r2c_2d(nsam.y,nsam.x,buf,reinterpret_cast<fftwf_complex *>(buf),FFTW_ESTIMATE); plan_ifft_fast=fftwf_plan_dft_c2r_2d(nsam.y,nsam.x,reinterpret_cast<fftwf_complex *>(buf),buf,FFTW_ESTIMATE); }
void ifft2d(float* buf, DIM nsam) { fftwf_plan plan_fft=fftwf_plan_dft_c2r_2d(nsam.y,nsam.x,reinterpret_cast<fftwf_complex *>(buf),buf,FFTW_ESTIMATE); fftwf_execute(plan_fft); fftwf_destroy_plan(plan_fft); }
// Processes one depth frame into a wall histogram and renders it into `frame`.
//
// The 640x480 depth image is split into 8x8 pixel tiles. For each tile with
// enough valid samples, least-squares slopes are fitted; tiles that pass the
// wall test vote into fft_data, a (slope, distance) histogram. The histogram
// is low-pass filtered through a forward/inverse FFT, local maxima are drawn
// into `frame`, and strong maxima are printed to std::cerr as walls.
//
// Returns 1 (after logging) when no depth data is available yet, 0 otherwise.
int ComWallFrame::action(IDS* main) {
  int x,y,xo,yo, Y;
  Kinect::depth_buffer* dframe = main->getDepth();
  Kinect* kinect = main->getKinect();
  Minotaur* minotaur = main->getMinotaur();
  Minotaur::MinotaurState minostate = minotaur->getState();
  Point p3d[8][8];
  Point avg3d;
  Point avgbar_flat;
  int valid;
  float zvariance, xvariance, yvariance, xSS, ySS, xybar, xzbar, yzbar;
  float slopeyx, slopezx, slopezy;
  float yint, zxint, zyint;
  float resid_yx, resid_zx, resid_zy;
  uint8_t r,g,b;
  uint16_t d, d0, d1;
  float fd;
  float floor_height = 0;
  int floor_count = 0;
  float rx, ry, rz;
  float sin_ori = sin(minostate.orient);
  float cos_ori = cos(minostate.orient);
  float origin_dist;
  float avg_dist;
  float orient_yx;
  uint32_t count, max_count = 0;
  Wall avg_walls[480/8/WALL_AVG_SIZE][640/8/WALL_AVG_SIZE][WALL_AVG_SIZE][WALL_AVG_SIZE];
  bool valid_walls[480/8/WALL_AVG_SIZE][640/8/WALL_AVG_SIZE][WALL_AVG_SIZE][WALL_AVG_SIZE];
  int nslope = 480;
  int nodist = 256;
  int nodist_half = nodist / 2 + 1;
  float fft_data[nslope][nodist];
  float avg_slope, avg_yint;
  int fail_yx_res = 0;
  int fail_zx_res = 0;
  int fail_zy_res = 0;
  int fail_floor_check1 = 0;
  int fail_floor_check2 = 0;
  int fail_floor_check3 = 0;
  int fail_floor_check4 = 0;
  bool wall_check1, wall_check2, wall_check3;
  bool floor_check1, floor_check2, floor_check3, floor_check4;

  if(main->getDepthCount() <= 0) {
    std::cerr << "MapFrame awaiting depth data" << std::endl;
    return 1;
  }

  // Allocated only after the early return above so that path cannot leak it.
  fftwf_complex* fft_out = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*nslope*nodist_half);

  for(y = 0; y < 480/8/WALL_AVG_SIZE; y++)
    for(x = 0; x < 640/8/WALL_AVG_SIZE; x++)
      for(yo = 0; yo < WALL_AVG_SIZE; yo++)
        for(xo = 0; xo < WALL_AVG_SIZE; xo++)
          valid_walls[y][x][yo][xo] = false;

  for(x = 0; x < nodist; x++)
    for(y = 0; y < nslope; y++)
      fft_data[y][x] = 0;

  for(y = 0; y < 480/8; y++) {
    for(x = 0; x < 640/8; x++) {
      avg3d = {0,0,0};
      avg_dist = 0;
      valid = 0;
      xybar = 0;
      // Decode the 8x8 tile and project every valid sample into world space.
      for(yo = 0; yo < 8; yo++) {
        for(xo = 0; xo < 8; xo++) {
          d0 = (*dframe)[y*8+yo][x*8+xo][0];
          d1 = (*dframe)[y*8+yo][x*8+xo][1];
          d = d1;
          d = d << 8 | d0;
          if(d != 0x07FF && d <= KINECT_CALIB_DOFF) {
            fd = decode_kinect_dist[d];
            avg_dist += fd;
            rx = kinect->x3d(x,y,xo,yo,fd);
            ry = kinect->y3d(x,y,xo,yo,fd);
            rz = kinect->z3d(x,y,xo,yo,fd);
            // rotate by the robot orientation, then translate by its position
            p3d[yo][xo].x = rx * cos_ori - ry * sin_ori + minostate.x;
            p3d[yo][xo].y = rx * sin_ori + ry * cos_ori + minostate.y;
            p3d[yo][xo].z = rz;
            avg3d.x += p3d[yo][xo].x;
            avg3d.y += p3d[yo][xo].y;
            avg3d.z += p3d[yo][xo].z;
            valid_points[yo][xo] = true;
            ++valid;
          }else
            valid_points[yo][xo] = false; //p3d[yo][xo].valid = false;
        }
      }
      // With valid == 0 these become NaN, but such tiles are rejected just below.
      avg3d.x /= valid;
      avg3d.y /= valid;
      avg3d.z /= valid;
      if(valid <= (8*8)*3/4) {
        //Not enough data to represent the points
        r = 0x00;
        g = 0x00;
        b = 0x00;
      }else{
        //Calculate statistics for slope calculation
        zvariance = 0;
        xvariance = 0;
        yvariance = 0;
        xSS = 0, ySS = 0;
        xybar = 0, xzbar = 0, yzbar = 0;
        for(yo = 0; yo < 8; yo++)
          for(xo = 0; xo < 8; xo++) {
            if(valid_points[yo][xo]) {
              xvariance += quick_square(p3d[yo][xo].x-avg3d.x);
              yvariance += quick_square(p3d[yo][xo].y-avg3d.y);
              zvariance += quick_square(p3d[yo][xo].z-avg3d.z);
              xSS += quick_square(p3d[yo][xo].x);
              ySS += quick_square(p3d[yo][xo].y);
              xybar += p3d[yo][xo].x * p3d[yo][xo].y;
              xzbar += p3d[yo][xo].x * p3d[yo][xo].z;
              yzbar += p3d[yo][xo].y * p3d[yo][xo].z;
            }
          }
        xybar /= valid;
        xzbar /= valid;
        yzbar /= valid;
        xSS /= valid;
        ySS /= valid;
        // least-squares slopes and intercepts of y(x), z(x) and z(y)
        slopeyx = (xybar - avg3d.x * avg3d.y) / (xSS - quick_square(avg3d.x));
        slopezx = (xzbar - avg3d.x * avg3d.z) / (xSS - quick_square(avg3d.x));
        slopezy = (yzbar - avg3d.y * avg3d.z) / (ySS - quick_square(avg3d.y));
        yint = avg3d.y - slopeyx * avg3d.x;
        zxint = avg3d.z - slopezx * avg3d.x;
        zyint = avg3d.z - slopezy * avg3d.y;
        resid_yx = 0;
        resid_zx = 0;
        resid_zy = 0;
        for(yo = 0; yo < 8; yo++)
          for(xo = 0; xo < 8; xo++)
            if(valid_points[yo][xo]) {
              resid_yx += quick_square((p3d[yo][xo].y - slopeyx * p3d[yo][xo].x - yint));
              resid_zx += quick_square((p3d[yo][xo].z - slopezx * p3d[yo][xo].x - zxint));
              resid_zy += quick_square((p3d[yo][xo].z - slopezy * p3d[yo][xo].y - zyint));
            }
        floor_check1 = fabs(atan(slopezx)) < 0.262;
        floor_check2 = fabs(atan(slopezy)) < 0.262;
        floor_check3 = resid_zx * 50000 < valid * quick_square(avg_dist/100);
        floor_check4 = resid_zy * 50000 < valid * quick_square(avg_dist/100);
        if(floor_check1 && floor_check2 && floor_check3 && floor_check4) {
          //Floor or ceiling at a constant height from Kinect
          if(avg3d.z < -800 && avg3d.z > -1600) {
            r = 0xFF;
            g = 0xFF;
            b = 0xFF;
            floor_height += avg3d.z;
            floor_count++;
          }else{
            r = 0xFF;
            g = 0x00;
            b = 128 + avg3d.z / 12 / 100 * 256;
          }
        }else{
          //Wall or non-plane
          //r = std::min<int>(std::max<int>(resid_yx*20,0),255);
          wall_check1 = resid_yx * 1000 < valid * quick_square(avg_dist/100);
          if(wall_check1 && !floor_check3 && !floor_check4) {
            //Using minimum distance to robot point location for hashing, less likely to be out of range.
            origin_dist = (slopeyx * minostate.x - minostate.y + yint) / sqrt(quick_square(slopeyx)+1);
            orient_yx = fmod((atan(slopeyx) + PI / 2),PI);

            // Vote into the histogram only when both bins are inside fft_data.
            // The float comparisons also reject NaN before any int conversion.
            const float dist_cells = origin_dist/100;
            const bool dist_ok = dist_cells > -(nodist/4) - 1 && dist_cells < nodist - nodist/4;
            const bool orient_ok = orient_yx >= -PI && orient_yx <= PI;
            if(dist_ok && orient_ok) {
              const int dist_bin = (int)(dist_cells) + nodist/4;
              const int slope_bin_lo = (int)(orient_yx / PI * nslope / 2);
              const int slope_bin_hi = (int)(orient_yx / PI * nslope / 2 + nslope / 2);
              if(slope_bin_lo >= 0 && slope_bin_lo < nslope)
                fft_data[slope_bin_lo][dist_bin]++;
              if(slope_bin_hi >= 0 && slope_bin_hi < nslope)
                fft_data[slope_bin_hi][dist_bin]++;
            }

            avg_walls[y/WALL_AVG_SIZE][x/WALL_AVG_SIZE][y % WALL_AVG_SIZE][x % WALL_AVG_SIZE] = Wall(slopeyx, yint);
            valid_walls[y/WALL_AVG_SIZE][x/WALL_AVG_SIZE][y % WALL_AVG_SIZE][x % WALL_AVG_SIZE] = true;
            r = 0;
            g = 255-std::min<int>(std::max<int>(orient_yx / PI * 256,0),255);//std::min<int>(std::max<int>(yint*20+128,0),255);
            b = std::min<int>(std::max<int>(orient_yx / PI * 256,0),255);
          }else{
            r = g = b = 0x80;
            if(!wall_check1)
              fail_yx_res++;
            if(!floor_check1)
              fail_floor_check1++;
            if(!floor_check2)
              fail_floor_check2++;
            if(!floor_check3)
              fail_floor_check3++;
            if(!floor_check4)
              fail_floor_check4++;
          }
        }
      }
      /*
      for(yo = 0; yo < 8; yo++)
        for(xo = 0; xo < 8; xo++) {
          frame[y*8+yo][x*8+xo][0] = r;
          frame[y*8+yo][x*8+xo][1] = g;
          frame[y*8+yo][x*8+xo][2] = b;
        }*/
    }
  }
  //std::cerr << fail_yx_res << " " << fail_floor_check1 << " " << fail_floor_check2 << " " << fail_floor_check3 << " " << fail_floor_check4 << std::endl;

  // Forward transform of the histogram.
  fftwf_plan fft = fftwf_plan_dft_r2c_2d(nslope, nodist, &(fft_data[0][0]), fft_out, FFTW_ESTIMATE);
  fftwf_execute(fft);
  fftwf_destroy_plan(fft);

  float mag;
  float stddev_x, stddev_y;
  float var_x, var_y;
  float mean_x, mean_y;
  float filter_x, filter_y;
  float coeff_x, coeff_y;
  stddev_x = 2;
  stddev_y = 2;
  mean_x = 0;
  mean_y = nslope / 2;
  // NOTE(review): x and y still hold the final values of the loops above here;
  // these variances are only referenced by the commented-out Gaussian filter.
  var_x = quick_square(x);
  var_y = quick_square(y);
  coeff_x = 1 / (stddev_x * sqrt(2*PI)) / 0.4;
  coeff_y = 1 / (stddev_y * sqrt(2*PI)) / 0.4;

  // Zero every coefficient outside a small window of rows/columns.
  for(y = 0; y < nslope; y++) {
    Y = (nslope / 2 + y) % nslope;
    // filter_y = coeff_y * exp(-1 * quick_square(mean_y - y) / (2*var_y));
    for(x = 0; x < nodist_half; x++) {
      /*filter_x = fabs(coeff_x * exp(-1 * quick_square(mean_x - x) / (2*var_x)));
      fft_out[Y*nodist_half+x][0] *= filter_x * filter_y;
      fft_out[Y*nodist_half+x][1] *= filter_x * filter_y;
      continue;*/
      if(abs(y - nslope / 2) >= 8 || x >= 8) {
        fft_out[Y*nodist_half+x][0] = 0;
        fft_out[Y*nodist_half+x][1] = 0;
      }else{
        mag = sqrt(quick_square(fft_out[Y*nodist_half+x][0]) + quick_square(fft_out[Y*nodist_half+x][1]));
        /* frame[y][x][0] = mag / fft_out[0][0]*256;
        frame[y][x][1] = mag / fft_out[0][0]*256;
        frame[y][x][2] = mag / fft_out[0][0]*256;*/
      }
    }
  }

  // Back to the (slope, distance) domain; the result is unnormalised.
  fft = fftwf_plan_dft_c2r_2d(nslope, nodist, fft_out, &(fft_data[0][0]), FFTW_ESTIMATE);
  fftwf_execute(fft);
  fftwf_destroy_plan(fft);

  float max_mag = 0, maxgrad;
  int maxgradid;
  std::set< Wall > walls;
  std::set< Wall >::iterator it_walls;
  for(y = 0; y < nslope; y++) {
    for(x = 0; x < nodist; x++) {
      mag = fft_data[y][x];
      if(mag > max_mag)
        max_mag = mag;
    }
  }
  for(y = 0; y < nslope; y++) {
    for(x = 0; x < nodist; x++) {
      // Find the largest positive value in the 3x3 neighbourhood; maxgradid == 0
      // means the centre cell itself (or nothing positive) is the maximum.
      maxgrad = 0;
      maxgradid = 0;
      for(yo = -1; yo <= 1; yo++)
        for(xo = -1; xo <= 1; xo++) {
          const int ny = y + yo;
          const int nx = x + xo;
          // neighbours outside the array are skipped instead of read
          if(ny < 0 || ny >= nslope || nx < 0 || nx >= nodist)
            continue;
          if(fft_data[ny][nx] > maxgrad) {
            maxgrad = fft_data[ny][nx];
            maxgradid = yo * 3 + xo;
          }
        }
      mag = std::max<float>(fft_data[y][x],0);
      // Avoid 0/0 when the filtered histogram has no positive value at all.
      const float shade = max_mag > 0 ? mag / max_mag * 255 : 0;
      if(maxgradid != 0) {
        frame[y][x][0] = shade;
        frame[y][x][1] = shade;
        frame[y][x][2] = shade;
      }else if(abs(y - nslope/2) <= nslope/4){
        frame[y][x][0] = shade;
        frame[y][x][1] = 0;
        frame[y][x][2] = 0;
        if(mag > 125893) //10 ** 5.1
          walls.insert(Wall(fmod((float)y / nslope * 2 * PI,PI) - PI / 2,(float)x - nodist / 4.0));
      }
    }
  }
  for(it_walls = walls.begin(); it_walls != walls.end(); it_walls++) {
    std::cerr << " " << it_walls->orient / PI;
    std::cerr << " " << it_walls->yint;
    std::cerr << " " << log10(max_mag);
    std::cerr << std::endl;
  }
  std::cerr << std::endl;
  fftwf_free(fft_out);
  /*
  float prev_count = fft_data[255] > 3000 ? fft_data[255] : -1;
  float prev_count_2 = fft_data[254] > 3000 ? fft_data[254] : -1;
  for(y = 0; y < 256; y++) {
    if(fft_data[y] > 3000) {
      if(prev_count != -1 && prev_count > fft_data[y] && prev_count_2 < prev_count && prev_count_2 != -1)
        std::cerr << (y-128)*(1/81.487330864) << "\t" << fft_data[y] << std::endl;
      prev_count_2 = prev_count;
      prev_count = fft_data[y];
    }else{
      prev_count = -1;
    }
  }
  std::cerr << std::endl;*/
  return 0;
}
// store translations into transMap
//
// Walks the image grid with two cursors: fixPt (next image to load) and
// waitPt (oldest image still held). For every loaded image an FFTHolder is
// computed asynchronously, and for each in-grid neighbour given by imOffs a
// phaseCorrThr task is started. When more than fetcher.cap images are loaded
// (or reading is finished), the results of all pairs involving waitPt are
// moved into transMap and that image's FFT buffer is freed.
//
//   fetcher  - supplies images (getMat) and the row_major / cap settings
//   absHint  - forwarded to storeHintToMask, FFTHolder and phaseCorrThr
//   transMap - output: receives one TransData per image pair
//   dists    - forwarded to storeHintToMask
void storeTrans(ImgFetcher &fetcher, const Point2f &absHint, PairToTransData &transMap, const MaxDists &dists) {
	// Neighbour offsets to pair each image with; the set depends on the traversal order.
	vector<GridPtOff> imOffs;
	if (fetcher.row_major) {
		imOffs.push_back(makeOff(-1, 0));
		imOffs.push_back(makeOff(-1, -1));
		imOffs.push_back(makeOff(0, -1));
		imOffs.push_back(makeOff(1, -1));
	} else {
		imOffs.push_back(makeOff(0, -1));
		imOffs.push_back(makeOff(-1, -1));
		imOffs.push_back(makeOff(-1, 0));
		imOffs.push_back(makeOff(-1, 1));
	}

	// Pending asynchronous results, keyed by image pair and by grid point.
	map<PtPair, shared_future<TransData>> pairToTransFut;
	map<GridPt, shared_future<FFTHolder>> ptToFFTFut;

	unsigned loaded = 0;
	GridPt fixPt = {{0, 0}};
	GridPt waitPt = {{0, 0}};
	Mat cur;
	// The first image is fetched up front only to learn the image size.
	fetcher.getMat(fixPt, cur);
	Size imSz = cur.size();
	unsigned fftLen = getFFTLen(imSz);

	map<GridPtOff, Mat> hintToMask;
	storeHintToMask(hintToMask, imSz, absHint, dists);

	// The plans are created on a scratch buffer that is freed right away.
	// presumably FFTHolder / phaseCorrThr execute them on their own arrays - verify.
	float *tmp = (float *)fftwf_malloc_thr(sizeof(float) * fftLen);
	fftwf_plan r2cPlan = fftwf_plan_dft_r2c_2d(imSz.height, imSz.width, tmp, (fftwf_complex *)tmp, FFTW_MEASURE);
	fftwf_plan c2rPlan = fftwf_plan_dft_c2r_2d(imSz.height, imSz.width, (fftwf_complex *)tmp, tmp, FFTW_MEASURE);
	fftwf_free_thr(tmp);

	bool readDone = false;
	while (true) { //a dirty kind of event loop
		if (loaded > fetcher.cap || readDone) {
			// printf("start free waitPt %d %d\n", waitPt[0], waitPt[1]);
			// free oldest image, at waitPt
			for (auto &off: imOffs) {
				// *subtract* offset to avoid duplicating pairs
				GridPt nbrPt = {{waitPt[0] - off[0], waitPt[1] - off[1]}};
				if (ptInGrid(nbrPt, fetcher)) {
					PtPair pair = {{waitPt, nbrPt}};
					shared_future<TransData> transFut;
					if (!lookupPair(pairToTransFut, pair, transFut)) {
						printf("err: future of pair %d %d to %d %d not found\n", pair[0][0], pair[0][1], pair[1][0], pair[1][1]);
						exit(1);
					}
					// get() blocks until the pair's task has finished
					transMap.emplace(pair, transFut.get());
					pairToTransFut.erase(pair);
				}
			}

			fftwf_free_thr(ptToFFTFut[waitPt].get().fft);
			ptToFFTFut.erase(waitPt);

			// leaving the loop here is the only exit: every grid point has been released
			if (!nextCoor(waitPt, fetcher)) {
				break;
			}
			loaded--;
		}

		if (!readDone) {
			//printf("emplace fft at %d %d\n", fixPt[0], fixPt[1]);
			fetcher.getMat(fixPt, cur);
			// fft only supports 32-bit float with even width, for now
			assert(cur.type() == CV_32FC1 && (int)cur.step[0] == cur.size().width * 4 && cur.step[1] == 4 && cur.size().width % 2 == 0);
			assert(cur.isContinuous());

			// The task references r2cPlan and absHint, which live until this function returns.
			ptToFFTFut.emplace(fixPt, async(launch::async, [&r2cPlan, &absHint](Mat im) {
				return FFTHolder(im, absHint, r2cPlan);
			}, cur
			));

			for (auto &off: imOffs) {
				GridPt nbrPt = {{fixPt[0] + off[0], fixPt[1] + off[1]}};
				if (ptInGrid(nbrPt, fetcher)) {
					PtPair pair = {{fixPt, nbrPt}};
					// printf("emplace pair transfut %d %d, %d %d\n", pair[0][0], pair[0][1], pair[1][0], pair[1][1]);

					// needed since VS2012 async() can't take functions with too many arguments :(
					// NOTE(review): operator[] inserts an empty future if nbrPt is no longer
					// (or not yet) in the map - confirm the traversal guarantees it is present.
					shared_future<FFTHolder> &a = ptToFFTFut[fixPt];
					shared_future<FFTHolder> &b = ptToFFTFut[nbrPt];
					// [=] copies everything the task uses, including hintToMask
					pairToTransFut.emplace(pair, async(launch::async, [=] {
						return phaseCorrThr(a, b, c2rPlan, pair, absHint, hintToMask, imSz);
					}));
				}
			}

			loaded++;
			if (!nextCoor(fixPt, fetcher)) {
				readDone = true;
			}
		}
	}

	fftwf_destroy_plan(r2cPlan);
	fftwf_destroy_plan(c2rPlan);
}
void MultiAdaptationCSF::process( BidomainArray2D *in, BidomainArray2D *out, BidomainArray2D *adaptationMap ) { const int cols = in->getCols(), rows = in->getRows(); assert( cols == adaptationMap->getCols() ); assert( rows == adaptationMap->getRows() ); const FFTWComplexArray *freqOriginal = in->getFrequency(); FFTWComplexArray freqFiltered( cols, rows ); FFTWArray2D spatialTemp( cols, rows ); fftwf_plan inverseFFT = fftwf_plan_dft_c2r_2d( rows, cols, freqFiltered.getData(), spatialTemp.getData(), FFTW_ESTIMATE ); // MEASURE would damage the data //NOT compatible with new Cygwin version of gcc. //pfs::Array2DImpl **filteredImage = new (pfs::Array2DImpl*)[adaptationLevelsCount]; // Results of filtering in spatial domain are stored there pfs::Array2DImpl **filteredImage = new pfs::Array2DImpl*[adaptationLevelsCount]; for( int i = 0; i < adaptationLevelsCount; i++ ) { // For each adaptation level filterFFTW( freqOriginal->getData(), freqFiltered.getData(), cols, rows, filters[i] ); // dumpPFS( "fft_image.pfs", freqFiltered, cols/2+1, rows, "Y" ); fftwf_execute(inverseFFT); // Copy to filteredImage and normalize filteredImage[i] = new pfs::Array2DImpl( cols, rows ); for( int pix = 0; pix < cols*rows; pix++ ) (*filteredImage[i])(pix) = spatialTemp(pix)/(cols*rows); // // Some debug info // char buf[100]; // sprintf( buf, "csf_filtered_%g.pfs", adaptationLevels[i] ); // dumpPFS( buf, filteredImage[i], "Y" ); std::cerr << "."; } std::cerr << "\n"; const pfs::Array2D *adaptationMapArray = adaptationMap->getSpatial(); pfs::Array2D *outA = out->setSpatial(); // output array // Linear intepolation between adaptation levels { int ind = 0; for( int ind = 0; ind < rows*cols; ind++ ) { float adapt = (*adaptationMapArray)( ind ); if( adapt < adaptationLevels[0] ) (*outA)(ind) = (*filteredImage[0])(ind); else if( adapt > adaptationLevels[adaptationLevelsCount-1] ) (*outA)(ind) = (*filteredImage[adaptationLevelsCount-1])(ind); else { // interpolate int l; for( l = 1; l < 
adaptationLevelsCount; l++ ) if(adapt <= adaptationLevels[l]) break; assert( l > 0 && l < adaptationLevelsCount ); (*outA)(ind) = (*filteredImage[l-1])(ind) + ((*filteredImage[l])(ind)-(*filteredImage[l-1])(ind))* (adapt-adaptationLevels[l-1])/(adaptationLevels[l]-adaptationLevels[l-1]); } } } // dumpPFS( "after_csf.pfs", in, "Y" ); // Clean up for( int i = 0; i < adaptationLevelsCount; i++ ) delete filteredImage[i]; delete[] filteredImage; fftwf_destroy_plan(inverseFFT); }
/*
 * Make sure the FFT work area matches the source size padded by `radius`
 * on every side.
 *
 * When a work image of the right dimensions already exists, only the
 * radius-dependent fields (space, origin, level) are refreshed if the radius
 * changed. Otherwise the work area is cleared and rebuilt: dimensions, buffer
 * size, image/kernel buffers and the in-place r2c / c2r plans.
 *
 * Returns TRUE on success, FALSE (with the work area cleared) when a buffer
 * or a plan could not be created.
 */
static gboolean
focusblur_fft_buffer_update_work (FblurFftBuffer *fft,
                                  gint            radius)
{
  const gint n_row = fft->source.width  + 2 * radius;
  const gint n_col = fft->source.height + 2 * radius;

  if (fft->work.buffers)
    {
      g_warning ("buffer hadn't been cleared.");
      focusblur_fft_work_free_buffers (fft);
    }

  /* Existing work area with matching dimensions: reuse it. */
  if (fft->work.image && n_row == fft->work.row && n_col == fft->work.col)
    {
      if (fft->work.space == radius)
        return TRUE;

      fft->work.space  = radius;
      fft->work.origin = (fft->work.col_padded + 1) * radius;
      fft->work.level  = 0;
      return TRUE;
    }

  /* Dimensions changed (or nothing allocated yet): rebuild from scratch. */
  focusblur_fft_buffer_clear_work (fft);

  fft->work.row               = n_row;
  fft->work.col               = n_col;
  fft->work.col_padded        = (n_col + 2) & ~1;
  fft->work.nelements         = n_row * fft->work.col_padded;
  fft->work.complex_nelements = fft->work.nelements / 2;

  /* Round the byte size up to a multiple of 32:
     32-bytes pair (4x complex or 8x real) processing */
  fft->work.size  = sizeof (fftwf_complex) * fft->work.complex_nelements;
  fft->work.size += 31;
  fft->work.size &= ~31;

  /* fftwf_malloc() (or distributed package) is broken. */
  fft->work.image  = fftwf_malloc (fft->work.size);
  fft->work.kernel = fftwf_malloc (fft->work.size);

  if (! (fft->work.image && fft->work.kernel))
    {
      focusblur_fft_buffer_clear_work (fft);
      return FALSE;
    }

  /* Both transforms run in place on the image buffer. */
  fft->work.plan_r2c = fftwf_plan_dft_r2c_2d (n_row, n_col,
                                              (gfloat *) fft->work.image,
                                              fft->work.image,
                                              FFTW_ESTIMATE);
  fft->work.plan_c2r = fftwf_plan_dft_c2r_2d (n_row, n_col,
                                              fft->work.image,
                                              (gfloat *) fft->work.image,
                                              FFTW_ESTIMATE);

  if (! (fft->work.plan_r2c && fft->work.plan_c2r))
    {
      focusblur_fft_buffer_clear_work (fft);
      return FALSE;
    }

  fft->work.space  = radius;
  fft->work.origin = (fft->work.col_padded + 1) * radius;
  fft->work.level  = 0;

  return TRUE;
}
int main (int argc, char *argv[]) { bool verb, snap; bool abc, adj; int nz, nx, nt, ns, nr; float dz, dx, dt, oz, ox; int nz0, nx0, nb; float oz0, ox0; int nkz, nkx; int nzpad, nxpad; float **u1, **u0; float *ws, *wr; sf_file file_src = NULL, file_rec = NULL; sf_file file_inp = NULL, file_out = NULL; sf_file file_mdl = NULL; sf_axis az = NULL, ax = NULL, at = NULL, as = NULL, ar = NULL; pt2d *src2d = NULL; pt2d *rec2d = NULL; scoef2d cssinc = NULL; scoef2d crsinc = NULL; float *wi = NULL, *wo = NULL; sf_axis ai = NULL, ao = NULL; scoef2d cisinc = NULL, cosinc = NULL; bool spt = false, rpt = false; bool ipt = false, opt = false; sf_init(argc, argv); if (!sf_getbool("verb", &verb)) verb = false; if (!sf_getbool("snap", &snap)) snap = false; if (!sf_getbool("adj", &adj)) adj = false; if (!sf_getint("nb", &nb)) nb = 4; if (sf_getstring("sou") != NULL) { spt = true; if (adj) opt = true; else ipt = true; } if (sf_getstring("rec") != NULL) { rpt = true; if (adj) ipt = true; else opt = true; } file_inp = sf_input("in"); file_mdl = sf_input("model"); if (spt) file_src = sf_input("sou"); if (rpt) file_rec = sf_input("rec"); file_out = sf_output("out"); if (ipt) at = sf_iaxa(file_inp, 2); else at = sf_iaxa(file_inp, 3); if (spt) as = sf_iaxa(file_src, 2); if (rpt) ar = sf_iaxa(file_rec, 2); az = sf_iaxa(file_mdl, 1); ax = sf_iaxa(file_mdl, 2); nt = sf_n(at); dt = sf_d(at); //ot = sf_o(at); nz0 = sf_n(az); dz = sf_d(az); oz0 = sf_o(az); nx0 = sf_n(ax); dx = sf_d(ax); ox0 = sf_o(ax); if (spt) ns = sf_n(as); if (rpt) nr = sf_n(ar); nz = nz0 + 2 * nb; nx = nx0 + 2 * nb; oz = oz0 - nb * dz; ox = ox0 - nb * dx; abc = nb ? 
true : false; // sf_error("ox=%f ox0=%f oz=%f oz0=%f",ox,ox0,oz,oz0); nzpad = kiss_fft_next_fast_size( ((nz+1)>>1)<<1 ); nkx = nxpad = kiss_fft_next_fast_size(nx); nkz = nzpad / 2 + 1; /* float okx = - 0.5f / dx; */ float okx = 0.f; float okz = 0.f; float dkx = 1.f / (nxpad * dx); float dkz = 1.f / (nzpad * dz); float **vp, **eps, **del; vp = sf_floatalloc2(nz, nx); eps = sf_floatalloc2(nz, nx); del = sf_floatalloc2(nz, nx); float **tmparray = sf_floatalloc2(nz0, nx0); sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(vp[0], tmparray[0], nz, nx, nz0, nx0); sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(eps[0], tmparray[0], nz, nx, nz0, nx0); sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(del[0], tmparray[0], nz, nx, nz0, nx0); float **vn, **vh; float **eta, **lin_eta; lin_eta = NULL, vh = NULL; vn = sf_floatalloc2(nz, nx); vh = sf_floatalloc2(nz, nx); eta = sf_floatalloc2(nz, nx); lin_eta = sf_floatalloc2(nz, nx); for (int ix=0; ix<nx; ix++) { for (int iz=0; iz<nz; iz++){ vp[ix][iz] *= vp[ix][iz]; vn[ix][iz] = vp[ix][iz] * (1.f + 2.f * del[ix][iz]); vh[ix][iz] = vp[ix][iz] * (1.f + 2.f * eps[ix][iz]); eta[ix][iz] = (eps[ix][iz] - del[ix][iz]) / (1.f + 2.f * del[ix][iz]); lin_eta[ix][iz] = eta[ix][iz] * (1.f + 2.f * del[ix][iz]); } } float *kx = sf_floatalloc(nkx); float *kz = sf_floatalloc(nkz); for (int ikx=0; ikx<nkx; ++ikx) { kx[ikx] = okx + ikx * dkx; /* if (ikx >= nkx/2) kx[ikx] = (nkx - ikx) * dkx; */ if (ikx >= nkx/2) kx[ikx] = (ikx - nkx) * dkx; kx[ikx] *= 2 * SF_PI; kx[ikx] *= kx[ikx]; } for (int ikz=0; ikz<nkz; ++ikz) { kz[ikz] = okz + ikz * dkz; kz[ikz] *= 2 * SF_PI; kz[ikz] *= kz[ikz]; } if (adj) { ai = ar; ao = as; } else { ai = as; ao = ar; } if (opt) { sf_oaxa(file_out, ao, 1); sf_oaxa(file_out, at, 2); } else { sf_oaxa(file_out, az, 1); sf_oaxa(file_out, ax, 2); sf_oaxa(file_out, at, 3); } sf_fileflush(file_out, NULL); if (spt) { src2d = pt2dalloc1(ns); pt2dread1(file_src, src2d, ns, 2); cssinc = sinc2d_make(ns, src2d, nz, nx, 
dz, dx, oz, ox); ws = sf_floatalloc(ns); if (adj) { cosinc = cssinc; wo = ws; } else { cisinc = cssinc; wi = ws; } } if (rpt) { rec2d = pt2dalloc1(nr); pt2dread1(file_rec, rec2d, nr, 2); crsinc = sinc2d_make(nr, rec2d, nz, nx, dz, dx, oz, ox); wr = sf_floatalloc(nr); if (adj) { cisinc = crsinc; wi = wr; } else { cosinc = crsinc; wo = wr; } } u0 = sf_floatalloc2(nz, nx); u1 = sf_floatalloc2(nz, nx); float *rwave = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float)); float *rwavem = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float)); fftwf_complex *cwave = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex)); fftwf_complex *cwavem = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex)); /* float *rwavem = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float)); fftwf_complex *cwave = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex)); fftwf_complex *cwavem = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex)); */ /* boundary conditions */ float **ucut = NULL; float *damp = NULL; if (!(ipt &&opt)) ucut = sf_floatalloc2(nz0, nx0); damp = damp_make(nb); float wt = 1./(nxpad * nzpad); wt *= dt * dt; fftwf_plan forward_plan; fftwf_plan inverse_plan; #ifdef _OPENMP #ifdef SF_HAS_FFTW_OMP fftwf_init_threads(); fftwf_plan_with_nthreads(omp_get_max_threads()); #endif #endif forward_plan = fftwf_plan_dft_r2c_2d(nxpad, nzpad, rwave, cwave, FFTW_MEASURE); #ifdef _OPENMP #ifdef SF_HAS_FFTW_OMP fftwf_plan_with_nthreads(omp_get_max_threads()); #endif #endif inverse_plan = fftwf_plan_dft_c2r_2d(nxpad, nzpad, cwavem, rwavem, FFTW_MEASURE); int itb, ite, itc; if (adj) { itb = nt -1; ite = -1; itc = -1; } else { itb = 0; ite = nt; itc = 1; } if (adj) { for (int it=0; it<nt; it++) { if (opt) sf_floatwrite(wo, sf_n(ao), file_out); else sf_floatwrite(ucut[0], nz0*nx0, file_out); } sf_seek(file_out, 0, SEEK_SET); } float **ptrtmp = NULL; memset(u0[0], 0, sizeof(float)*nz*nx); memset(u1[0], 0, sizeof(float)*nz*nx); memset(rwave, 0, sizeof(float)*nzpad*nxpad); 
memset(rwavem, 0, sizeof(float)*nzpad*nxpad); memset(cwave, 0, sizeof(float)*nkz*nkx*2); memset(cwavem, 0, sizeof(float)*nkz*nkx*2); for (int it=itb; it!=ite; it+=itc) { if (verb) sf_warning("it = %d;",it); #ifdef _OPENMP double tic = omp_get_wtime(); #endif if (ipt) { if (adj) sf_seek(file_inp, (off_t)(it)*sizeof(float)*sf_n(ai), SEEK_SET); sf_floatread(wi, sf_n(ai), file_inp); for (int i=0; i<sf_n(ai); i++) wi[i] *= dt* dt; } else { if (adj) sf_seek(file_inp, (off_t)(it)*sizeof(float)*nz0*nx0, SEEK_SET); sf_floatread(ucut[0], nz0*nx0, file_inp); for (int j=0; j<nx0; j++) for (int i=0; i<nz0; i++) ucut[j][i] *= dt * dt; } /* apply absorbing boundary condition: E \times u@n-1 */ damp2d_apply(u0, damp, nz, nx, nb); fft_stepforward(u0, u1, rwave, rwavem, cwave, cwavem, vp, vn, eta, vh, eps, lin_eta, kz, kx, forward_plan, inverse_plan, nz, nx, nzpad, nxpad, nkz, nkx, wt, adj); // sinc2d_inject1(u0, ws[it][s_idx], cssinc[s_idx]); if (ipt) sinc2d_inject(u0, wi, cisinc); else wfld2d_inject(u0, ucut, nz0, nx0, nb); /* apply absorbing boundary condition: E \times u@n+1 */ damp2d_apply(u0, damp, nz, nx, nb); /* loop over pointers */ ptrtmp = u0; u0 = u1; u1 = ptrtmp; if (opt) { if (adj) sf_seek(file_out, (off_t)(it)*sizeof(float)*sf_n(ao),SEEK_SET); sinc2d_extract(u0, wo, cosinc); sf_floatwrite(wo, sf_n(ao), file_out); } else { if (adj) sf_seek(file_out, (off_t)(it)*sizeof(float)*nz0*nx0,SEEK_SET); wwin2d(ucut, u0, nz0, nx0, nb); sf_floatwrite(ucut[0], nz0*nx0, file_out); } #ifdef _OPENMP double toc = omp_get_wtime(); if (verb) fprintf(stderr," clock = %lf;", toc-tic); #endif } /* END OF TIME LOOP */ return 0; }