コード例 #1
0
ファイル: gravity.c プロジェクト: jacksonmj/The-Powder-Toy
void grav_fft_init()
{
	int xblock2 = XRES/CELL*2;
	int yblock2 = YRES/CELL*2;
	int x, y, fft_tsize = (xblock2/2+1)*yblock2;
	float distance, scaleFactor;
	fftwf_plan plan_ptgravx, plan_ptgravy;
	if (grav_fft_status) return;

	//use fftw malloc function to ensure arrays are aligned, to get better performance
	th_ptgravx = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float));
	th_ptgravy = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float));
	th_ptgravxt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex));
	th_ptgravyt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex));
	th_gravmapbig = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float));
	th_gravmapbigt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex));
	th_gravxbig = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float));
	th_gravybig = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float));
	th_gravxbigt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex));
	th_gravybigt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex));

	//select best algorithm, could use FFTW_PATIENT or FFTW_EXHAUSTIVE but that increases the time taken to plan, and I don't see much increase in execution speed
	plan_ptgravx = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravx, th_ptgravxt, FFTW_MEASURE);
	plan_ptgravy = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravy, th_ptgravyt, FFTW_MEASURE);
	plan_gravmap = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_gravmapbig, th_gravmapbigt, FFTW_MEASURE);
	plan_gravx_inverse = fftwf_plan_dft_c2r_2d(yblock2, xblock2, th_gravxbigt, th_gravxbig, FFTW_MEASURE);
	plan_gravy_inverse = fftwf_plan_dft_c2r_2d(yblock2, xblock2, th_gravybigt, th_gravybig, FFTW_MEASURE);

	//(XRES/CELL)*(YRES/CELL)*4 is size of data array, scaling needed because FFTW calculates an unnormalized DFT
	scaleFactor = -M_GRAV/((XRES/CELL)*(YRES/CELL)*4);
	//calculate velocity map caused by a point mass
	for (y=0; y<yblock2; y++)
	{
		for (x=0; x<xblock2; x++)
		{
			if (x==XRES/CELL && y==YRES/CELL) continue;
			distance = sqrtf(pow(x-(XRES/CELL), 2) + pow(y-(YRES/CELL), 2));
			th_ptgravx[y*xblock2+x] = scaleFactor*(x-(XRES/CELL)) / pow(distance, 3);
			th_ptgravy[y*xblock2+x] = scaleFactor*(y-(YRES/CELL)) / pow(distance, 3);
		}
	}
	th_ptgravx[yblock2*xblock2/2+xblock2/2] = 0.0f;
	th_ptgravy[yblock2*xblock2/2+xblock2/2] = 0.0f;

	//transform point mass velocity maps
	fftwf_execute(plan_ptgravx);
	fftwf_execute(plan_ptgravy);
	fftwf_destroy_plan(plan_ptgravx);
	fftwf_destroy_plan(plan_ptgravy);
	fftwf_free(th_ptgravx);
	fftwf_free(th_ptgravy);

	//clear padded gravmap
	memset(th_gravmapbig,0,xblock2*yblock2*sizeof(float));

	grav_fft_status = 1;
}
コード例 #2
0
ファイル: hdrimage.cpp プロジェクト: michalkucis/OpenGPP
void HDRImage3c::rfftplanUpdate ()
{
	uint2 total = getTotalSize()-uint2(0,2);
	m_rfftplanR = fftwf_plan_dft_c2r_2d (total.y, total.x, 
			m_red, m_hdriRFFT->getRedBuffer(), FFTW_MEASURE);
	m_rfftplanG = fftwf_plan_dft_c2r_2d (total.y, total.x, 
			m_green, m_hdriRFFT->getGreenBuffer(), FFTW_MEASURE);
	m_rfftplanB = fftwf_plan_dft_c2r_2d (total.y, total.x, 
			m_blue, m_hdriRFFT->getBlueBuffer(), FFTW_MEASURE);
}
コード例 #3
0
ファイル: GravitySolver.cpp プロジェクト: ohahn/tetpm2d
// Builds an n x n FFT gravity solver: allocates the padded real grids,
// creates the in-place forward/inverse FFTW plans over `data`, and fixes the
// unit system and cosmological parameters.
gravity_solver::gravity_solver( unsigned n )
: n_( n )
{
    // Multithreaded FFTW is currently switched off:
    //   fftwf_init_threads();
    //   fftwf_plan_with_nthreads(omp_get_max_threads());

    // Rows carry n_+2 floats so the same memory can hold the complex spectrum.
    data = new fftwf_real[ n_ * (n_+2) ];
    force = new fftwf_real[ n_ * (n_+2) ];
    cdata = reinterpret_cast<fftwf_complex*>(data);

    box_ = boxlength;
    box05_ = 0.5f * boxlength;

    plan = fftwf_plan_dft_r2c_2d( n_, n_, data, cdata, FFTW_MEASURE );
    iplan = fftwf_plan_dft_c2r_2d( n_, n_, cdata, data, FFTW_MEASURE );

    // Unit system
    UnitLength_in_cm = 3.08568025e24f;      // 1.0 Mpc
    UnitMass_in_g = 1.989e43f;              // 1.0e10 solar masses
    UnitVelocity_in_cm_per_s = 1e5f;        // 1 km/sec
    UnitTime_in_s = UnitLength_in_cm / UnitVelocity_in_cm_per_s;

    // Gravitational constant, first in cgs and then in the units above
    GRAVITY = 6.672e-8f;
    G = GRAVITY / pow(UnitLength_in_cm, 3) * UnitMass_in_g * pow(UnitTime_in_s, 2);

    // Cosmology (alternative values noted in the original: 0.276 / 0.724)
    Omega_m = 1.0;
    Omega_L = 0.0;

    aforce = 0.0;
    stepno = 0;
}
コード例 #4
0
ファイル: fft2w.c プロジェクト: 1014511134/src
void ifft2(float *out      /* [n1*n2] */,
	   sf_complex *inp /* [nk*n2] */)
/*< 2-D inverse FFT >*/
{
    int i1, i2;

#ifdef SF_HAS_FFTW
    /* the backward plan is built lazily, on the first call */
    if (NULL==icfg) {
	if (cmplx) {
	    icfg = fftwf_plan_dft_2d(n2,n1,
				     (fftwf_complex *) dd,
				     (fftwf_complex *) cc[0],
				     FFTW_BACKWARD, FFTW_MEASURE);
	} else {
	    icfg = fftwf_plan_dft_c2r_2d(n2,n1,
					 (fftwf_complex *) dd, ff[0],
					 FFTW_MEASURE);
	}
	if (NULL == icfg) sf_error("FFTW failure.");
    }

    /* stage the input in dd, the buffer the plan was created on */
    for (i1=0; i1 < nk*n2; i1++) {
	dd[i1] = inp[i1];
    }

    fftwf_execute(icfg);
#else
    /* transform along the second axis, one wavenumber column at a time */
    for (i1=0; i1 < nk; i1++) {
	kiss_fft_stride(icfg2,(kiss_fft_cpx *) (inp+i1),ctrace2,nk);

	for (i2=0; i2<n2; i2++) {
	    tmp[i2][i1] = ctrace2[i2];
	}
    }
    /* then along the first axis, one trace at a time */
    for (i2=0; i2 < n2; i2++) {
	if (!cmplx) {
	    kiss_fftri(icfg,tmp[i2],ff[i2]);
	} else {
	    kiss_fft_stride(icfg1,tmp[i2],(kiss_fft_cpx *) cc[i2],1);
	}
    }
#endif

    /* FFT centering and normalization: weight wt with an alternating sign */
    for (i2=0; i2<n2; i2++) {
	for (i1=0; i1<n1; i1++) {
	    if (cmplx) {
		/* checkerboard sign pattern over both axes */
		out[i2*n1+i1] = (((i1+i2)%2 == 0)? wt:-wt) * crealf(cc[i2][i1]);
	    } else {
		/* sign alternates along the second axis only */
		out[i2*n1+i1] = ((i2%2 == 0)? wt:-wt)*ff[i2][i1];
	    }
	}
    }
}
コード例 #5
0
/*
 * Prepares the plugin's FFT state for the current drawable.
 *
 * Allocates one float image buffer and one frequency buffer per channel,
 * reads the drawable's pixels into pd->img_pixels, and runs a forward
 * real-to-complex FFT of every channel (scaled by 1/(w*h)) into
 * pd->image_freq. On return pd->image holds the unscaled pixel values of each
 * channel and pd->plan holds the inverse plan, kept for later reuse.
 *
 * pd: plugin state; image_width, image_height, channel_count and drawable
 *     are read, the buffers and the inverse plan are stored back into it.
 */
void fft_prepare(PluginData *pd)
{
	gint         w = pd->image_width, h = pd->image_height;
	gint         channel_count = pd->channel_count;
	float      **image;
	guchar      *img_pixels;
	float        norm;

	image = pd->image = (float**) malloc(sizeof(float*) * channel_count);
	pd->image_freq = (fftwf_complex**) malloc(sizeof(fftwf_complex*) * channel_count);
	img_pixels = pd->img_pixels = g_new (guchar, w * h * channel_count);

	// allocate an array for each channel
	for (int channel = 0; channel < channel_count; channel++)
	{
		image[channel] = (float*) fftwf_malloc(sizeof(float) * w * h);
		pd->image_freq[channel] = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * (w/2+1) * h);
	}

	// Both plans are created on the first channel's buffers; the forward one
	// is applied to every channel through the new-array execute call below.
	// forward plan
	fftwf_plan plan = fftwf_plan_dft_r2c_2d(h, w, *image, *pd->image_freq, FFTW_ESTIMATE);
	// inverse plan (to be reused)
	pd->plan = fftwf_plan_dft_c2r_2d(h, w, *pd->image_freq, *image, FFTW_ESTIMATE);

	// set image region to reading mode
	gimp_pixel_rgn_init (&pd->region, pd->drawable, 0, 0, w, h, FALSE, FALSE);
	gimp_pixel_rgn_get_rect(&pd->region, img_pixels, 0, 0, w, h);

	// execute forward FFT once
	norm = 1.0/(w*h);
	for (int channel = 0; channel < channel_count; channel++)
	{
		// de-interleave one colour channel into float[], pre-scaled for the FFT
		for (int i = 0; i < w*h; i++)
		{
			image[channel][i] = (float) img_pixels[i*channel_count + channel] * norm;
		}
		// transform the channel
		fftwf_execute_dft_r2c(plan, image[channel], pd->image_freq[channel]);
		// copy the channel again, unscaled, for preview
		// (the former second scaled copy was overwritten right here, so it is gone)
		for (int i = 0; i < w*h; i++)
		{
			image[channel][i] = (float) img_pixels[i*channel_count + channel];
		}
	}
	fftwf_destroy_plan(plan);
}
コード例 #6
0
ファイル: fft2.c プロジェクト: krushev36/src
void ifft2_allocate(sf_complex *inp /* [nk*n2] */)
/*< allocate inverse transform >*/
{
#ifdef SF_HAS_FFTW
    /* plan directly on the caller's buffer: complex-to-complex backward
       transform into cc when cmplx is set, complex-to-real into ff otherwise */
    if (cmplx) {
        icfg = fftwf_plan_dft_2d(n2,n1,
                                 (fftwf_complex *) inp,
                                 (fftwf_complex *) cc[0],
                                 FFTW_BACKWARD, FFTW_MEASURE);
    } else {
        icfg = fftwf_plan_dft_c2r_2d(n2,n1,
                                     (fftwf_complex *) inp, ff[0],
                                     FFTW_MEASURE);
    }

    if (NULL == icfg) sf_error("FFTW failure.");
#endif
}
コード例 #7
0
ファイル: fft.c プロジェクト: Starlink/sextractor
/****** fft_conv ************************************************************
PROTO	void fft_conv(float *data1, float *fdata2, int *size)
PURPOSE	Optimized 2-dimensional FFT convolution using the FFTW library.
INPUT	ptr to the first image,
	ptr to the Fourier transform of the second image,
	image size vector.
OUTPUT	-.
NOTES	For data1 and fdata2, memory must be allocated for
	size[0]* ... * 2*(size[naxis-1]/2+1) floats (padding required).
AUTHOR	E. Bertin (IAP)
VERSION	29/03/2013
 ***/
void    fft_conv(float *data1, float *fdata2, int *size)
  {
   float		*fdata1p,*fdata2p,
			real,imag, fac;
   int			i, npix,npix2;

/* Convert axis indexing to that of FFTW */
  npix = size[0]*size[1];
  npix2 = ((size[0]/2) + 1) * size[1];

/* Forward FFT "in place" for data1 */
  if (!fplan)
    {
    QFFTWF_MALLOC(fdata1, fftwf_complex, npix2);
    fplan = fftwf_plan_dft_r2c_2d(size[1], size[0], data1,
        (fftwf_complex *)fdata1, FFTW_ESTIMATE);
    }

  fftwf_execute_dft_r2c(fplan, data1, fdata1);

/* Actual convolution (Fourier product) */
  fac = 1.0/npix;  
  fdata1p = (float *)fdata1;
  fdata2p = fdata2;
#pragma ivdep
  for (i=npix2; i--;)
    {
    real = *fdata1p **fdata2p - *(fdata1p+1)**(fdata2p+1);
    imag = *(fdata1p+1)**fdata2p + *fdata1p**(fdata2p+1);
    *(fdata1p) = fac*real;
    *(fdata1p+1) = fac*imag;
    fdata1p+=2;
    fdata2p+=2;
    }

/* Reverse FFT */
  if (!bplan)
    bplan = fftwf_plan_dft_c2r_2d(size[1], size[0], (fftwf_complex *)fdata1, 
        data1, FFTW_ESTIMATE);
  fftwf_execute_dft_c2r(bplan, fdata1, data1);

//  fftwf_execute(plan);


  return;
  }
コード例 #8
0
ファイル: glfftwater.cpp プロジェクト: jcayzac/Rocky
// Builds the FFT water state for an N x N grid (N = params.N):
//   - allocates the height/displacement buffers m_h, m_dx, m_dz and m_w,
//   - fills the initial spectrum m_htilde0 from a fixed-seed random generator,
//   - precomputes the normalised wave-vector components m_kx / m_kz,
//   - creates the in-place complex-to-real FFTW plan on m_h,
//   - creates the RGB16F texture m_texId of size N x N.
GLFFTWater::GLFFTWater(GLFFTWaterParams &params) {
    // m_h, m_dx, m_dz: N rows of N+2 floats (the layout the in-place c2r plan below uses).
    const size_t paddedBytes = sizeof(float)*(params.N+2)*(params.N);
    const size_t squareBytes = sizeof(float)*(params.N)*(params.N);

#ifdef _WIN32
    m_h = (float *)__mingw_aligned_malloc(paddedBytes, 4);
    m_dx = (float *)__mingw_aligned_malloc(paddedBytes, 4);
    m_dz = (float *)__mingw_aligned_malloc(paddedBytes, 4);
    m_w = (float *)__mingw_aligned_malloc(squareBytes, 4);
#else
    // posix_memalign() only accepts alignments that are a power-of-two multiple
    // of sizeof(void *). The former value 4 is rejected with EINVAL wherever
    // pointers are 8 bytes, which left the pointers unset. 16 is valid on both
    // 32- and 64-bit builds.
    const size_t alignment = 16;
    if (posix_memalign((void **)&m_h, alignment, paddedBytes) != 0) m_h = 0;
    if (posix_memalign((void **)&m_dx, alignment, paddedBytes) != 0) m_dx = 0;
    if (posix_memalign((void **)&m_dz, alignment, paddedBytes) != 0) m_dz = 0;
    if (posix_memalign((void **)&m_w, alignment, squareBytes) != 0) m_w = 0;
#endif
    if (!m_h || !m_dx || !m_dz || !m_w) {
        // Reported the same way as the FFTW thread failure below. The pointers
        // are null rather than indeterminate, so later use faults predictably.
        cerr << "Error allocating water simulation buffers." << endl;
    }

    m_htilde0 = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex)*(params.N)*(params.N));
    m_heightmap = new float3[(params.N)*(params.N)];
    m_params = params;

    // Fixed seed, so the same random numbers are drawn on every run.
    std::tr1::mt19937 prng(1337);
    std::tr1::normal_distribution<float> normal;
    std::tr1::uniform_real<float> uniform;
    std::tr1::variate_generator<std::tr1::mt19937, std::tr1::normal_distribution<float> > randn(prng,normal);
    std::tr1::variate_generator<std::tr1::mt19937, std::tr1::uniform_real<float> > randu(prng,uniform);
    for(int i=0, k=0; i<params.N; i++) {
        float k_x = (-(params.N-1)*0.5f+i)*(2.f*3.141592654f / params.L);
        for(int j=0; j<params.N; j++, k++) {
            float k_y = (-(params.N-1)*0.5f+j)*(2.f*3.141592654f / params.L);
            float A = randn();
            float theta = randu()*2.f*3.141592654f;
            // NOTE(review): m_w[k] is passed to phillips() before anything here
            // writes it; presumably phillips() fills it in - confirm.
            float P = (k_x==0.f && k_y==0.0f) ? 0.f : sqrtf(phillips(k_x,k_y,m_w[k]));
            m_htilde0[k][0] = m_htilde0[k][1] = P*A*sinf(theta);
        }
    }

    m_kz = new float[params.N*(params.N / 2 + 1)];
    m_kx = new float[params.N*(params.N / 2 + 1)];

    // Unit wave-vector components, with the frequency origin at (hN, hN).
    const int hN = m_params.N / 2;
    for(int y=0; y<m_params.N; y++) {
        float kz = (float) (y - hN);
        for(int x=0; x<=hN; x++) {
            float kx = (float) (x - hN);
            float len = sqrtf(kx*kx+kz*kz);
            // The zero vector has no direction: store 0 instead of the NaN
            // that 0 * (1/0) would produce.
            float k = (len > 0.f) ? 1.f/len : 0.f;
            m_kz[y*(hN+1)+x] = kz*k;
            m_kx[y*(hN+1)+x] = kx*k;
        }
    }

    if(!fftwf_init_threads()) {
        cerr << "Error initializing multithreaded fft."  << endl;
    } else {
        fftwf_plan_with_nthreads(2);
    }

    // In-place inverse transform: m_h is both the complex input and the real output.
    m_fftplan = fftwf_plan_dft_c2r_2d(m_params.N, m_params.N, (fftwf_complex *)m_h, m_h,
                                      FFTW_ESTIMATE);

    glGenTextures(1, &m_texId);
    glBindTexture(GL_TEXTURE_2D, m_texId);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, params.N, params.N, 0, GL_RGB, GL_FLOAT, 0);
    glBindTexture(GL_TEXTURE_2D, 0);
}
コード例 #9
0
ファイル: TransferFunction.cpp プロジェクト: ohahn/tetpm2d
void cosmo_init_particles( unsigned seed )
{
    fftwf_real *data = new fftwf_real[ nres * (nres+2) ];
    fftwf_complex *cdata = reinterpret_cast<fftwf_complex*>(data);
    
    fftwf_real *data2 = new fftwf_real[ nres * (nres+2) ];
    fftwf_complex *cdata2 = reinterpret_cast<fftwf_complex*>(data2);
    
    gsl_rng	*RNG = gsl_rng_alloc( gsl_rng_mt19937 );
	gsl_rng_set( RNG, seed );
	
    
    
    fftwf_plan plan, iplan, plan2, iplan2;
    plan  = fftwf_plan_dft_r2c_2d( nres, nres, data, cdata, FFTW_MEASURE ),
    iplan = fftwf_plan_dft_c2r_2d( nres, nres, cdata, data, FFTW_MEASURE );
    
    plan2  = fftwf_plan_dft_r2c_2d( nres, nres, data2, cdata2, FFTW_MEASURE ),
    iplan2 = fftwf_plan_dft_c2r_2d( nres, nres, cdata2, data2, FFTW_MEASURE );
    
    /////////////////////////////////
    
    int nresp = nres/2+1;
    float kfac = 2.0*M_PI/boxlength;
    float gaussran1, gaussran2;
    
    float fftnorm = 1.0f / (float)nres * (2.0f*M_PI/boxlength);
    
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nres; ++j )
        {
            int idx = i*(nres+2)+j;
            data[idx] = gsl_ran_ugaussian_ratio_method( RNG ) / nres;
        }
    fftwf_execute( plan );
    
    /////////////////////////////////
    
    
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nresp; ++j )
        {
            float kx = i>=nresp? (float)(i-nres)*kfac : (float)i*kfac;
            float ky = (float)j*kfac;
            
            float kk = sqrtf(kx*kx+ky*ky);
            
            int idx = i*nresp+j;
            
            float ampk = cosmo_get_amp_k( kk ); //*sqrtf(kk);
            
            if( kk >= nresp*kfac )
                ampk = 0.0;
            
            cdata[idx][0] *= ampk * fftnorm;
            cdata[idx][1] *= ampk * fftnorm;
            
            
        }
      
    
    // insert code to make random numbers independent of resolution (have rectangle outliens)
    
    
    float dx = boxlength / nres;
    float vfact = ComputeVFact( 1.0f/(1.0f+g_zstart));
    
    /////////////////////////////////
    // generate x-component
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nresp; ++j )
        {
            float kx = i>=nresp? (float)(i-nres)*kfac : (float)i*kfac;
            float ky = (float)j*kfac;
            
            float kk = sqrtf(kx*kx+ky*ky);
            
            int idx = i*nresp+j; // (a+ib) * ik = iak -bk
            
            cdata2[idx][0] = kx/kk/kk * cdata[idx][1];
            cdata2[idx][1] = -kx/kk/kk * cdata[idx][0];
        }
    
    cdata2[0][0] = 0.0f;
    cdata2[0][1] = 0.0f;
    
    fftwf_execute( iplan2 );
    
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nres; ++j )
        {
            int idx = i*(nres+2)+j;
            int ii = i*nres+j;
            P[ii].x = (float)i*dx + data2[idx];
            P[ii].vx = data2[idx] * vfact;
            P[ii].id = ii;
            P[ii].acc[0] = 0.0f;
            
        }
    
    /////////////////////////////////
    // generate y-component
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nresp; ++j )
        {
            float kx = i>=nresp? (float)(i-nres)*kfac : (float)i*kfac;
            float ky = (float)j*kfac;
            
            float kk = sqrtf(kx*kx+ky*ky);
            
            int idx = i*nresp+j;
            
            cdata2[idx][0] = ky/kk/kk * cdata[idx][1];
            cdata2[idx][1] = -ky/kk/kk * cdata[idx][0];
        }
    
    cdata2[0][0] = 0.0f;
    cdata2[0][1] = 0.0f;
    
    fftwf_execute( iplan2 );
    
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nres; ++j )
        {
            int idx = i*(nres+2)+j;
            int ii = i*nres+j;
            P[ii].y = (float)j*dx + data2[idx];
            P[ii].vy = data2[idx] * vfact;
            P[ii].acc[1] = 0.0f;
        }
    
    /////////////////////////////////
    
    delete[] data;
    delete[] data2;
    fftwf_destroy_plan(plan);
    fftwf_destroy_plan(iplan);
    fftwf_destroy_plan(plan2);
    fftwf_destroy_plan(iplan2);
    gsl_rng_free( RNG );
}
コード例 #10
0
ファイル: func.cpp プロジェクト: M4I-nanoscopy/motioncorr
void SetFastFFT(float *buf, DIM nsam)
{
	plan_fft_fast=fftwf_plan_dft_r2c_2d(nsam.y,nsam.x,buf,reinterpret_cast<fftwf_complex *>(buf),FFTW_ESTIMATE); 
	plan_ifft_fast=fftwf_plan_dft_c2r_2d(nsam.y,nsam.x,reinterpret_cast<fftwf_complex *>(buf),buf,FFTW_ESTIMATE); 
}
コード例 #11
0
ファイル: func.cpp プロジェクト: M4I-nanoscopy/motioncorr
void ifft2d(float* buf, DIM nsam)
{
	fftwf_plan plan_fft=fftwf_plan_dft_c2r_2d(nsam.y,nsam.x,reinterpret_cast<fftwf_complex *>(buf),buf,FFTW_ESTIMATE); 
	fftwf_execute(plan_fft);	
	fftwf_destroy_plan(plan_fft);
}
コード例 #12
0
ファイル: wallframe.cpp プロジェクト: nkumar212/ECE477
/**
 * Processes the current Kinect depth frame to look for walls.
 *
 * The 640x480 depth image is walked in 8x8 pixel tiles. For each tile with
 * enough valid samples, least-squares fits classify it as floor/ceiling-like
 * or wall-like. Wall-like tiles vote into a 2-D (orientation, distance)
 * accumulator. The accumulator is low-pass filtered with a forward/inverse
 * FFT pair and rendered into `frame`. Cells that are not exceeded by a
 * neighbour and whose value is large enough are printed to std::cerr as wall
 * candidates.
 *
 * @param main  Source of the depth buffer, the Kinect and the Minotaur state.
 * @return 1 when no depth data is available yet, 0 otherwise.
 */
int ComWallFrame::action(IDS* main)
{
	int x,y,xo,yo, Y;
	Kinect::depth_buffer* dframe = main->getDepth();
	Kinect* kinect = main->getKinect();
	Minotaur* minotaur = main->getMinotaur();
	Minotaur::MinotaurState minostate = minotaur->getState();

	Point p3d[8][8];
	Point avg3d;
	Point avgbar_flat;
	int valid;
	float zvariance, xvariance, yvariance, xSS, ySS, xybar, xzbar, yzbar;
	float slopeyx, slopezx, slopezy;
	float yint, zxint, zyint;
	float resid_yx, resid_zx, resid_zy;
	uint8_t r,g,b;
	uint16_t d, d0, d1;
	float fd;
	float floor_height = 0;
	int floor_count = 0;
	float rx, ry, rz;
	float sin_ori = sin(minostate.orient);
	float cos_ori = cos(minostate.orient);
	float origin_dist;
	float avg_dist;
	float orient_yx;

	uint32_t count, max_count = 0;

	Wall avg_walls[480/8/WALL_AVG_SIZE][640/8/WALL_AVG_SIZE][WALL_AVG_SIZE][WALL_AVG_SIZE];
	bool valid_walls[480/8/WALL_AVG_SIZE][640/8/WALL_AVG_SIZE][WALL_AVG_SIZE][WALL_AVG_SIZE];

	// Accumulator: nslope orientation bins by nodist distance bins.
	int nslope = 480;
	int nodist = 256;
	int nodist_half = nodist / 2 + 1;
	float fft_data[nslope][nodist];
	fftwf_complex* fft_out = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*nslope*nodist_half);

	float avg_slope, avg_yint;

	int fail_yx_res = 0;
	int fail_zx_res = 0;
	int fail_zy_res = 0;

	int fail_floor_check1 = 0;
	int fail_floor_check2 = 0;
	int fail_floor_check3 = 0;
	int fail_floor_check4 = 0;

	bool wall_check1, wall_check2, wall_check3;
	bool floor_check1, floor_check2, floor_check3, floor_check4;

	if(main->getDepthCount() <= 0)
	{
		std::cerr << "MapFrame awaiting depth data" << std::endl;
		// fft_out was already allocated above; release it on this early exit too
		fftwf_free(fft_out);
		return 1;
	}

	for(y = 0; y < 480/8/WALL_AVG_SIZE; y++)
		for(x = 0; x < 640/8/WALL_AVG_SIZE; x++)
			for(yo = 0; yo < WALL_AVG_SIZE; yo++)
				for(xo = 0; xo < WALL_AVG_SIZE; xo++)
					valid_walls[y][x][yo][xo] = false;

	for(x = 0; x < nodist; x++)
		for(y = 0; y < nslope; y++)
			fft_data[y][x] = 0;

	for(y = 0; y < 480/8; y++)
	{
		for(x = 0; x < 640/8; x++)
		{
			avg3d = {0,0,0};
			avg_dist = 0;
			valid = 0;
			xybar = 0;

			// Decode the 8x8 tile and project every usable sample into world space
			for(yo = 0; yo < 8; yo++)
			{
				for(xo = 0; xo < 8; xo++)
				{
					// Depth sample is stored as two bytes, low byte first
					d0 = (*dframe)[y*8+yo][x*8+xo][0];
					d1 = (*dframe)[y*8+yo][x*8+xo][1];
					d = d1;
					d = d << 8 | d0;

					if(d != 0x07FF && d <= KINECT_CALIB_DOFF)
					{
						fd = decode_kinect_dist[d];

						avg_dist += fd;
						rx = kinect->x3d(x,y,xo,yo,fd);
						ry = kinect->y3d(x,y,xo,yo,fd);
						rz = kinect->z3d(x,y,xo,yo,fd);

						// Rotate by the robot orientation and translate by its position
						p3d[yo][xo].x = rx * cos_ori - ry * sin_ori + minostate.x;
						p3d[yo][xo].y = rx * sin_ori + ry * cos_ori + minostate.y;
						p3d[yo][xo].z = rz;

						avg3d.x += p3d[yo][xo].x;
						avg3d.y += p3d[yo][xo].y;
						avg3d.z += p3d[yo][xo].z;

						valid_points[yo][xo] = true;
						++valid;
					}else
						valid_points[yo][xo] = false;
						//p3d[yo][xo].valid = false;
				}
			}

			// With valid == 0 these become NaN, but such tiles are rejected just below
			avg3d.x /= valid;
			avg3d.y /= valid;
			avg3d.z /= valid;

			if(valid <= (8*8)*3/4)
			{
				//Not enough data to represent the points
				r = 0x00;
				g = 0x00;
				b = 0x00;
			}else{

				//Calculate statistics for slope calculation
				zvariance = 0;
				xvariance = 0;
				yvariance = 0;
				xSS = 0, ySS = 0;
				xybar = 0, xzbar = 0, yzbar = 0;
				for(yo = 0; yo < 8; yo++)
					for(xo = 0; xo < 8; xo++)
					{
						if(valid_points[yo][xo])
						{
							xvariance += quick_square(p3d[yo][xo].x-avg3d.x);
							yvariance += quick_square(p3d[yo][xo].y-avg3d.y);
							zvariance += quick_square(p3d[yo][xo].z-avg3d.z);

							xSS += quick_square(p3d[yo][xo].x);
							ySS += quick_square(p3d[yo][xo].y);
							xybar += p3d[yo][xo].x * p3d[yo][xo].y;
							xzbar += p3d[yo][xo].x * p3d[yo][xo].z;
							yzbar += p3d[yo][xo].y * p3d[yo][xo].z;
						}
					}

				xybar /= valid;
				xzbar /= valid;
				yzbar /= valid;
				xSS /= valid;
				ySS /= valid;

				// Least-squares lines: y against x, z against x, z against y
				slopeyx = (xybar - avg3d.x * avg3d.y) / (xSS - quick_square(avg3d.x));
				slopezx = (xzbar - avg3d.x * avg3d.z) / (xSS - quick_square(avg3d.x));
				slopezy = (yzbar - avg3d.y * avg3d.z) / (ySS - quick_square(avg3d.y));
				yint = avg3d.y - slopeyx * avg3d.x;
				zxint = avg3d.z - slopezx * avg3d.x;
				zyint = avg3d.z - slopezy * avg3d.y;
					
				// Sum of squared residuals of each fit
				resid_yx = 0;
				resid_zx = 0;
				resid_zy = 0;
				for(yo = 0; yo < 8; yo++)
					for(xo = 0; xo < 8; xo++)
						if(valid_points[yo][xo])
						{
							resid_yx += quick_square((p3d[yo][xo].y - slopeyx * p3d[yo][xo].x - yint));
							resid_zx += quick_square((p3d[yo][xo].z - slopezx * p3d[yo][xo].x - zxint));
							resid_zy += quick_square((p3d[yo][xo].z - slopezy * p3d[yo][xo].y - zyint));
						}

				// Floor-like: z changes slowly along x and y (|angle| < 0.262 rad)
				// and both z fits have small residuals relative to the distance
				floor_check1 = fabs(atan(slopezx)) < 0.262;
				floor_check2 = fabs(atan(slopezy)) < 0.262;
				floor_check3 = resid_zx * 50000 < valid * quick_square(avg_dist/100);
				floor_check4 = resid_zy * 50000 < valid * quick_square(avg_dist/100);

				if(floor_check1 && floor_check2 && floor_check3 && floor_check4)
				{
					//Floor or ceiling at a constant height from Kinect
					if(avg3d.z < -800 && avg3d.z > -1600)
					{
						r = 0xFF;
						g = 0xFF;
						b = 0xFF;
						floor_height += avg3d.z;
						floor_count++;
					}else{
						r = 0xFF;
						g = 0x00;
						b = 128 + avg3d.z / 12 / 100 * 256;
					}
				}else{
					//Wall or non-plane
					//r = std::min<int>(std::max<int>(resid_yx*20,0),255);

					wall_check1 = resid_yx * 1000 < valid * quick_square(avg_dist/100);

					if(wall_check1 && !floor_check3 && !floor_check4)
					{
						//Using minimum distance to robot point location for hashing, less likely to be out of range.
						origin_dist = (slopeyx * minostate.x - minostate.y + yint) / sqrt(quick_square(slopeyx)+1);
						orient_yx = fmod((atan(slopeyx) + PI / 2),PI);

						// Vote into the accumulator, but only when both bins exist.
						// The range test is done in floating point first because
						// converting NaN or a huge value to int is undefined; an
						// out-of-range bin used to write outside fft_data.
						if(orient_yx >= 0 && orient_yx < PI &&
						   origin_dist/100 > -nodist && origin_dist/100 < nodist)
						{
							int slope_bin_lo = (int)(orient_yx / PI * nslope / 2);
							int slope_bin_hi = (int)(orient_yx / PI * nslope / 2 + nslope / 2);
							int dist_bin = (int)(origin_dist/100) + nodist/4;

							if(slope_bin_lo < nslope && slope_bin_hi < nslope &&
							   dist_bin >= 0 && dist_bin < nodist)
							{
								fft_data[slope_bin_lo][dist_bin]++;
								fft_data[slope_bin_hi][dist_bin]++;
							}
						}

						avg_walls[y/WALL_AVG_SIZE][x/WALL_AVG_SIZE][y % WALL_AVG_SIZE][x % WALL_AVG_SIZE] = Wall(slopeyx, yint);
						valid_walls[y/WALL_AVG_SIZE][x/WALL_AVG_SIZE][y % WALL_AVG_SIZE][x % WALL_AVG_SIZE] = true;
						r = 0;
						g = 255-std::min<int>(std::max<int>(orient_yx / PI * 256,0),255);//std::min<int>(std::max<int>(yint*20+128,0),255);
						b = std::min<int>(std::max<int>(orient_yx / PI * 256,0),255);

					}else{
						r = g = b = 0x80;
						if(!wall_check1)
							fail_yx_res++;
						
						if(!floor_check1) fail_floor_check1++;
						if(!floor_check2) fail_floor_check2++;
						if(!floor_check3) fail_floor_check3++;
						if(!floor_check4) fail_floor_check4++;
					}
				}
			}

		/*	for(yo = 0; yo < 8; yo++)
				for(xo = 0; xo < 8; xo++)
				{
					frame[y*8+yo][x*8+xo][0] = r;
					frame[y*8+yo][x*8+xo][1] = g;
					frame[y*8+yo][x*8+xo][2] = b;
				}*/
		}
	}

	//std::cerr << fail_yx_res << " " << fail_floor_check1 << " " << fail_floor_check2 << " " << fail_floor_check3 << " " << fail_floor_check4 << std::endl;

	// Forward transform of the accumulator
	fftwf_plan fft = fftwf_plan_dft_r2c_2d(nslope, nodist, &(fft_data[0][0]), fft_out, FFTW_ESTIMATE);
	fftwf_execute(fft);
	fftwf_destroy_plan(fft);

	float mag;
	float stddev_x, stddev_y;
	float var_x, var_y;
	float mean_x, mean_y;
	float filter_x, filter_y;
	float coeff_x, coeff_y;

	// Gaussian filter parameters; only the commented-out filter below uses them
	stddev_x = 2;
	stddev_y = 2;
	mean_x = 0;
	mean_y = nslope / 2;
	// Variance is the squared standard deviation (this used to square the
	// leftover loop counters x and y)
	var_x = quick_square(stddev_x);
	var_y = quick_square(stddev_y);
	coeff_x = 1 / (stddev_x * sqrt(2*PI)) / 0.4;
	coeff_y = 1 / (stddev_y * sqrt(2*PI)) / 0.4;


	// Low-pass: keep only coefficients within 8 bins of zero frequency on both
	// axes. Y rotates the row index by nslope/2, so y near nslope/2 addresses
	// the rows around frequency zero.
	for(y = 0; y < nslope; y++)
	{
		Y = (nslope / 2 + y) % nslope;
//		filter_y = coeff_y * exp(-1 * quick_square(mean_y - y) / (2*var_y)); 
		for(x = 0; x < nodist_half; x++)
		{
			/*filter_x = fabs(coeff_x * exp(-1 * quick_square(mean_x - x) / (2*var_x))); 
			fft_out[Y*nodist_half+x][0] *= filter_x * filter_y;
			fft_out[Y*nodist_half+x][1] *= filter_x * filter_y;
			continue;*/

			if(abs(y - nslope / 2) >= 8 || x >= 8)
			{
				fft_out[Y*nodist_half+x][0] = 0;
				fft_out[Y*nodist_half+x][1] = 0;
			}else{
				mag = sqrt(quick_square(fft_out[Y*nodist_half+x][0]) + quick_square(fft_out[Y*nodist_half+x][1]));

	/*			frame[y][x][0] = mag / fft_out[0][0]*256;
				frame[y][x][1] = mag / fft_out[0][0]*256;
				frame[y][x][2] = mag / fft_out[0][0]*256;*/
			}
		}
	}

	// Back to the (orientation, distance) domain; FFTW does not normalise
	fft = fftwf_plan_dft_c2r_2d(nslope, nodist, fft_out, &(fft_data[0][0]), FFTW_ESTIMATE);
	fftwf_execute(fft);
	fftwf_destroy_plan(fft);

	float max_mag = 0, maxgrad;
	int maxgradid;

	std::set< Wall > walls;
	std::set< Wall >::iterator it_walls;

	for(y = 0; y < nslope; y++)
	{
		for(x = 0; x < nodist; x++)
		{
			mag = fft_data[y][x];
			if(mag > max_mag)
				max_mag = mag;
		}	
	}

	for(y = 0; y < nslope; y++)
	{
		for(x = 0; x < nodist; x++)
		{
			// Find the largest positive value in the 3x3 neighbourhood;
			// maxgradid stays 0 when that is the cell itself (or none is positive)
			maxgrad = 0;
			maxgradid = 0;
			for(yo = -1; yo <= 1; yo++)
				for(xo = -1; xo <= 1; xo++)
				{
					// Border cells have fewer neighbours; never index outside fft_data
					if(y + yo < 0 || y + yo >= nslope || x + xo < 0 || x + xo >= nodist)
						continue;

					if(fft_data[y + yo][x + xo] > maxgrad)
					{
						maxgrad = fft_data[y + yo][x + xo];
						maxgradid = yo * 3 + xo;
					}
				}

			mag = std::max<float>(fft_data[y][x],0);

			if(maxgradid != 0)
			{
				frame[y][x][0] = mag / max_mag * 255;
				frame[y][x][1] = mag / max_mag * 255;
				frame[y][x][2] = mag / max_mag * 255;
			}else if(abs(y - nslope/2) <= nslope/4){
				frame[y][x][0] = mag / max_mag * 255;
				frame[y][x][1] = 0;
				frame[y][x][2] = 0;
				
				if(mag > 125893) //10 ** 5.1
					walls.insert(Wall(fmod((float)y / nslope * 2 * PI,PI) - PI / 2,(float)x - nodist / 4.0));
			}
		}
	}

	for(it_walls = walls.begin(); it_walls != walls.end(); it_walls++)
	{
		std::cerr << " " << it_walls->orient / PI;
		std::cerr << " " << it_walls->yint;
		std::cerr << " " << log10(max_mag);
		std::cerr << std::endl;
	}

	std::cerr << std::endl;


	fftwf_free(fft_out);

	return 0;
}
コード例 #13
0
// Store translations into transMap.
//
// Images are pulled from `fetcher` one grid point at a time. Each image gets
// an asynchronous FFT task, and each (image, neighbour) pair gets an
// asynchronous phaseCorrThr task. Once more than fetcher.cap images are in
// flight, or reading has finished, the oldest image's pair results are
// collected into transMap and its FFT buffer is freed.
void storeTrans(ImgFetcher &fetcher, const Point2f &absHint, PairToTransData &transMap, const MaxDists &dists) {
	// Neighbour offsets each image is paired with; the set depends on the
	// fetcher's traversal order.
	static const int rowMajorOffs[4][2] = {{-1, 0}, {-1, -1}, {0, -1}, {1, -1}};
	static const int colMajorOffs[4][2] = {{0, -1}, {-1, -1}, {-1, 0}, {-1, 1}};
	const int (*offTable)[2] = fetcher.row_major ? rowMajorOffs : colMajorOffs;

	vector<GridPtOff> imOffs;
	for (int k = 0; k < 4; k++) {
		imOffs.push_back(makeOff(offTable[k][0], offTable[k][1]));
	}

	map<PtPair, shared_future<TransData>> pairToTransFut;
	map<GridPt, shared_future<FFTHolder>> ptToFFTFut;

	unsigned inFlight = 0;          // images loaded and not yet released
	GridPt readPt = {{0, 0}};       // next grid point to read
	GridPt oldestPt = {{0, 0}};     // oldest grid point still held
	Mat cur;

	// The first image determines the size used for every plan and mask.
	fetcher.getMat(readPt, cur);
	Size imSz = cur.size();
	unsigned fftLen = getFFTLen(imSz);

	map<GridPtOff, Mat> hintToMask;
	storeHintToMask(hintToMask, imSz, absHint, dists);

	// The scratch buffer exists only so the in-place plans can be created.
	float *scratch = (float *)fftwf_malloc_thr(sizeof(float) * fftLen);
	fftwf_plan r2cPlan = fftwf_plan_dft_r2c_2d(imSz.height, imSz.width, scratch, (fftwf_complex *)scratch, FFTW_MEASURE);
	fftwf_plan c2rPlan = fftwf_plan_dft_c2r_2d(imSz.height, imSz.width, (fftwf_complex *)scratch, scratch, FFTW_MEASURE);
	fftwf_free_thr(scratch);

	bool allRead = false;
	for (;;) {
		// a dirty kind of event loop: first retire the oldest image if the
		// window is full (or reading is over), then read the next one
		if (inFlight > fetcher.cap || allRead) {
			for (auto &off: imOffs) {
				// *subtract* offset to avoid duplicating pairs
				GridPt nbrPt = {{oldestPt[0] - off[0], oldestPt[1] - off[1]}};
				if (!ptInGrid(nbrPt, fetcher)) {
					continue;
				}

				PtPair pair = {{oldestPt, nbrPt}};
				shared_future<TransData> transFut;
				if (!lookupPair(pairToTransFut, pair, transFut)) {
					printf("err: future of pair %d %d to %d %d not found\n", pair[0][0], pair[0][1], pair[1][0], pair[1][1]);
					exit(1);
				}
				// blocks until the pair's task has finished
				transMap.emplace(pair, transFut.get());
				pairToTransFut.erase(pair);
			}
			fftwf_free_thr(ptToFFTFut[oldestPt].get().fft);
			ptToFFTFut.erase(oldestPt);

			// no older image left to retire: everything has been processed
			if (!nextCoor(oldestPt, fetcher)) {
				break;
			}
			inFlight--;
		}

		if (allRead) {
			continue;
		}

		fetcher.getMat(readPt, cur);

		// fft only supports 32-bit float with even width, for now
		assert(cur.type() == CV_32FC1 && (int)cur.step[0] == cur.size().width * 4 && cur.step[1] == 4 && cur.size().width % 2 == 0);
		assert(cur.isContinuous());

		ptToFFTFut.emplace(readPt, async(launch::async,
			[&r2cPlan, &absHint](Mat im) {
				return FFTHolder(im, absHint, r2cPlan);
			},
			cur
			));

		for (auto &off: imOffs) {
			GridPt nbrPt = {{readPt[0] + off[0], readPt[1] + off[1]}};
			if (!ptInGrid(nbrPt, fetcher)) {
				continue;
			}

			PtPair pair = {{readPt, nbrPt}};

			// needed since VS2012 async() can't take functions with too many arguments :(
			shared_future<FFTHolder> &curFut = ptToFFTFut[readPt];
			shared_future<FFTHolder> &nbrFut = ptToFFTFut[nbrPt];
			pairToTransFut.emplace(pair, async(launch::async, [=] {
				return phaseCorrThr(curFut, nbrFut, c2rPlan, pair, absHint, hintToMask, imSz);
			}));
		}

		inFlight++;
		if (!nextCoor(readPt, fetcher)) {
			allRead = true;
		}
	}

	fftwf_destroy_plan(r2cPlan);
	fftwf_destroy_plan(c2rPlan);
}
コード例 #14
0
// Filters `in` with the CSF filter of every adaptation level and blends the
// results per pixel according to `adaptationMap`.
//
// For each level the frequency data of `in` is filtered with filters[i] and
// inverse-transformed. Each output pixel is then taken from the first/last
// level when its adaptation value lies at or below / above the level range,
// and linearly interpolated between the two enclosing levels otherwise.
//
// in            - input image; only its frequency representation is read
// out           - receives the result through setSpatial()
// adaptationMap - per-pixel adaptation value; must match the size of `in`
void MultiAdaptationCSF::process( BidomainArray2D *in, BidomainArray2D *out,
  BidomainArray2D *adaptationMap )
{
  const int cols = in->getCols(), rows = in->getRows();

  assert( cols == adaptationMap->getCols() );
  assert( rows == adaptationMap->getRows() );
  
  const FFTWComplexArray *freqOriginal = in->getFrequency(); 
  FFTWComplexArray freqFiltered( cols, rows );
  FFTWArray2D spatialTemp( cols, rows );
  
  // One inverse plan, reused for every adaptation level
  fftwf_plan inverseFFT = fftwf_plan_dft_c2r_2d( rows, cols,
    freqFiltered.getData(), spatialTemp.getData(), FFTW_ESTIMATE ); // MEASURE would damage the data

  //NOT compatible with new Cygwin version of gcc.
  //pfs::Array2DImpl **filteredImage = new (pfs::Array2DImpl*)[adaptationLevelsCount]; 

  // Results of filtering in spatial domain are stored there
  pfs::Array2DImpl **filteredImage = new pfs::Array2DImpl*[adaptationLevelsCount]; 
  
  
  for( int i = 0; i < adaptationLevelsCount; i++ ) { // For each adaptation level
    
    filterFFTW( freqOriginal->getData(), freqFiltered.getData(), cols, rows, filters[i] );
      
//    dumpPFS( "fft_image.pfs", freqFiltered, cols/2+1, rows, "Y" );

    fftwf_execute(inverseFFT);

    // Copy to filteredImage and normalize (FFTW's inverse is unnormalized)
    filteredImage[i] = new pfs::Array2DImpl( cols, rows );
    for( int pix = 0; pix < cols*rows; pix++ )
      (*filteredImage[i])(pix) = spatialTemp(pix)/(cols*rows);

//     // Some debug info
//     char buf[100];
//     sprintf( buf, "csf_filtered_%g.pfs", adaptationLevels[i] );
//     dumpPFS( buf, filteredImage[i], "Y" );

    std::cerr << ".";
    
  }

  std::cerr << "\n";

  const pfs::Array2D *adaptationMapArray = adaptationMap->getSpatial();
  
  pfs::Array2D *outA = out->setSpatial(); // output array
  // Linear intepolation between adaptation levels
  for( int ind = 0; ind < rows*cols; ind++ ) {
    float adapt = (*adaptationMapArray)( ind );

    // "<=" rather than "<": a value exactly on the lowest level needs no
    // interpolation (its weight is zero). Taking this branch also keeps a
    // single-level configuration out of the interpolation code, where it
    // would index one past the end of filteredImage.
    if( adapt <= adaptationLevels[0] )
      (*outA)(ind) = (*filteredImage[0])(ind);
    else if( adapt > adaptationLevels[adaptationLevelsCount-1] )
      (*outA)(ind) = (*filteredImage[adaptationLevelsCount-1])(ind);
    else {            // interpolate
      // l becomes the first level that is not below adapt
      int l;
      for( l = 1; l < adaptationLevelsCount; l++ )
        if(adapt <= adaptationLevels[l]) break;
      assert( l > 0 && l < adaptationLevelsCount );
          
      (*outA)(ind) = (*filteredImage[l-1])(ind) +
        ((*filteredImage[l])(ind)-(*filteredImage[l-1])(ind))*
        (adapt-adaptationLevels[l-1])/(adaptationLevels[l]-adaptationLevels[l-1]);
    }
  }
//   dumpPFS( "after_csf.pfs", in, "Y" );  
  
  // Clean up
  for( int i = 0; i < adaptationLevelsCount; i++ )
    delete filteredImage[i];      
  delete[] filteredImage;

  fftwf_destroy_plan(inverseFFT);
}
コード例 #15
0
/*
 * Make the FFT work area of `fft` fit the source padded by `radius` on
 * every side.
 *
 * When the existing work image already has the required dimensions it is
 * kept, and only the radius-dependent fields (space, origin, level) are
 * refreshed if the radius changed.  Otherwise the work area is cleared and
 * rebuilt: two equally sized buffers (image, kernel) and a pair of in-place
 * real<->complex plans operating on the image buffer.
 *
 * Returns TRUE on success, FALSE when an allocation or plan creation fails
 * (the work area is cleared again in that case).
 */
static gboolean
focusblur_fft_buffer_update_work (FblurFftBuffer *fft,
                                  gint            radius)
{
  const gint need_row = fft->source.width  + 2 * radius;
  const gint need_col = fft->source.height + 2 * radius;

  /* Work buffers left over from an earlier use are unexpected here:
     warn and release them. */
  if (fft->work.buffers)
    {
      g_warning ("buffer hadn't been cleared.");
      focusblur_fft_work_free_buffers (fft);
    }

  if (! fft->work.image ||
      need_row != fft->work.row ||
      need_col != fft->work.col)
    {
      /* Geometry changed (or nothing allocated yet): start from scratch. */
      focusblur_fft_buffer_clear_work (fft);

      fft->work.row = need_row;
      fft->work.col = need_col;
      /* Smallest even number above col, i.e. 2 * (col / 2 + 1): the row
         length in floats that an in-place real transform needs. */
      fft->work.col_padded = (need_col + 2) & ~1;

      fft->work.nelements = need_row * fft->work.col_padded;
      fft->work.complex_nelements = fft->work.nelements / 2;
      fft->work.size = sizeof (fftwf_complex) * fft->work.complex_nelements;

      /* 32-bytes pair (4x complex or 8x real) processing:
         round the byte size up to a multiple of 32. */
      fft->work.size = (fft->work.size + 31) & ~31;

      /* fftwf_malloc() (or distributed package) is broken. */
      fft->work.image  = fftwf_malloc (fft->work.size);
      fft->work.kernel = fftwf_malloc (fft->work.size);
      if (! (fft->work.image && fft->work.kernel))
        {
          focusblur_fft_buffer_clear_work (fft);
          return FALSE;
        }

      /* Both plans transform the image buffer in place. */
      fft->work.plan_r2c = fftwf_plan_dft_r2c_2d
        (need_row, need_col,
         (gfloat *) fft->work.image, fft->work.image, FFTW_ESTIMATE);

      fft->work.plan_c2r = fftwf_plan_dft_c2r_2d
        (need_row, need_col,
         fft->work.image, (gfloat *) fft->work.image, FFTW_ESTIMATE);

      if (! (fft->work.plan_r2c && fft->work.plan_c2r))
        {
          focusblur_fft_buffer_clear_work (fft);
          return FALSE;
        }
    }
  else if (radius == fft->work.space)
    {
      /* Same geometry and same radius: nothing to refresh. */
      return TRUE;
    }

  /* Fresh work area, or reused one with a different radius. */
  fft->work.space = radius;
  fft->work.origin = (fft->work.col_padded + 1) * radius;
  fft->work.level = 0;

  return TRUE;
}
コード例 #16
0
ファイル: sepvti2d.c プロジェクト: wangh0a/Ateam
/*
 * Time-stepping driver built on 2-D real FFTs.
 *
 * Command-line parameters read here:
 *   verb  - print the time index (and, with OpenMP, the step time) per step
 *   snap  - read but not used in this function
 *   adj   - run the time loop backwards and swap the roles of "sou"/"rec"
 *   nb    - number of boundary cells added on every side (default 4)
 *   sou   - optional file with source point coordinates
 *   rec   - optional file with receiver point coordinates
 * Files: "in" (input data), "model" (three nz0 x nx0 float panels, read as
 * vp, eps, del), "out" (output data).
 *
 * The input is point data (one sample per point per time step) when
 * "sou" is given in forward mode or "rec" is given in adjoint mode;
 * otherwise it is a full nz0 x nx0 panel per time step.  The output is
 * chosen the same way with the roles of "sou" and "rec" exchanged.
 *
 * Always returns 0.
 */
int main (int argc, char *argv[])
{
  bool verb, snap;
  bool abc, adj;
  int nz, nx, nt, ns, nr;
  float dz, dx, dt, oz, ox;
  int nz0, nx0, nb;
  float oz0, ox0;
  int nkz, nkx;
  int nzpad, nxpad;
  
  float **u1, **u0;
  float *ws, *wr;
  
  sf_file file_src = NULL, file_rec = NULL;
  sf_file file_inp = NULL, file_out = NULL;
  sf_file file_mdl = NULL;
  sf_axis az = NULL, ax = NULL, at = NULL, as = NULL, ar = NULL;
  pt2d *src2d = NULL;
  pt2d *rec2d = NULL;
  scoef2d cssinc = NULL;
  scoef2d crsinc = NULL;
  float *wi = NULL, *wo = NULL;
  sf_axis ai = NULL, ao = NULL;
  scoef2d cisinc = NULL, cosinc = NULL;
  /* spt/rpt: a "sou"/"rec" file was given;
     ipt/opt: the input/output consists of point data */
  bool spt = false, rpt = false;
  bool ipt = false, opt = false;
  
  sf_init(argc, argv);
  
  if (!sf_getbool("verb", &verb)) verb = false;
  if (!sf_getbool("snap", &snap)) snap = false;
  if (!sf_getbool("adj", &adj)) adj = false;
  if (!sf_getint("nb", &nb)) nb = 4;
  /* source points feed the input in forward mode, the output in adjoint mode */
  if (sf_getstring("sou") != NULL) { 
    spt = true;
    if (adj) opt = true;
    else     ipt = true;
  }
  /* receiver points: the other way round */
  if (sf_getstring("rec") != NULL) {
    rpt = true;
    if (adj) ipt = true;
    else     opt = true;
  }
  
  file_inp = sf_input("in");
  file_mdl = sf_input("model");
  if (spt) file_src = sf_input("sou");
  if (rpt) file_rec = sf_input("rec");
  file_out = sf_output("out");

  /* time is axis 2 of point input, axis 3 of panel input */
  if (ipt) at = sf_iaxa(file_inp, 2);
  else     at = sf_iaxa(file_inp, 3);
  if (spt) as = sf_iaxa(file_src, 2);
  if (rpt) ar = sf_iaxa(file_rec, 2);
  az = sf_iaxa(file_mdl, 1);
  ax = sf_iaxa(file_mdl, 2);
  nt = sf_n(at);  dt = sf_d(at);  //ot = sf_o(at);
  nz0 = sf_n(az);  dz = sf_d(az);  oz0 = sf_o(az);
  nx0 = sf_n(ax);  dx = sf_d(ax);  ox0 = sf_o(ax);

  /* ns / nr stay unset when the matching file is absent; they are only
     used under the same spt / rpt conditions below */
  if (spt) ns = sf_n(as);
  if (rpt) nr = sf_n(ar);
  /* computational grid = model grid plus nb cells on every side */
  nz = nz0 + 2 * nb;
  nx = nx0 + 2 * nb;
  oz = oz0 - nb * dz;
  ox = ox0 - nb * dx;
  /* NOTE(review): abc is assigned here but not read again in this function */
  abc = nb ? true : false;
  // sf_error("ox=%f ox0=%f oz=%f oz0=%f",ox,ox0,oz,oz0);
  
  /* FFT sizes: nz is rounded up to an even number before asking for a fast
     size; nkz = nzpad/2+1 is the length of the half spectrum along z that a
     real-to-complex transform produces */
  nzpad = kiss_fft_next_fast_size( ((nz+1)>>1)<<1 );
  nkx = nxpad = kiss_fft_next_fast_size(nx);
  nkz = nzpad / 2 + 1;
  /* float okx = - 0.5f / dx; */
  float okx = 0.f;
  float okz = 0.f;
  float dkx = 1.f / (nxpad * dx);
  float dkz = 1.f / (nzpad * dz);

  /* read the three model panels and expand each from the nz0 x nx0 model
     grid to the nz x nx computational grid (expand2d is defined elsewhere) */
  float **vp, **eps, **del;
  vp  = sf_floatalloc2(nz, nx);
  eps = sf_floatalloc2(nz, nx);
  del = sf_floatalloc2(nz, nx);
  float **tmparray = sf_floatalloc2(nz0, nx0);
  sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(vp[0], tmparray[0], nz, nx, nz0, nx0);
  sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(eps[0], tmparray[0], nz, nx, nz0, nx0);
  sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(del[0], tmparray[0], nz, nx, nz0, nx0);

  float **vn, **vh;  
  float **eta, **lin_eta;
  lin_eta = NULL, vh = NULL;
 
  vn = sf_floatalloc2(nz, nx);
  vh = sf_floatalloc2(nz, nx);
  eta = sf_floatalloc2(nz, nx);
  lin_eta = sf_floatalloc2(nz, nx);

  /* derived fields; note that vp is replaced by its square first:
       vn      = vp^2 * (1 + 2*del)
       vh      = vp^2 * (1 + 2*eps)
       eta     = (eps - del) / (1 + 2*del)
       lin_eta = eta * (1 + 2*del) */
  for (int ix=0; ix<nx; ix++) {
    for (int iz=0; iz<nz; iz++){
      vp[ix][iz] *= vp[ix][iz];
      vn[ix][iz] = vp[ix][iz] * (1.f + 2.f * del[ix][iz]);
      vh[ix][iz] = vp[ix][iz] * (1.f + 2.f * eps[ix][iz]);
      eta[ix][iz] = (eps[ix][iz] - del[ix][iz]) / (1.f + 2.f * del[ix][iz]);
      lin_eta[ix][iz] = eta[ix][iz] * (1.f + 2.f * del[ix][iz]);
    }
  }


  /* kx[] and kz[] end up holding SQUARED angular wavenumbers (2*pi*k)^2.
     The upper half of kx is mapped to negative frequencies before
     squaring; kz only covers the non-negative half spectrum. */
  float *kx = sf_floatalloc(nkx);
  float *kz = sf_floatalloc(nkz);
  for (int ikx=0; ikx<nkx; ++ikx) {
    kx[ikx] = okx + ikx * dkx;
    /* if (ikx >= nkx/2) kx[ikx] = (nkx - ikx) * dkx; */
    if (ikx >= nkx/2) kx[ikx] = (ikx - nkx) * dkx;
    kx[ikx] *= 2 * SF_PI;
    kx[ikx] *= kx[ikx];
  }
  for (int ikz=0; ikz<nkz; ++ikz) {
    kz[ikz] = okz + ikz * dkz;
    kz[ikz] *= 2 * SF_PI;
    kz[ikz] *= kz[ikz];
  }

  /* input/output point axes; either may remain NULL when the matching file
     was not given, and is then only used under ipt / opt */
  if (adj) {
    ai = ar; ao = as;
  } else {
    ai = as; ao = ar;
  }

  /* output header: (points, time) for point output, (z, x, time) otherwise */
  if (opt) {
    sf_oaxa(file_out, ao, 1);
    sf_oaxa(file_out, at, 2);
  } else {
    sf_oaxa(file_out, az, 1);
    sf_oaxa(file_out, ax, 2);
    sf_oaxa(file_out, at, 3);
  }
  sf_fileflush(file_out, NULL);

  /* point coordinates, their sinc2d coefficients on the padded grid, and a
     one-value-per-point buffer; wi/wo and cisinc/cosinc alias whichever set
     acts as input/output in the current mode */
  if (spt) {
    src2d = pt2dalloc1(ns);
    pt2dread1(file_src, src2d, ns, 2);
    cssinc = sinc2d_make(ns, src2d, nz, nx, dz, dx, oz, ox);
    ws = sf_floatalloc(ns);
    if (adj) { cosinc = cssinc;  wo = ws; }
    else     { cisinc = cssinc;  wi = ws; }
  }
  if (rpt) {
    rec2d = pt2dalloc1(nr);
    pt2dread1(file_rec, rec2d, nr, 2);
    crsinc = sinc2d_make(nr, rec2d, nz, nx, dz, dx, oz, ox);
    wr = sf_floatalloc(nr);
    if (adj) { cisinc = crsinc;  wi = wr; }
    else     { cosinc = crsinc;  wo = wr; }
  }

  /* two field buffers that are swapped every step, plus FFT work arrays */
  u0 = sf_floatalloc2(nz, nx);
  u1 = sf_floatalloc2(nz, nx);
  float *rwave = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float));
  float *rwavem = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float));
  fftwf_complex *cwave = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex));
  fftwf_complex *cwavem = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex));
  /* float *rwavem = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float));
  fftwf_complex *cwave = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex));
  fftwf_complex *cwavem = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex)); */

  /* boundary conditions */
  float **ucut = NULL;
  float *damp = NULL;
  /* panel-sized buffer, needed unless both input and output are point data */
  if (!(ipt &&opt)) ucut = sf_floatalloc2(nz0, nx0);
  damp = damp_make(nb);
    
  /* wt = dt^2 / (nxpad*nzpad): the division compensates for FFTW's
     unnormalized forward+inverse transform pair */
  float wt = 1./(nxpad * nzpad);
  wt *= dt * dt;
  fftwf_plan forward_plan;
  fftwf_plan inverse_plan;
#ifdef _OPENMP
#ifdef SF_HAS_FFTW_OMP
  fftwf_init_threads();
  fftwf_plan_with_nthreads(omp_get_max_threads());
#endif
#endif
  /* x is the slow dimension, z the fast one; FFTW_MEASURE may overwrite the
     arrays while planning, which is why they are zeroed only afterwards */
  forward_plan = fftwf_plan_dft_r2c_2d(nxpad, nzpad,
              rwave, cwave, FFTW_MEASURE); 
#ifdef _OPENMP
#ifdef SF_HAS_FFTW_OMP
  fftwf_plan_with_nthreads(omp_get_max_threads());
#endif
#endif
  inverse_plan = fftwf_plan_dft_c2r_2d(nxpad, nzpad,
              cwavem, rwavem, FFTW_MEASURE); 
  /* time loop bounds: begin, end (exclusive) and step; reversed for adj */
  int itb, ite, itc;
  if (adj) {
    itb = nt -1; ite = -1; itc = -1;
  } else {
    itb = 0; ite = nt; itc = 1;
  }

  /* Adjoint mode writes time steps in reverse order, so the output is first
     filled with nt records and rewound; the time loop then seeks to each
     record before writing it.
     NOTE(review): wo / ucut have not been assigned by this function at this
     point, so the placeholder records hold whatever the allocator returned;
     every record is overwritten by the loop below. */
  if (adj) {
    for (int it=0; it<nt; it++) {
      if (opt) sf_floatwrite(wo, sf_n(ao), file_out);
      else     sf_floatwrite(ucut[0], nz0*nx0, file_out);
    }
    sf_seek(file_out, 0, SEEK_SET);
  }

  float **ptrtmp = NULL;
  memset(u0[0], 0, sizeof(float)*nz*nx);
  memset(u1[0], 0, sizeof(float)*nz*nx);
  memset(rwave, 0, sizeof(float)*nzpad*nxpad);
  memset(rwavem, 0, sizeof(float)*nzpad*nxpad);
  /* 2 floats per complex element */
  memset(cwave, 0, sizeof(float)*nkz*nkx*2);
  memset(cwavem, 0, sizeof(float)*nkz*nkx*2);

  for (int it=itb; it!=ite; it+=itc) { if (verb) sf_warning("it = %d;",it);
#ifdef _OPENMP
    double tic = omp_get_wtime();
#endif
    /* read this step's input record (seeking to it in adjoint mode) and
       scale it by dt^2 */
    if (ipt) {
      if (adj) sf_seek(file_inp, (off_t)(it)*sizeof(float)*sf_n(ai), SEEK_SET);
      sf_floatread(wi, sf_n(ai), file_inp);
      for (int i=0; i<sf_n(ai); i++)
        wi[i] *= dt* dt;
    } else {
      if (adj) sf_seek(file_inp, (off_t)(it)*sizeof(float)*nz0*nx0, SEEK_SET);
      sf_floatread(ucut[0], nz0*nx0, file_inp);
      for (int j=0; j<nx0; j++)
      for (int i=0; i<nz0; i++)
        ucut[j][i] *= dt * dt;
    }

    /* apply absorbing boundary condition: E \times u@n-1 */
    damp2d_apply(u0, damp, nz, nx, nb);
    /* advance one time step (fft_stepforward is defined elsewhere) */
    fft_stepforward(u0, u1, rwave, rwavem, cwave, cwavem,
        vp, vn, eta, vh, eps, lin_eta, kz, kx,
        forward_plan, inverse_plan,
        nz, nx, nzpad, nxpad, nkz, nkx, wt, adj);

    // sinc2d_inject1(u0, ws[it][s_idx], cssinc[s_idx]);
    /* add the input into u0: at the points, or over the inner nz0 x nx0 window */
    if (ipt) sinc2d_inject(u0, wi, cisinc);
    else     wfld2d_inject(u0, ucut, nz0, nx0, nb);

    /* apply absorbing boundary condition: E \times u@n+1 */
    damp2d_apply(u0, damp, nz, nx, nb);

    /* loop over pointers */
    ptrtmp = u0;  u0 = u1;  u1 = ptrtmp;
    
    /* extract and write this step's output record from u0 (after the swap) */
    if (opt) {
      if (adj) sf_seek(file_out, (off_t)(it)*sizeof(float)*sf_n(ao),SEEK_SET);
      sinc2d_extract(u0, wo, cosinc);
      sf_floatwrite(wo, sf_n(ao), file_out);
    } else {
      if (adj) sf_seek(file_out, (off_t)(it)*sizeof(float)*nz0*nx0,SEEK_SET);
      wwin2d(ucut, u0, nz0, nx0, nb);
      sf_floatwrite(ucut[0], nz0*nx0, file_out);
    }

#ifdef _OPENMP
    double toc = omp_get_wtime();
    if (verb) fprintf(stderr," clock = %lf;", toc-tic);
#endif
  } /* END OF TIME LOOP */
  /* NOTE(review): the FFTW plans, the fftwf_malloc'ed arrays and the other
     allocations above are not released before returning */
  return 0;
}