예제 #1
// One-time setup of the FFT-based gravity code: allocates the FFTW work
// arrays, creates the forward and inverse 2D plans, fills the point-mass
// kernels th_ptgravx/th_ptgravy, and finally sets grav_fft_status.
// Does nothing when grav_fft_status is already non-zero.
// NOTE(review): this listing has lost its braces and the statements that
// belonged under the "transform point mass velocity maps" and
// "clear padded gravmap" comments; confirm against the full source.
void grav_fft_init()
	// The FFT grid is twice the XRES/CELL x YRES/CELL cell grid in each dimension.
	int xblock2 = XRES/CELL*2;
	int yblock2 = YRES/CELL*2;
	// fft_tsize: complex element count of the r2c output for a yblock2 x xblock2 real array.
	int x, y, fft_tsize = (xblock2/2+1)*yblock2;
	float distance, scaleFactor;
	fftwf_plan plan_ptgravx, plan_ptgravy;
	// Already initialised: nothing to do.
	if (grav_fft_status) return;

	//use fftw malloc function to ensure arrays are aligned, to get better performance
	th_ptgravx = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float));
	th_ptgravy = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float));
	th_ptgravxt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex));
	th_ptgravyt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex));
	th_gravmapbig = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float));
	th_gravmapbigt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex));
	th_gravxbig = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float));
	th_gravybig = (float*)fftwf_malloc(xblock2*yblock2*sizeof(float));
	th_gravxbigt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex));
	th_gravybigt = (fftwf_complex*)fftwf_malloc(fft_tsize*sizeof(fftwf_complex));

	//select best algorithm, could use FFTW_PATIENT or FFTW_EXHAUSTIVE but that increases the time taken to plan, and I don't see much increase in execution speed
	// Plans are created before the kernels are filled in below; FFTW_MEASURE
	// planning may overwrite the arrays it is given.
	plan_ptgravx = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravx, th_ptgravxt, FFTW_MEASURE);
	plan_ptgravy = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravy, th_ptgravyt, FFTW_MEASURE);
	plan_gravmap = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_gravmapbig, th_gravmapbigt, FFTW_MEASURE);
	plan_gravx_inverse = fftwf_plan_dft_c2r_2d(yblock2, xblock2, th_gravxbigt, th_gravxbig, FFTW_MEASURE);
	plan_gravy_inverse = fftwf_plan_dft_c2r_2d(yblock2, xblock2, th_gravybigt, th_gravybig, FFTW_MEASURE);

	//(XRES/CELL)*(YRES/CELL)*4 is size of data array, scaling needed because FFTW calculates an unnormalized DFT
	scaleFactor = -M_GRAV/((XRES/CELL)*(YRES/CELL)*4);
	//calculate velocity map caused by a point mass
	for (y=0; y<yblock2; y++)
		for (x=0; x<xblock2; x++)
			// Skip the centre cell, where the distance would be zero.
			if (x==XRES/CELL && y==YRES/CELL) continue;
			distance = sqrtf(pow(x-(XRES/CELL), 2) + pow(y-(YRES/CELL), 2));
			// Each component is scaleFactor * offset / distance^3.
			th_ptgravx[y*xblock2+x] = scaleFactor*(x-(XRES/CELL)) / pow(distance, 3);
			th_ptgravy[y*xblock2+x] = scaleFactor*(y-(YRES/CELL)) / pow(distance, 3);
	// The centre cell (skipped in the loop) is set to zero explicitly.
	th_ptgravx[yblock2*xblock2/2+xblock2/2] = 0.0f;
	th_ptgravy[yblock2*xblock2/2+xblock2/2] = 0.0f;

	//transform point mass velocity maps

	//clear padded gravmap

	grav_fft_status = 1;
예제 #2
// Creates the three complex-to-real 2D plans (red, green, blue) that
// transform this image's channel buffers into the buffers of m_hdriRFFT.
// NOTE(review): the enclosing braces are missing from this listing.
void HDRImage3c::rfftplanUpdate ()
	// Transform size is getTotalSize() minus uint2(0,2).
	// NOTE(review): presumably this removes row padding; confirm against getTotalSize().
	uint2 total = getTotalSize()-uint2(0,2);
	// One plan per colour channel, all with the same dimensions.
	m_rfftplanR = fftwf_plan_dft_c2r_2d (total.y, total.x, 
			m_red, m_hdriRFFT->getRedBuffer(), FFTW_MEASURE);
	m_rfftplanG = fftwf_plan_dft_c2r_2d (total.y, total.x, 
			m_green, m_hdriRFFT->getGreenBuffer(), FFTW_MEASURE);
	m_rfftplanB = fftwf_plan_dft_c2r_2d (total.y, total.x, 
			m_blue, m_hdriRFFT->getBlueBuffer(), FFTW_MEASURE);
예제 #3
// Constructs a solver for an n x n grid: allocates the data and force
// arrays, creates forward/inverse in-place FFT plans on the data array,
// and sets the unit system and cosmological parameters.
// NOTE(review): the constructor's braces are missing from this listing,
// and boxlength is not declared in the visible code.
gravity_solver::gravity_solver( unsigned n )
: n_( n )
    // Rows hold n_+2 floats, leaving room for the padded in-place r2c layout.
    data = new fftwf_real[ n_ * (n_+2) ];
    force = new fftwf_real[ n_ * (n_+2) ];
    // cdata views the same memory as data, so both plans work in place.
    cdata = reinterpret_cast<fftwf_complex*>(data);
    box_ = boxlength;
    box05_ = 0.5f * boxlength;
    // The trailing comma chains this assignment with the next one (comma operator).
    plan  = fftwf_plan_dft_r2c_2d( n_, n_, data, cdata, FFTW_MEASURE ),
    iplan = fftwf_plan_dft_c2r_2d( n_, n_, cdata, data, FFTW_MEASURE );
    UnitLength_in_cm = 3.08568025e24f; //      ;  1.0 Mpc
    UnitMass_in_g    = 1.989e43f; //           ;  1.0e10 solar masses
    UnitVelocity_in_cm_per_s = 1e5f; //                ;  1 km/sec
    // Time unit follows from the length and velocity units.
    UnitTime_in_s = UnitLength_in_cm / UnitVelocity_in_cm_per_s;
    GRAVITY = 6.672e-8f;
    // G is GRAVITY rescaled by the length, mass and time units chosen above.
    G = GRAVITY / pow(UnitLength_in_cm, 3) * UnitMass_in_g * pow(UnitTime_in_s, 2);
    Omega_m = 1.0; //0.276;
    Omega_L = 0.0; //0.724;
    aforce = 0.0;
예제 #4
파일: fft2w.c 프로젝트: 1014511134/src
// 2-D inverse FFT of inp into out; the cmplx flag selects between a
// complex (cc) and a real (ff) result buffer.
// NOTE(review): this listing is incomplete. The FFTW planner calls, the
// #else/#endif lines, closing braces and the FFTW execute call are missing,
// so the comments below describe only the statements that are visible.
void ifft2(float *out      /* [n1*n2] */, 
	   sf_complex *inp /* [nk*n2] */)
/*< 2-D inverse FFT >*/
    int i1, i2;

#ifdef SF_HAS_FFTW
    // The plan is created on first use only (icfg is NULL until then).
    if (NULL==icfg) {
      icfg = cmplx? 
			  (fftwf_complex *) dd, 
			  (fftwf_complex *) cc[0],
			      (fftwf_complex *) dd, ff[0],
      if (NULL == icfg) sf_error("FFTW failure.");

#ifdef SF_HAS_FFTW
    // Copy the input into the work buffer dd.
    for (i1=0; i1 < nk*n2; i1++)
      dd[i1] = inp[i1];

    // NOTE(review): presumably the non-FFTW (kiss_fft) path starts here; the #else is not visible.
    // Transform each i1 line with stride nk and store the result in tmp[i2][i1].
    for (i1=0; i1 < nk; i1++) {
	kiss_fft_stride(icfg2,(kiss_fft_cpx *) (inp+i1),ctrace2,nk);
	for (i2=0; i2<n2; i2++) {
	    tmp[i2][i1] = ctrace2[i2];
    // Transform each row of tmp along the first axis.
    for (i2=0; i2 < n2; i2++) {
	if (cmplx) {
	    kiss_fft_stride(icfg1,tmp[i2],(kiss_fft_cpx *) cc[i2],1);
	} else {
    /* FFT centering and normalization */
    for (i2=0; i2<n2; i2++) {
	for (i1=0; i1<n1; i1++) {
	    if (cmplx) {
		// +wt when i1 and i2 have the same parity, -wt otherwise; real part only.
		out[i2*n1+i1] = (((i2%2==0)==(i1%2==0))? wt:-wt) * crealf(cc[i2][i1]);
	    } else {
		// Sign alternates with i2 only.
		out[i2*n1+i1] = (i2%2? -wt: wt)*ff[i2][i1];
예제 #5
// Prepares the plugin's FFT state: allocates per-channel image and
// frequency buffers, creates forward and inverse plans, reads the drawable's
// pixels and runs the forward transform once per channel.
// NOTE(review): braces are missing from this listing; loop extents are
// inferred from indentation only.
void fft_prepare(PluginData *pd)
	gint         w = pd->image_width, h = pd->image_height;
	gint         channel_count = pd->channel_count;
	int          x, y;
	float      **image;
	guchar      *img_pixels;
	float        norm;
	// Tables with one pointer per channel (spatial and frequency data).
	image = pd->image = (float**) malloc(sizeof(float*) * channel_count);
	pd->image_freq = (fftwf_complex**) malloc(sizeof(fftwf_complex*) * channel_count);
  // Interleaved 8-bit pixel buffer for the whole image.
  img_pixels = pd->img_pixels = g_new (guchar, w * h * channel_count);
  //allocate an array for each channel
  for (int channel = 0; channel < channel_count; channel ++){
	  image[channel] = (float*) fftwf_malloc(sizeof(float) * w * h);
		// The r2c output has (w/2+1) complex values per row.
		pd->image_freq[channel] = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * (w/2+1) * h);
	// printf("Image data occupies %lu MB.\n", (sizeof(float) * w * h * channel_count) >> 20);
	// printf("Frequency data occupies %lu MB.\n", (sizeof(fftwf_complex) * (w/2+1) * h * channel_count) >> 20);
	// forward plan
	// Planned on the first channel's buffers; applied to every channel below
	// through fftwf_execute_dft_r2c.
	fftwf_plan plan = fftwf_plan_dft_r2c_2d(pd->image_height, pd->image_width, *image, *pd->image_freq, FFTW_ESTIMATE);
	// inverse plan (to be reused)
	pd->plan = fftwf_plan_dft_c2r_2d(pd->image_height, pd->image_width, *pd->image_freq, *image, FFTW_ESTIMATE);

	// set image region to reading mode
	gimp_pixel_rgn_init (&pd->region, pd->drawable, 0, 0, w, h, FALSE, FALSE);
	gimp_pixel_rgn_get_rect(&pd->region, img_pixels, 0, 0, w, h);
	// execute forward FFT once
	// NOTE(review): pw and diagonal are not used in the visible part of this function.
	int pw = w/2+1; // physical width
	float diagonal = sqrt(h*h + w*w)/2.0;
	// The 1/(w*h) scaling is applied to the input before the forward transform.
	norm = 1.0/(w*h);
	for(int channel=0; channel<channel_count; channel++)
		// convert one color channel to float[]
		for(int i=0; i < w*h; i ++)
			 image[channel][i] =  (float) img_pixels[(i)*channel_count + channel] * norm;
		// transform the channel
		fftwf_execute_dft_r2c(plan, image[channel], pd->image_freq[channel]);
		// NOTE(review): this loop repeats the normalised conversion above and
		// is overwritten by the loop that follows; it looks redundant.
		for(int i=0; i < w*h; i ++)
			 image[channel][i] =  (float) img_pixels[(i)*channel_count + channel] * norm;
		// copy the channel again, for preview
		// This copy stores the raw pixel values, without the norm factor.
		for(int i=0; i < w*h; i ++)
			 image[channel][i] =  (float) img_pixels[(i)*channel_count + channel];
예제 #6
파일: fft2.c 프로젝트: krushev36/src
// Creates the inverse-transform plan icfg that reads from inp; cmplx
// selects between a complex output (cc[0]) and a real output (ff[0]).
// Reports "FFTW failure." through sf_error when no plan is produced.
// NOTE(review): the planner function names, the #endif and the braces are
// missing from this listing; only the argument lists are visible.
void ifft2_allocate(sf_complex *inp /* [nk*n2] */)
/*< allocate inverse transform >*/
#ifdef SF_HAS_FFTW
    icfg = cmplx?
                             (fftwf_complex *) inp,
                             (fftwf_complex *) cc[0],
                             FFTW_BACKWARD, FFTW_MEASURE):
                                 (fftwf_complex *) inp, ff[0],
    // A NULL plan means planning failed.
    if (NULL == icfg) sf_error("FFTW failure.");
예제 #7
파일: fft.c 프로젝트: Starlink/sextractor
// NOTE(review): the header comment below is not terminated in this listing
// and the function's braces are missing; confirm against the full source.
/****** fft_conv ************************************************************
PROTO	void fft_conv(float *data1, float *fdata2, int *size)
PURPOSE	Optimized 2-dimensional FFT convolution using the FFTW library.
INPUT	ptr to the first image,
	ptr to the Fourier transform of the second image,
	image size vector.
NOTES	For data1 and fdata2, memory must be allocated for
	size[0]* ... * 2*(size[naxis-1]/2+1) floats (padding required).
AUTHOR	E. Bertin (IAP)
VERSION	29/03/2013
void    fft_conv(float *data1, float *fdata2, int *size)
   float		*fdata1p,*fdata2p,
			real,imag, fac;
   int			i, npix,npix2;

/* Convert axis indexing to that of FFTW */
  // npix: number of real pixels; npix2: number of complex coefficients.
  npix = size[0]*size[1];
  npix2 = ((size[0]/2) + 1) * size[1];

/* Forward FFT "in place" for data1 */
  // The buffer fdata1 and the forward plan are created only while fplan is unset.
  if (!fplan)
    QFFTWF_MALLOC(fdata1, fftwf_complex, npix2);
    fplan = fftwf_plan_dft_r2c_2d(size[1], size[0], data1,
        (fftwf_complex *)fdata1, FFTW_ESTIMATE);

  // Transform data1 into fdata1 with the stored plan.
  fftwf_execute_dft_r2c(fplan, data1, fdata1);

/* Actual convolution (Fourier product) */
  // fac folds the 1/npix normalisation into the product.
  fac = 1.0/npix;  
  fdata1p = (float *)fdata1;
  fdata2p = fdata2;
#pragma ivdep
  // Complex product per coefficient, stored back into fdata1.
  // NOTE(review): no pointer advance is visible in this listing; presumably
  // it was lost together with the braces.
  for (i=npix2; i--;)
    real = *fdata1p **fdata2p - *(fdata1p+1)**(fdata2p+1);
    imag = *(fdata1p+1)**fdata2p + *fdata1p**(fdata2p+1);
    *(fdata1p) = fac*real;
    *(fdata1p+1) = fac*imag;

/* Reverse FFT */
  // The inverse plan is created only while bplan is unset.
  if (!bplan)
    bplan = fftwf_plan_dft_c2r_2d(size[1], size[0], (fftwf_complex *)fdata1, 
        data1, FFTW_ESTIMATE);
  // The result is written back into data1.
  fftwf_execute_dft_c2r(bplan, fdata1, data1);

//  fftwf_execute(plan);

예제 #8
// Builds the FFT water state: allocates the height/displacement buffers,
// fills the initial spectrum m_htilde0 from a fixed-seed random generator,
// precomputes the normalised wave-vector tables m_kx/m_kz, creates the c2r
// plan and allocates an N x N GL_RGB16F texture.
// NOTE(review): the #else/#endif lines, closing braces and the tail of the
// plan call are missing from this listing.
GLFFTWater::GLFFTWater(GLFFTWaterParams &params) {
#ifdef _WIN32
    // m_h, m_dx and m_dz hold N rows of N+2 floats; m_w holds N x N floats.
    m_h = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4);
    m_dx = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4);
    m_dz = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N+2)*(params.N)), 4);
    m_w = (float *)__mingw_aligned_malloc((sizeof(float)*(params.N)*(params.N)), 4);
    // NOTE(review): presumably the non-Windows branch; the #else is not visible.
    // posix_memalign requires an alignment that is a multiple of
    // sizeof(void*), so 4 may be rejected on 64-bit targets; the return
    // values are not checked.
    posix_memalign((void **)&m_h,4,sizeof(float)*(params.N+2)*(params.N));
    posix_memalign((void **)&m_dx,4,sizeof(float)*(params.N+2)*(params.N));
    posix_memalign((void **)&m_dz,4,sizeof(float)*(params.N+2)*(params.N));
    posix_memalign((void **)&m_w,4,sizeof(float)*(params.N)*(params.N));

    m_htilde0 = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex)*(params.N)*(params.N));
    m_heightmap = new float3[(params.N)*(params.N)];
    m_params = params;

    // Fixed seed (1337): the generated spectrum is the same on every run.
    std::tr1::mt19937 prng(1337);
    std::tr1::normal_distribution<float> normal;
    std::tr1::uniform_real<float> uniform;
    std::tr1::variate_generator<std::tr1::mt19937, std::tr1::normal_distribution<float> > randn(prng,normal);
    std::tr1::variate_generator<std::tr1::mt19937, std::tr1::uniform_real<float> > randu(prng,uniform);
    // Walk the N x N grid; k is the running linear index.
    for(int i=0, k=0; i<params.N; i++) {
	    float k_x = (-(params.N-1)*0.5f+i)*(2.f*3.141592654f / params.L);
	    for(int j=0; j<params.N; j++, k++) {
		    float k_y = (-(params.N-1)*0.5f+j)*(2.f*3.141592654f / params.L);
		    float A = randn();
		    float theta = randu()*2.f*3.141592654f;
		    // Zero amplitude at the zero wave vector, otherwise sqrt of the phillips() value.
		    float P = (k_x==0.f && k_y==0.0f) ? 0.f : sqrtf(phillips(k_x,k_y,m_w[k]));
		    // Real and imaginary parts receive the same value.
		    m_htilde0[k][0] = m_htilde0[k][1] = P*A*sinf(theta);

    m_kz = new float[params.N*(params.N / 2 + 1)];
    m_kx = new float[params.N*(params.N / 2 + 1)];

    const int hN = m_params.N / 2;
    // Unit-length wave-vector components for the N x (N/2+1) half spectrum.
    for(int y=0; y<m_params.N; y++) {
	float kz = (float) (y - hN);
	for(int x=0; x<=hN; x++) {
		float kx = (float) (x - hN);
		// NOTE(review): kx and kz are both zero at x == hN, y == hN, so
		// this divides by zero there.
		float k = 1.f/sqrtf(kx*kx+kz*kz);
		m_kz[y*(hN+1)+x] = kz*k;
		m_kx[y*(hN+1)+x] = kx*k;

    if(!fftwf_init_threads()) {
	cerr << "Error initializing multithreaded fft."  << endl;
    } else {
    // In-place c2r plan: m_h is both the complex input and the real output.
    m_fftplan = fftwf_plan_dft_c2r_2d(m_params.N, m_params.N, (fftwf_complex *)m_h, m_h, 

    // Allocate an N x N GL_RGB16F texture without initial data, then unbind it.
    glGenTextures(1, &m_texId);
    glBindTexture(GL_TEXTURE_2D, m_texId);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, params.N, params.N, 0, GL_RGB, GL_FLOAT, 0);
    glBindTexture(GL_TEXTURE_2D, 0);
예제 #9
// Generates initial particle positions and velocities on an nres x nres
// grid: fills a field with Gaussian random numbers, scales it in Fourier
// space with cosmo_get_amp_k, derives x and y displacement fields and
// writes them into the particle array P.
// NOTE(review): braces are missing from this listing, so loop bodies are
// inferred from indentation; no fftwf_destroy_plan call is visible.
void cosmo_init_particles( unsigned seed )
    // Real arrays use rows of nres+2 floats so they can be transformed in
    // place; cdata/cdata2 view the same memory as complex values.
    fftwf_real *data = new fftwf_real[ nres * (nres+2) ];
    fftwf_complex *cdata = reinterpret_cast<fftwf_complex*>(data);
    fftwf_real *data2 = new fftwf_real[ nres * (nres+2) ];
    fftwf_complex *cdata2 = reinterpret_cast<fftwf_complex*>(data2);
    gsl_rng	*RNG = gsl_rng_alloc( gsl_rng_mt19937 );
	gsl_rng_set( RNG, seed );
    fftwf_plan plan, iplan, plan2, iplan2;
    // The trailing commas chain each r2c assignment with the c2r one after it.
    plan  = fftwf_plan_dft_r2c_2d( nres, nres, data, cdata, FFTW_MEASURE ),
    iplan = fftwf_plan_dft_c2r_2d( nres, nres, cdata, data, FFTW_MEASURE );
    plan2  = fftwf_plan_dft_r2c_2d( nres, nres, data2, cdata2, FFTW_MEASURE ),
    iplan2 = fftwf_plan_dft_c2r_2d( nres, nres, cdata2, data2, FFTW_MEASURE );
    // nresp: number of complex values per row in the half spectrum.
    int nresp = nres/2+1;
    float kfac = 2.0*M_PI/boxlength;
    float gaussran1, gaussran2;
    float fftnorm = 1.0f / (float)nres * (2.0f*M_PI/boxlength);
    // Fill the real field with unit Gaussian deviates divided by nres.
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nres; ++j )
            int idx = i*(nres+2)+j;
            data[idx] = gsl_ran_ugaussian_ratio_method( RNG ) / nres;
    fftwf_execute( plan );
    // Scale every Fourier mode by ampk * fftnorm.
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nresp; ++j )
            // Rows at or beyond nresp map to negative kx.
            float kx = i>=nresp? (float)(i-nres)*kfac : (float)i*kfac;
            float ky = (float)j*kfac;
            float kk = sqrtf(kx*kx+ky*ky);
            int idx = i*nresp+j;
            float ampk = cosmo_get_amp_k( kk ); //*sqrtf(kk);
            // Modes with kk at or above nresp*kfac are zeroed.
            if( kk >= nresp*kfac )
                ampk = 0.0;
            cdata[idx][0] *= ampk * fftnorm;
            cdata[idx][1] *= ampk * fftnorm;
    // insert code to make random numbers independent of resolution (have rectangle outliens)
    float dx = boxlength / nres;
    float vfact = ComputeVFact( 1.0f/(1.0f+g_zstart));
    // generate x-component
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nresp; ++j )
            float kx = i>=nresp? (float)(i-nres)*kfac : (float)i*kfac;
            float ky = (float)j*kfac;
            float kk = sqrtf(kx*kx+ky*ky);
            int idx = i*nresp+j; // (a+ib) * ik = iak -bk
            // kk is zero at i == 0, j == 0; that entry is overwritten just after the loop.
            cdata2[idx][0] = kx/kk/kk * cdata[idx][1];
            cdata2[idx][1] = -kx/kk/kk * cdata[idx][0];
    // Zero the kk == 0 mode, replacing the non-finite value produced above.
    cdata2[0][0] = 0.0f;
    cdata2[0][1] = 0.0f;
    fftwf_execute( iplan2 );
    // Grid position plus displacement gives x; the displacement times vfact gives vx.
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nres; ++j )
            int idx = i*(nres+2)+j;
            int ii = i*nres+j;
            P[ii].x = (float)i*dx + data2[idx];
            P[ii].vx = data2[idx] * vfact;
            P[ii].id = ii;
            P[ii].acc[0] = 0.0f;
    // generate y-component
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nresp; ++j )
            float kx = i>=nresp? (float)(i-nres)*kfac : (float)i*kfac;
            float ky = (float)j*kfac;
            float kk = sqrtf(kx*kx+ky*ky);
            int idx = i*nresp+j;
            cdata2[idx][0] = ky/kk/kk * cdata[idx][1];
            cdata2[idx][1] = -ky/kk/kk * cdata[idx][0];
    // Same handling of the kk == 0 mode as for the x-component.
    cdata2[0][0] = 0.0f;
    cdata2[0][1] = 0.0f;
    fftwf_execute( iplan2 );
    for( int i=0; i<nres; ++i )
        for( int j=0; j<nres; ++j )
            int idx = i*(nres+2)+j;
            int ii = i*nres+j;
            P[ii].y = (float)j*dx + data2[idx];
            P[ii].vy = data2[idx] * vfact;
            P[ii].acc[1] = 0.0f;
    // Release the work arrays and the random number generator.
    delete[] data;
    delete[] data2;
    gsl_rng_free( RNG );
예제 #10
// Creates the plans plan_fft_fast (r2c) and plan_ifft_fast (c2r) as in-place
// transforms on buf, with nsam.y rows and nsam.x columns.
// NOTE(review): braces are missing from this listing. The caller presumably
// has to provide a buffer padded for the in-place r2c layout; confirm.
void SetFastFFT(float *buf, DIM nsam)
	plan_fft_fast=fftwf_plan_dft_r2c_2d(nsam.y,nsam.x,buf,reinterpret_cast<fftwf_complex *>(buf),FFTW_ESTIMATE); 
	plan_ifft_fast=fftwf_plan_dft_c2r_2d(nsam.y,nsam.x,reinterpret_cast<fftwf_complex *>(buf),buf,FFTW_ESTIMATE); 
예제 #11
// Creates an in-place complex-to-real 2D plan on buf (nsam.y rows, nsam.x columns).
// NOTE(review): only the plan creation is visible; the execute/destroy
// calls and the braces are missing from this listing.
void ifft2d(float* buf, DIM nsam)
	fftwf_plan plan_fft=fftwf_plan_dft_c2r_2d(nsam.y,nsam.x,reinterpret_cast<fftwf_complex *>(buf),buf,FFTW_ESTIMATE); 
예제 #12
// Processes one depth frame: splits the 640x480 depth image into 8x8 pixel
// tiles, fits lines through each tile's 3D points to tell floor-like tiles
// from wall-like ones, accumulates wall candidates in an
// (orientation, distance) histogram, filters that histogram through a 2D
// FFT and prints the walls it finds to std::cerr.
// Returns 1 when no depth data is available yet, 0 otherwise.
// NOTE(review): this listing has lost its braces, its else branches and
// several statements (for example the fftwf_execute calls), and a block
// comment opened further down is never closed. The comments describe only
// what is visible; confirm against the full source.
int ComWallFrame::action(IDS* main)
	int x,y,xo,yo, Y;
	Kinect::depth_buffer* dframe = main->getDepth();
	Kinect* kinect = main->getKinect();
	Minotaur* minotaur = main->getMinotaur();
	Minotaur::MinotaurState minostate = minotaur->getState();

	// 3D points of the current 8x8 tile and their running sum / mean.
	Point p3d[8][8];
	Point avg3d;
	Point avgbar_flat;
	int valid;
	float zvariance, xvariance, yvariance, xSS, ySS, xybar, xzbar, yzbar;
	float slopeyx, slopezx, slopezy;
	float yint, zxint, zyint;
	float resid_yx, resid_zx, resid_zy;
	uint8_t r,g,b;
	uint16_t d, d0, d1;
	float fd;
	float floor_height = 0;
	int floor_count = 0;
	float rx, ry, rz;
	// Sine and cosine of the robot orientation, used for the rotation below.
	float sin_ori = sin(minostate.orient);
	float cos_ori = cos(minostate.orient);
	float origin_dist;
	float avg_dist;
	float orient_yx;

	uint32_t count, max_count = 0;

	bool valid_walls[480/8/WALL_AVG_SIZE][640/8/WALL_AVG_SIZE][WALL_AVG_SIZE][WALL_AVG_SIZE];

	// Histogram size: nslope orientation bins by nodist distance bins.
	int nslope = 480;
	int nodist = 256;
	// Complex values per row in the r2c output.
	int nodist_half = nodist / 2 + 1;
	float fft_data[nslope][nodist];
	// NOTE(review): no matching fftwf_free is visible in this listing.
	fftwf_complex* fft_out = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*nslope*nodist_half);

	float avg_slope, avg_yint;

	int fail_yx_res = 0;
	int fail_zx_res = 0;
	int fail_zy_res = 0;

	int fail_floor_check1 = 0;
	int fail_floor_check2 = 0;
	int fail_floor_check3 = 0;
	int fail_floor_check4 = 0;

	bool wall_check1, wall_check2, wall_check3;
	bool floor_check1, floor_check2, floor_check3, floor_check4;

	// Nothing to do until at least one depth frame has arrived.
	if(main->getDepthCount() <= 0)
		std::cerr << "MapFrame awaiting depth data" << std::endl;
		return 1;

	// Reset the per-tile wall flags.
	for(y = 0; y < 480/8/WALL_AVG_SIZE; y++)
		for(x = 0; x < 640/8/WALL_AVG_SIZE; x++)
			for(yo = 0; yo < WALL_AVG_SIZE; yo++)
				for(xo = 0; xo < WALL_AVG_SIZE; xo++)
					valid_walls[y][x][yo][xo] = false;

	// Reset the histogram.
	// NOTE(review): the same element is assigned twice in one statement; the repetition is redundant.
	for(x = 0; x < nodist; x++)
		for(y = 0; y < nslope; y++)
			fft_data[y][x] = fft_data[y][x] = 0;

	// Walk over all 8x8 tiles of the depth image.
	for(y = 0; y < 480/8; y++)
		for(x = 0; x < 640/8; x++)
			avg3d = {0,0,0};
			avg_dist = 0;
			valid = 0;
			xybar = 0;

			for(yo = 0; yo < 8; yo++)
				for(xo = 0; xo < 8; xo++)
					// Assemble the 16-bit raw depth from two bytes (d1 high, d0 low).
					d0 = (*dframe)[y*8+yo][x*8+xo][0];
					d1 = (*dframe)[y*8+yo][x*8+xo][1];
					d = d1;
					d = d << 8 | d0;

					// Accept readings other than 0x07FF that do not exceed KINECT_CALIB_DOFF.
					if(d != 0x07FF && d <= KINECT_CALIB_DOFF)
						fd = decode_kinect_dist[d];

						avg_dist += fd;
						rx = kinect->x3d(x,y,xo,yo,fd);
						ry = kinect->y3d(x,y,xo,yo,fd);
						rz = kinect->z3d(x,y,xo,yo,fd);

						// Rotate by the robot orientation and translate by the robot position.
						p3d[yo][xo].x = rx * cos_ori - ry * sin_ori + minostate.x;
						p3d[yo][xo].y = rx * sin_ori + ry * cos_ori + minostate.y;
						p3d[yo][xo].z = rz;

						avg3d.x += p3d[yo][xo].x;
						avg3d.y += p3d[yo][xo].y;
						avg3d.z += p3d[yo][xo].z;

						// NOTE(review): the flag is set to true and then straight to false;
						// presumably an else branch was lost. No increment of valid is
						// visible either, and valid_points is not declared in this listing.
						valid_points[yo][xo] = true;
						valid_points[yo][xo] = false;
						//p3d[yo][xo].valid = false;

			// Turn the coordinate sums into means over the valid points.
			avg3d.x /= valid;
			avg3d.y /= valid;
			avg3d.z /= valid;

			// Tiles with at most three quarters of their 64 points valid are coloured black.
			if(valid <= (8*8)*3/4)
				//Not enough data to represent the points
				r = 0x00;
				g = 0x00;
				b = 0x00;

				//Calculate statistics for slope calculation
				zvariance = 0;
				xvariance = 0;
				yvariance = 0;
				xSS = 0, ySS = 0;
				xybar = 0, xzbar = 0, yzbar = 0;
				for(yo = 0; yo < 8; yo++)
					for(xo = 0; xo < 8; xo++)
							xvariance += quick_square(p3d[yo][xo].x-avg3d.x);
							yvariance += quick_square(p3d[yo][xo].y-avg3d.y);
							zvariance += quick_square(p3d[yo][xo].z-avg3d.z);

							xSS += quick_square(p3d[yo][xo].x);
							ySS += quick_square(p3d[yo][xo].y);
							xybar += p3d[yo][xo].x * p3d[yo][xo].y;
							xzbar += p3d[yo][xo].x * p3d[yo][xo].z;
							yzbar += p3d[yo][xo].y * p3d[yo][xo].z;

				xybar /= valid;
				xzbar /= valid;
				yzbar /= valid;
				xSS /= valid;
				ySS /= valid;

				// Line-fit slopes: (mean of product - product of means) / (mean square - squared mean).
				slopeyx = (xybar - avg3d.x * avg3d.y) / (xSS - quick_square(avg3d.x));
				slopezx = (xzbar - avg3d.x * avg3d.z) / (xSS - quick_square(avg3d.x));
				slopezy = (yzbar - avg3d.y * avg3d.z) / (ySS - quick_square(avg3d.y));
				// Intercepts chosen so that each fitted line passes through the mean point.
				yint = avg3d.y - slopeyx * avg3d.x;
				zxint = avg3d.z - slopezx * avg3d.x;
				zyint = avg3d.z - slopezy * avg3d.y;
				resid_yx = 0;
				resid_zx = 0;
				resid_zy = 0;
				// Sum of squared residuals of each fit.
				for(yo = 0; yo < 8; yo++)
					for(xo = 0; xo < 8; xo++)
							resid_yx += quick_square((p3d[yo][xo].y - slopeyx * p3d[yo][xo].x - yint));
							resid_zx += quick_square((p3d[yo][xo].z - slopezx * p3d[yo][xo].x - zxint));
							resid_zy += quick_square((p3d[yo][xo].z - slopezy * p3d[yo][xo].y - zyint));

				// Floor test: both z slopes within 0.262 rad (about 15 degrees) of
				// horizontal, and both z residuals small relative to the accumulated distance.
				floor_check1 = fabs(atan(slopezx)) < 0.262;
				floor_check2 = fabs(atan(slopezy)) < 0.262;
				floor_check3 = resid_zx * 50000 < valid * quick_square(avg_dist/100);
				floor_check4 = resid_zy * 50000 < valid * quick_square(avg_dist/100);

				if(floor_check1 && floor_check2 && floor_check3 && floor_check4)
					//Floor or ceiling at a constant height from Kinect
					if(avg3d.z < -800 && avg3d.z > -1600)
						r = 0xFF;
						g = 0xFF;
						b = 0xFF;
						floor_height += avg3d.z;
						r = 0xFF;
						g = 0x00;
						b = 128 + avg3d.z / 12 / 100 * 256;
					//Wall or non-plane
					//r = std::min<int>(std::max<int>(resid_yx*20,0),255);

					wall_check1 = resid_yx * 1000 < valid * quick_square(avg_dist/100);

					if(wall_check1 && !floor_check3 && !floor_check4)
						//Using minimum distance to robot point location for hashing, less likely to be out of range.
						// Signed distance from the robot position to the line y = slopeyx*x + yint.
						origin_dist = (slopeyx * minostate.x - minostate.y + yint) / sqrt(quick_square(slopeyx)+1);
						orient_yx = fmod((atan(slopeyx) + PI / 2),PI);

						// Each candidate is counted in two bins half the orientation axis apart.
						// NOTE(review): no range check on the distance index is visible here.
						fft_data[(int)(orient_yx / PI * nslope / 2)][(int)(origin_dist/100) + nodist/4]++;
						fft_data[(int)(orient_yx / PI * nslope / 2 + nslope / 2)][(int)(origin_dist/100) + nodist/4]++;

						avg_walls[y/WALL_AVG_SIZE][x/WALL_AVG_SIZE][y % WALL_AVG_SIZE][x % WALL_AVG_SIZE] = Wall(slopeyx, yint);
						valid_walls[y/WALL_AVG_SIZE][x/WALL_AVG_SIZE][y % WALL_AVG_SIZE][x % WALL_AVG_SIZE] = true;
						r = 0;
						g = 255-std::min<int>(std::max<int>(orient_yx / PI * 256,0),255);//std::min<int>(std::max<int>(yint*20+128,0),255);
						b = std::min<int>(std::max<int>(orient_yx / PI * 256,0),255);

						r = g = b = 0x80;
						if(!floor_check1) fail_floor_check1++;
						if(!floor_check2) fail_floor_check2++;
						if(!floor_check3) fail_floor_check3++;
						if(!floor_check4) fail_floor_check4++;

		/*	for(yo = 0; yo < 8; yo++)
				for(xo = 0; xo < 8; xo++)
					frame[y*8+yo][x*8+xo][0] = r;
					frame[y*8+yo][x*8+xo][1] = g;
					frame[y*8+yo][x*8+xo][2] = b;

	//std::cerr << fail_yx_res << " " << fail_floor_check1 << " " << fail_floor_check2 << " " << fail_floor_check3 << " " << fail_floor_check4 << std::endl;

	// Forward r2c plan over the histogram; the execute call is not visible in this listing.
	fftwf_plan fft = fftwf_plan_dft_r2c_2d(nslope, nodist, &(fft_data[0][0]), fft_out, FFTW_ESTIMATE);

	float mag;
	float stddev_x, stddev_y;
	float var_x, var_y;
	float mean_x, mean_y;
	float filter_x, filter_y;
	float coeff_x, coeff_y;

	stddev_x = 2;
	stddev_y = 2;
	mean_x = 0;
	mean_y = nslope / 2;
	// NOTE(review): these square the loop counters x and y, not stddev_x and stddev_y; looks like a slip.
	var_x = quick_square(x);
	var_y = quick_square(y);
	coeff_x = 1 / (stddev_x * sqrt(2*PI)) / 0.4;
	coeff_y = 1 / (stddev_y * sqrt(2*PI)) / 0.4;

	for(y = 0; y < nslope; y++)
		// Y is y shifted by half the orientation axis, wrapping around.
		Y = (nslope / 2 + y) % nslope;
//		filter_y = coeff_y * exp(-1 * quick_square(mean_y - y) / (2*var_y)); 
		for(x = 0; x < nodist_half; x++)
			/*filter_x = fabs(coeff_x * exp(-1 * quick_square(mean_x - x) / (2*var_x))); 
			fft_out[Y*nodist_half+x][0] *= filter_x * filter_y;
			fft_out[Y*nodist_half+x][1] *= filter_x * filter_y;

			// Coefficients with |y - nslope/2| >= 8 or x >= 8 are set to zero.
			if(abs(y - nslope / 2) >= 8 || x >= 8)
				fft_out[Y*nodist_half+x][0] = 0;
				fft_out[Y*nodist_half+x][1] = 0;
				mag = sqrt(quick_square(fft_out[Y*nodist_half+x][0]) + quick_square(fft_out[Y*nodist_half+x][1]));

	/*			frame[y][x][0] = mag / fft_out[0][0]*256;
				frame[y][x][1] = mag / fft_out[0][0]*256;
				frame[y][x][2] = mag / fft_out[0][0]*256;*/

	// Inverse c2r plan from fft_out back into fft_data.
	// NOTE(review): the earlier plan handle is overwritten without a visible
	// fftwf_destroy_plan, and no execute call is visible.
	fft = fftwf_plan_dft_c2r_2d(nslope, nodist, fft_out, &(fft_data[0][0]), FFTW_ESTIMATE);

	float max_mag = 0, maxgrad;
	int maxgradid;

	std::set< Wall > walls;
	std::set< Wall >::iterator it_walls;

	// Find the largest value in fft_data.
	for(y = 0; y < nslope; y++)
		for(x = 0; x < nodist; x++)
			mag = fft_data[y][x];
			if(mag > max_mag)
				max_mag = mag;

	// Look for local maxima in each 3x3 neighbourhood.
	for(y = 0; y < nslope; y++)
		for(x = 0; x < nodist; x++)
			maxgrad = 0;
			maxgradid = 0;
			// NOTE(review): y + yo and x + xo leave the array at the borders
			// (-1, nslope, nodist); no bounds check is visible.
			for(yo = -1; yo <= 1; yo++)
				for(xo = -1; xo <= 1; xo++)
					if(fft_data[y + yo][x + xo] > maxgrad)
						maxgrad = fft_data[y + yo][x + xo];
						maxgradid = yo * 3 + xo;

			mag = std::max<float>(fft_data[y][x],0);

			// maxgradid stays 0 when the centre is the first largest value found, or when no value exceeds 0.
			// NOTE(review): frame is not declared in the visible code.
			if(maxgradid != 0)
				frame[y][x][0] = mag / max_mag * 255;
				frame[y][x][1] = mag / max_mag * 255;
				frame[y][x][2] = mag / max_mag * 255;
			}else if(abs(y - nslope/2) <= nslope/4){
				frame[y][x][0] = mag / max_mag * 255;
				frame[y][x][1] = 0;
				frame[y][x][2] = 0;
				// Peaks above the threshold are recorded as walls (orientation, offset).
				if(mag > 125893) //10 ** 5.1
					walls.insert(Wall(fmod((float)y / nslope * 2 * PI,PI) - PI / 2,(float)x - nodist / 4.0));

	// Print each wall's orientation (as a fraction of PI), its intercept and log10 of the peak value.
	for(it_walls = walls.begin(); it_walls != walls.end(); it_walls++)
		std::cerr << " " << it_walls->orient / PI;
		std::cerr << " " << it_walls->yint;
		std::cerr << " " << log10(max_mag);
		std::cerr << std::endl;

	std::cerr << std::endl;


/*	float prev_count = fft_data[255] > 3000 ? fft_data[255] : -1;
	float prev_count_2 = fft_data[254] > 3000 ? fft_data[254] : -1;

	for(y = 0; y < 256; y++)
		if(fft_data[y] > 3000)
			if(prev_count != -1 && prev_count > fft_data[y] && prev_count_2 < prev_count && prev_count_2 != -1)
				std::cerr << (y-128)*(1/81.487330864) << "\t" << fft_data[y] << std::endl;
			prev_count_2 = prev_count;
			prev_count = fft_data[y];
			prev_count = -1;

	std::cerr << std::endl;*/

	return 0;
예제 #13
// store translations into transMap
// Walks the image grid served by fetcher: for each image it launches an
// asynchronous FFT (FFTHolder), and for each image/neighbour pair an
// asynchronous phase correlation (phaseCorrThr). Results are moved into
// transMap as older images are retired.
// NOTE(review): many closing braces and some statements (loop exits,
// counter updates) are missing from this listing; confirm control flow
// against the full source.
void storeTrans(ImgFetcher &fetcher, const Point2f &absHint, PairToTransData &transMap, const MaxDists &dists) {
	// Neighbour offsets to pair each image with; the set depends on the traversal order.
	vector<GridPtOff> imOffs;
	if (fetcher.row_major) {
		imOffs.push_back(makeOff(-1, 0));
		imOffs.push_back(makeOff(-1, -1));
		imOffs.push_back(makeOff(0, -1));
		imOffs.push_back(makeOff(1, -1));
	} else {
		imOffs.push_back(makeOff(0, -1));
		imOffs.push_back(makeOff(-1, -1));
		imOffs.push_back(makeOff(-1, 0));
		imOffs.push_back(makeOff(-1, 1));

	// Pending asynchronous results, keyed by image pair and by grid point.
	map<PtPair, shared_future<TransData>> pairToTransFut;
	map<GridPt, shared_future<FFTHolder>> ptToFFTFut;

	unsigned loaded = 0;
	// fixPt: next image to load; waitPt: oldest image still held.
	GridPt fixPt = {{0, 0}};
	GridPt waitPt = {{0, 0}};
	Mat cur;

	// The first image determines the size used for the plans and masks.
	fetcher.getMat(fixPt, cur);
	Size imSz = cur.size();
	unsigned fftLen = getFFTLen(imSz);

	map<GridPtOff, Mat> hintToMask;
	storeHintToMask(hintToMask, imSz, absHint, dists);

	// Scratch buffer used only for planning; both plans are in place on tmp.
	// NOTE(review): no release of tmp or of the plans is visible in this listing.
	float *tmp = (float *)fftwf_malloc_thr(sizeof(float) * fftLen);
	fftwf_plan r2cPlan = fftwf_plan_dft_r2c_2d(imSz.height, imSz.width, tmp, (fftwf_complex *)tmp, FFTW_MEASURE);
	fftwf_plan c2rPlan = fftwf_plan_dft_c2r_2d(imSz.height, imSz.width, (fftwf_complex *)tmp, tmp, FFTW_MEASURE);

	bool readDone = false;
	while (true) {
		//a dirty kind of event loop
		// Retire the oldest image once more than fetcher.cap are loaded, or when reading has finished.
		if (loaded > fetcher.cap || readDone) {
			//			printf("start free waitPt %d %d\n", waitPt[0], waitPt[1]);
			// free oldest image, at waitPt
			for (auto &off: imOffs) {
				// *subtract* offset to avoid duplicating pairs
				GridPt nbrPt = {{waitPt[0] - off[0], waitPt[1] - off[1]}};
				if (ptInGrid(nbrPt, fetcher)) {
					PtPair pair = {{waitPt, nbrPt}};
					shared_future<TransData> transFut;
					if (!lookupPair(pairToTransFut, pair, transFut)) {
						printf("err: future of pair %d %d to %d %d not found\n", pair[0][0], pair[0][1], pair[1][0], pair[1][1]);
					// get() blocks until the correlation for this pair has finished.
					transMap.emplace(pair, transFut.get());

			if (!nextCoor(waitPt, fetcher)) {

		if (!readDone) {
			//printf("emplace fft at %d %d\n", fixPt[0], fixPt[1]);
			fetcher.getMat(fixPt, cur);

			// fft only supports 32-bit float with even width, for now
			assert(cur.type() == CV_32FC1 && (int)cur.step[0] == cur.size().width * 4 && cur.step[1] == 4 && cur.size().width % 2 == 0);

			// Launch the forward FFT of this image on another thread.
			// The lambda captures r2cPlan and absHint by reference, so both
			// must outlive the asynchronous task.
			ptToFFTFut.emplace(fixPt, async(launch::async,
				[&r2cPlan, &absHint](Mat im) {
					return FFTHolder(im, absHint, r2cPlan);

			// Launch a phase correlation against each in-grid neighbour.
			for (auto &off: imOffs) {
				GridPt nbrPt = {{fixPt[0] + off[0], fixPt[1] + off[1]}};
				if (ptInGrid(nbrPt, fetcher)) {
					PtPair pair = {{fixPt, nbrPt}};
					//					printf("emplace pair transfut %d %d, %d %d\n", pair[0][0], pair[0][1], pair[1][0], pair[1][1]);

					// needed since VS2012 async() can't take functions with too many arguments :(
					shared_future<FFTHolder> &a = ptToFFTFut[fixPt];
					shared_future<FFTHolder> &b = ptToFFTFut[nbrPt];
					// [=] copies the values used in the lambda, including the futures referred to by a and b.
					pairToTransFut.emplace(pair, async(launch::async, [=] {
						return phaseCorrThr(a, b, c2rPlan, pair, absHint, hintToMask, imSz);

			// Advance to the next image; reading is done when none is left.
			if (!nextCoor(fixPt, fetcher)) {
				readDone = true;


예제 #14
// Filters the input image once per adaptation level in the frequency
// domain, keeps each filtered result in the spatial domain, and then builds
// the output by choosing or interpolating between levels per pixel according
// to adaptationMap.
// NOTE(review): braces, the execution of inverseFFT, the tail of the
// interpolation expression and the plan clean-up are missing from this
// listing; confirm against the full source.
void MultiAdaptationCSF::process( BidomainArray2D *in, BidomainArray2D *out,
  BidomainArray2D *adaptationMap )
  const int cols = in->getCols(), rows = in->getRows();

  // The adaptation map must have the same size as the input.
  assert( cols == adaptationMap->getCols() );
  assert( rows == adaptationMap->getRows() );
  const FFTWComplexArray *freqOriginal = in->getFrequency(); 
  // Work buffers reused for every adaptation level.
  FFTWComplexArray freqFiltered( cols, rows );
  FFTWArray2D spatialTemp( cols, rows );
  fftwf_plan inverseFFT = fftwf_plan_dft_c2r_2d( rows, cols,
    freqFiltered.getData(), spatialTemp.getData(), FFTW_ESTIMATE ); // MEASURE would damage the data

  //NOT compatible with new Cygwin version of gcc.
  //pfs::Array2DImpl **filteredImage = new (pfs::Array2DImpl*)[adaptationLevelsCount]; 

  // Results of filtering in spatial domain are stored there
  pfs::Array2DImpl **filteredImage = new pfs::Array2DImpl*[adaptationLevelsCount]; 
  for( int i = 0; i < adaptationLevelsCount; i++ ) { // For each adaptation level
    filterFFTW( freqOriginal->getData(), freqFiltered.getData(), cols, rows, filters[i] );
//    dumpPFS( "fft_image.pfs", freqFiltered, cols/2+1, rows, "Y" );


    // Copy to filteredImage and normalize
    // Dividing by cols*rows undoes the scaling of FFTW's unnormalized transform.
    filteredImage[i] = new pfs::Array2DImpl( cols, rows );
    for( int pix = 0; pix < cols*rows; pix++ )
      (*filteredImage[i])(pix) = spatialTemp(pix)/(cols*rows);

//     // Some debug info
//     char buf[100];
//     sprintf( buf, "csf_filtered_%g.pfs", adaptationLevels[i] );
//     dumpPFS( buf, filteredImage[i], "Y" );

    // Progress indicator: one dot per level.
    std::cerr << ".";

  std::cerr << "\n";

  const pfs::Array2D *adaptationMapArray = adaptationMap->getSpatial();
  pfs::Array2D *outA = out->setSpatial(); // output array
  // Linear intepolation between adaptation levels
    // NOTE(review): this ind is shadowed by the loop variable declared on the next line.
    int ind = 0;
    for( int ind = 0; ind < rows*cols; ind++ ) {
        float adapt = (*adaptationMapArray)( ind );
        // Below the first or above the last level: use that level's image directly.
        if( adapt < adaptationLevels[0] )
          (*outA)(ind) = (*filteredImage[0])(ind);
        else if( adapt > adaptationLevels[adaptationLevelsCount-1] )
          (*outA)(ind) = (*filteredImage[adaptationLevelsCount-1])(ind);
        else {            // interpolate
          // Find the first level l whose value is not below adapt.
          int l;
          for( l = 1; l < adaptationLevelsCount; l++ )
            if(adapt <= adaptationLevels[l]) break;
          assert( l > 0 && l < adaptationLevelsCount );
          (*outA)(ind) = (*filteredImage[l-1])(ind) +
//   dumpPFS( "after_csf.pfs", in, "Y" );  
  // Clean up
  for( int i = 0; i < adaptationLevelsCount; i++ )
    delete filteredImage[i];      
  delete[] filteredImage;

예제 #15
// Ensures the FFT work buffers and plans in fft->work match the source size
// padded by radius on every side. Existing buffers are reused when the
// padded size is unchanged; otherwise they are released and rebuilt.
// Returns TRUE on success, FALSE when allocation or planning fails.
// NOTE(review): braces are missing from this listing; block extents are
// inferred from indentation only.
static gboolean
focusblur_fft_buffer_update_work (FblurFftBuffer *fft,
                                  gint            radius)
  gint row, col;

  // Padded dimensions: row is derived from the width and col from the height.
  row = fft->source.width  + 2 * radius;
  col = fft->source.height + 2 * radius;

  // Leftover buffers from a previous run are released first, with a warning.
  if (fft->work.buffers)
      g_warning ("buffer hadn't been cleared.");
      focusblur_fft_work_free_buffers (fft);

  // Same size as before: keep the allocation and only refresh the
  // radius-dependent fields when the radius changed.
  if (fft->work.image &&
      row == fft->work.row &&
      col == fft->work.col)
      if (radius != fft->work.space)
          fft->work.space = radius;
          fft->work.origin = (fft->work.col_padded + 1) * radius;
          fft->work.level = 0;
      return TRUE;

  focusblur_fft_buffer_clear_work (fft);

  fft->work.row = row;
  fft->work.col = col;
  // Even row length with room for the in-place r2c layout: equals 2 * (col / 2 + 1).
  fft->work.col_padded = (col + 2) & ~1;

  fft->work.nelements = row * fft->work.col_padded;
  fft->work.complex_nelements = fft->work.nelements / 2;
  fft->work.size = sizeof (fftwf_complex) * fft->work.complex_nelements;

  /* 32-bytes pair (4x complex or 8x real) processing */
  // Round the byte size up to a multiple of 32.
  fft->work.size += 31;
  fft->work.size &= ~31;

  /* fftwf_malloc() (or distributed package) is broken. */
  fft->work.image  = fftwf_malloc (fft->work.size);
  fft->work.kernel = fftwf_malloc (fft->work.size);
  if (! fft->work.image || ! fft->work.kernel)
      focusblur_fft_buffer_clear_work (fft);
      return FALSE;

  // Both plans are in place on work.image.
  fft->work.plan_r2c = fftwf_plan_dft_r2c_2d
    (row, col, (gfloat *) fft->work.image, fft->work.image, FFTW_ESTIMATE);

  fft->work.plan_c2r = fftwf_plan_dft_c2r_2d
    (row, col, fft->work.image, (gfloat *) fft->work.image, FFTW_ESTIMATE);

  if (! fft->work.plan_r2c || ! fft->work.plan_c2r)
      focusblur_fft_buffer_clear_work (fft);
      return FALSE;

  fft->work.space = radius;
  // Linear offset of element (radius, radius) for a row stride of col_padded + 1.
  // NOTE(review): rows are allocated with a stride of col_padded; confirm the + 1 is intended.
  fft->work.origin = (fft->work.col_padded + 1) * radius;
  fft->work.level = 0;

  return TRUE;
예제 #16
파일: sepvti2d.c 프로젝트: wangh0a/Ateam
/*
 * Program entry point.
 *
 * Reads the options verb, snap, adj and nb, a "model" file holding three
 * consecutive nz0*nx0 float panels (loaded into vp, eps and del), and the
 * optional point-coordinate files "sou" and "rec".  It then steps a 2-D
 * field through nt time samples with fft_stepforward(), injecting one record
 * of "in" and writing one record of "out" per step.  With adj set the time
 * loop runs backwards and source/receiver swap their input/output roles.
 * Returns 0.
 *
 * NOTE(review): this listing has lost every line that held only a brace,
 * and none of the "#ifdef _OPENMP" directives below has a visible "#endif"
 * (their guarded bodies may be missing too).  The nesting has to be read
 * from the indentation; restore the lost lines from the upstream file
 * before attempting to build.
 */
int main (int argc, char *argv[])
  bool verb, snap;
  bool abc, adj;
  int nz, nx, nt, ns, nr;
  float dz, dx, dt, oz, ox;
  int nz0, nx0, nb;
  float oz0, ox0;
  int nkz, nkx;
  int nzpad, nxpad;
  float **u1, **u0;
  float *ws, *wr;
  sf_file file_src = NULL, file_rec = NULL;
  sf_file file_inp = NULL, file_out = NULL;
  sf_file file_mdl = NULL;
  sf_axis az = NULL, ax = NULL, at = NULL, as = NULL, ar = NULL;
  pt2d *src2d = NULL;
  pt2d *rec2d = NULL;
  scoef2d cssinc = NULL;
  scoef2d crsinc = NULL;
  float *wi = NULL, *wo = NULL;
  sf_axis ai = NULL, ao = NULL;
  scoef2d cisinc = NULL, cosinc = NULL;
  /* spt/rpt: a source/receiver point file was given.
     ipt/opt: the input/output is point data rather than a full nz0*nx0 field. */
  bool spt = false, rpt = false;
  bool ipt = false, opt = false;
  sf_init(argc, argv);
  /* Options with their defaults: verb=false, snap=false, adj=false, nb=4.
     snap is read here but not used anywhere else in the visible code. */
  if (!sf_getbool("verb", &verb)) verb = false;
  if (!sf_getbool("snap", &snap)) snap = false;
  if (!sf_getbool("adj", &adj)) adj = false;
  if (!sf_getint("nb", &nb)) nb = 4;
  /* "sou" present: the source side is point data; it is the input in forward
     mode and the output in adjoint mode. */
  if (sf_getstring("sou") != NULL) { 
    spt = true;
    if (adj) opt = true;
    else     ipt = true;
  /* "rec" present: mirror image of the above (output forward, input adjoint). */
  if (sf_getstring("rec") != NULL) {
    rpt = true;
    if (adj) ipt = true;
    else     opt = true;
  file_inp = sf_input("in");
  file_mdl = sf_input("model");
  if (spt) file_src = sf_input("sou");
  if (rpt) file_rec = sf_input("rec");
  file_out = sf_output("out");

  /* The time axis is axis 2 of a point-data input, axis 3 of a field input. */
  if (ipt) at = sf_iaxa(file_inp, 2);
  else     at = sf_iaxa(file_inp, 3);
  if (spt) as = sf_iaxa(file_src, 2);
  if (rpt) ar = sf_iaxa(file_rec, 2);
  az = sf_iaxa(file_mdl, 1);
  ax = sf_iaxa(file_mdl, 2);
  nt = sf_n(at);  dt = sf_d(at);  //ot = sf_o(at);
  nz0 = sf_n(az);  dz = sf_d(az);  oz0 = sf_o(az);
  nx0 = sf_n(ax);  dx = sf_d(ax);  ox0 = sf_o(ax);

  /* ns / nr stay uninitialised when the matching file was not given; they
     are only read again under the same spt / rpt conditions. */
  if (spt) ns = sf_n(as);
  if (rpt) nr = sf_n(ar);
  /* Computational grid: the model grid plus nb extra cells on every side,
     with the origin shifted outwards to match. */
  nz = nz0 + 2 * nb;
  nx = nx0 + 2 * nb;
  oz = oz0 - nb * dz;
  ox = ox0 - nb * dx;
  /* abc is set here but not read again in the visible code. */
  abc = nb ? true : false;
  // sf_error("ox=%f ox0=%f oz=%f oz0=%f",ox,ox0,oz,oz0);
  /* FFT sizes: nz is first rounded up to an even number, then each axis goes
     through kiss_fft_next_fast_size().  nkz = nzpad/2 + 1 is the length of
     the half spectrum a real-to-complex transform produces along its last
     dimension (nzpad in the plans created below). */
  nzpad = kiss_fft_next_fast_size( ((nz+1)>>1)<<1 );
  nkx = nxpad = kiss_fft_next_fast_size(nx);
  nkz = nzpad / 2 + 1;
  /* float okx = - 0.5f / dx; */
  float okx = 0.f;
  float okz = 0.f;
  float dkx = 1.f / (nxpad * dx);
  float dkz = 1.f / (nzpad * dz);

  /* The model file is read as three consecutive nz0*nx0 panels, in the order
     vp, eps, del; expand2d() maps each onto the padded nz*nx grid
     (presumably by extending the edges -- confirm against expand2d). */
  float **vp, **eps, **del;
  vp  = sf_floatalloc2(nz, nx);
  eps = sf_floatalloc2(nz, nx);
  del = sf_floatalloc2(nz, nx);
  float **tmparray = sf_floatalloc2(nz0, nx0);
  sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(vp[0], tmparray[0], nz, nx, nz0, nx0);
  sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(eps[0], tmparray[0], nz, nx, nz0, nx0);
  sf_floatread(tmparray[0], nz0*nx0, file_mdl); expand2d(del[0], tmparray[0], nz, nx, nz0, nx0);

  float **vn, **vh;  
  float **eta, **lin_eta;
  lin_eta = NULL, vh = NULL;
  vn = sf_floatalloc2(nz, nx);
  vh = sf_floatalloc2(nz, nx);
  eta = sf_floatalloc2(nz, nx);
  lin_eta = sf_floatalloc2(nz, nx);

  /* Per-cell coefficients.  vp is squared in place first, so vn and vh are
     built from the squared value:
       vn = vp^2 (1 + 2 del),  vh = vp^2 (1 + 2 eps),
       eta = (eps - del) / (1 + 2 del),  lin_eta = eta (1 + 2 del). */
  for (int ix=0; ix<nx; ix++) {
    for (int iz=0; iz<nz; iz++){
      vp[ix][iz] *= vp[ix][iz];
      vn[ix][iz] = vp[ix][iz] * (1.f + 2.f * del[ix][iz]);
      vh[ix][iz] = vp[ix][iz] * (1.f + 2.f * eps[ix][iz]);
      eta[ix][iz] = (eps[ix][iz] - del[ix][iz]) / (1.f + 2.f * del[ix][iz]);
      lin_eta[ix][iz] = eta[ix][iz] * (1.f + 2.f * del[ix][iz]);

  /* Wavenumber tables.  After these loops kx[] and kz[] hold the SQUARED
     angular wavenumbers (2*pi*k)^2.  Along x the upper half of the indices
     is mapped to negative frequencies; along z only the non-negative half
     exists (nkz entries). */
  float *kx = sf_floatalloc(nkx);
  float *kz = sf_floatalloc(nkz);
  for (int ikx=0; ikx<nkx; ++ikx) {
    kx[ikx] = okx + ikx * dkx;
    /* if (ikx >= nkx/2) kx[ikx] = (nkx - ikx) * dkx; */
    if (ikx >= nkx/2) kx[ikx] = (ikx - nkx) * dkx;
    kx[ikx] *= 2 * SF_PI;
    kx[ikx] *= kx[ikx];
  for (int ikz=0; ikz<nkz; ++ikz) {
    kz[ikz] = okz + ikz * dkz;
    kz[ikz] *= 2 * SF_PI;
    kz[ikz] *= kz[ikz];

  /* Input/output point axes: receivers feed sources in adjoint mode,
     sources feed receivers in forward mode. */
  if (adj) {
    ai = ar; ao = as;
  } else {
    ai = as; ao = ar;

  /* Output layout: (points, time) for point output, (z, x, time) for a
     field output; az/ax are the unpadded model axes. */
  if (opt) {
    sf_oaxa(file_out, ao, 1);
    sf_oaxa(file_out, at, 2);
  } else {
    sf_oaxa(file_out, az, 1);
    sf_oaxa(file_out, ax, 2);
    sf_oaxa(file_out, at, 3);
  sf_fileflush(file_out, NULL);

  /* Source points: read ns coordinates, build their sinc coefficients on the
     padded grid, and bind them to the output (adjoint) or input (forward). */
  if (spt) {
    src2d = pt2dalloc1(ns);
    pt2dread1(file_src, src2d, ns, 2);
    cssinc = sinc2d_make(ns, src2d, nz, nx, dz, dx, oz, ox);
    ws = sf_floatalloc(ns);
    if (adj) { cosinc = cssinc;  wo = ws; }
    else     { cisinc = cssinc;  wi = ws; }
  /* Receiver points: same as above with the roles reversed. */
  if (rpt) {
    rec2d = pt2dalloc1(nr);
    pt2dread1(file_rec, rec2d, nr, 2);
    crsinc = sinc2d_make(nr, rec2d, nz, nx, dz, dx, oz, ox);
    wr = sf_floatalloc(nr);
    if (adj) { cisinc = crsinc;  wi = wr; }
    else     { cosinc = crsinc;  wo = wr; }

  /* Two time levels of the field plus the real/complex FFT work arrays.
     None of the fftwf_malloc() results is checked for NULL. */
  u0 = sf_floatalloc2(nz, nx);
  u1 = sf_floatalloc2(nz, nx);
  float *rwave = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float));
  float *rwavem = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float));
  fftwf_complex *cwave = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex));
  fftwf_complex *cwavem = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex));
  /* float *rwavem = (float *) fftwf_malloc(nzpad*nxpad*sizeof(float));
  fftwf_complex *cwave = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex));
  fftwf_complex *cwavem = (fftwf_complex *) fftwf_malloc(nkz*nkx*sizeof(fftwf_complex)); */

  /* boundary conditions */
  float **ucut = NULL;
  float *damp = NULL;
  /* ucut (an unpadded nz0*nx0 field) is needed unless both the input and
     the output are point data. */
  if (!(ipt &&opt)) ucut = sf_floatalloc2(nz0, nx0);
  damp = damp_make(nb);
  /* wt = dt^2 / (nxpad*nzpad); the 1/(nxpad*nzpad) factor presumably
     compensates for FFTW's unnormalised forward+inverse pair -- confirm
     against fft_stepforward(). */
  float wt = 1./(nxpad * nzpad);
  wt *= dt * dt;
  fftwf_plan forward_plan;
  fftwf_plan inverse_plan;
  /* NOTE(review): the two "#ifdef _OPENMP" lines below have no matching
     "#endif" in this listing and no OpenMP-specific body is visible; as
     written, the plans would only be created in OpenMP builds.  Lines were
     most likely lost here -- check the upstream source. */
#ifdef _OPENMP
  forward_plan = fftwf_plan_dft_r2c_2d(nxpad, nzpad,
              rwave, cwave, FFTW_MEASURE); 
#ifdef _OPENMP
  inverse_plan = fftwf_plan_dft_c2r_2d(nxpad, nzpad,
              cwavem, rwavem, FFTW_MEASURE); 
  /* Time-loop bounds: nt-1 down to 0 in adjoint mode, 0 up to nt-1 forward. */
  int itb, ite, itc;
  if (adj) {
    itb = nt -1; ite = -1; itc = -1;
  } else {
    itb = 0; ite = nt; itc = 1;

  /* Adjoint mode writes its records in reverse order via sf_seek(), so the
     output is first filled with nt placeholder records and then rewound.
     NOTE(review): wo / ucut have not been assigned any values at this
     point; what gets written depends on whether the sf_floatalloc*()
     helpers zero their memory -- confirm. */
  if (adj) {
    for (int it=0; it<nt; it++) {
      if (opt) sf_floatwrite(wo, sf_n(ao), file_out);
      else     sf_floatwrite(ucut[0], nz0*nx0, file_out);
    sf_seek(file_out, 0, SEEK_SET);

  /* Start from an all-zero state; the complex arrays are cleared as
     2 floats per element. */
  float **ptrtmp = NULL;
  memset(u0[0], 0, sizeof(float)*nz*nx);
  memset(u1[0], 0, sizeof(float)*nz*nx);
  memset(rwave, 0, sizeof(float)*nzpad*nxpad);
  memset(rwavem, 0, sizeof(float)*nzpad*nxpad);
  memset(cwave, 0, sizeof(float)*nkz*nkx*2);
  memset(cwavem, 0, sizeof(float)*nkz*nkx*2);

  for (int it=itb; it!=ite; it+=itc) { if (verb) sf_warning("it = %d;",it);
    /* NOTE(review): missing "#endif" after the timer line below; without it
       the whole loop body would be OpenMP-only. */
#ifdef _OPENMP
    double tic = omp_get_wtime();
    /* Read the input record for this step and scale it by dt^2.  In adjoint
       mode the file is positioned explicitly because time runs backwards. */
    if (ipt) {
      if (adj) sf_seek(file_inp, (off_t)(it)*sizeof(float)*sf_n(ai), SEEK_SET);
      sf_floatread(wi, sf_n(ai), file_inp);
      for (int i=0; i<sf_n(ai); i++)
        wi[i] *= dt* dt;
    } else {
      if (adj) sf_seek(file_inp, (off_t)(it)*sizeof(float)*nz0*nx0, SEEK_SET);
      sf_floatread(ucut[0], nz0*nx0, file_inp);
      for (int j=0; j<nx0; j++)
      for (int i=0; i<nz0; i++)
        ucut[j][i] *= dt * dt;

    /* apply absorbing boundary condition: E \times u@n-1 */
    damp2d_apply(u0, damp, nz, nx, nb);
    fft_stepforward(u0, u1, rwave, rwavem, cwave, cwavem,
        vp, vn, eta, vh, eps, lin_eta, kz, kx,
        forward_plan, inverse_plan,
        nz, nx, nzpad, nxpad, nkz, nkx, wt, adj);

    /* Add this step's input to u0: through the sinc coefficients for point
       input, or as a whole field (offset by nb) otherwise. */
    // sinc2d_inject1(u0, ws[it][s_idx], cssinc[s_idx]);
    if (ipt) sinc2d_inject(u0, wi, cisinc);
    else     wfld2d_inject(u0, ucut, nz0, nx0, nb);

    /* apply absorbing boundary condition: E \times u@n+1 */
    damp2d_apply(u0, damp, nz, nx, nb);

    /* loop over pointers */
    /* u0 and u1 are swapped, so the u0 read below is the array that was
       passed to fft_stepforward() as u1. */
    ptrtmp = u0;  u0 = u1;  u1 = ptrtmp;
    /* Write the output record for this step: values extracted at the output
       points, or the nz0*nx0 window of the field with the nb-cell border
       removed. */
    if (opt) {
      if (adj) sf_seek(file_out, (off_t)(it)*sizeof(float)*sf_n(ao),SEEK_SET);
      sinc2d_extract(u0, wo, cosinc);
      sf_floatwrite(wo, sf_n(ao), file_out);
    } else {
      if (adj) sf_seek(file_out, (off_t)(it)*sizeof(float)*nz0*nx0,SEEK_SET);
      wwin2d(ucut, u0, nz0, nx0, nb);
      sf_floatwrite(ucut[0], nz0*nx0, file_out);

    /* Per-step wall-clock time, reported on stderr when verb is set.
       NOTE(review): matching "#endif" not visible in this listing. */
#ifdef _OPENMP
    double toc = omp_get_wtime();
    if (verb) fprintf(stderr," clock = %lf;", toc-tic);
  } /* END OF TIME LOOP */
  /* No buffers, plans or files are released in the visible code before exit. */
  return 0;