예제 #1
0
파일: cfft3w.c 프로젝트: 1014511134/src
void icfft3_allocate(sf_complex *inp /* [nk*n2*n3] */)
/*< allocate inverse transform >*/
{
#ifdef SF_HAS_FFTW
    /* Build the backward complex-to-complex 3-D plan: it reads from inp and
       writes to cc (declared outside this function). The plan handle is
       stored in icfg. Without SF_HAS_FFTW this function does nothing. */
    fftwf_complex *src = (fftwf_complex *) inp;
    fftwf_complex *dst = (fftwf_complex *) cc[0][0];

    icfg = fftwf_plan_dft_3d(n3, n2, n1, src, dst,
			     FFTW_BACKWARD, FFTW_MEASURE);

    if (icfg == NULL) {
	sf_error("FFTW failure.");
    }
#endif
}
예제 #2
0
// Runs an in-place backward (inverse) 3-D complex FFT on d_data using FFTW.
//
// d_data is reinterpreted as an array of fftwf_complex and serves as both
// the input and the output of the transform. The plan is created with nz as
// the first dimension and nx as the last one. No 1/(nx*ny*nz) scaling is
// applied here (FFTW transforms are unnormalized).
//
// If FFTW cannot create the plan, a message is printed and d_data is left
// untouched.
void ifft3dCPU(T1* d_data, int nx, int ny, int nz)
{
	cout << "Running backward xform 3d" << endl;

	fftwf_complex* buffer = (fftwf_complex*) d_data;
	fftwf_plan plan = fftwf_plan_dft_3d(nz, ny, nx, buffer, buffer,
			FFTW_BACKWARD, FFTW_ESTIMATE);

	// The planner returns NULL on failure; executing a NULL plan would crash.
	if (plan == NULL) {
		cout << "ifft3dCPU: fftwf_plan_dft_3d failed, transform skipped" << endl;
		return;
	}

	// Inverse transform 'd_data' in place.
	//fftwf_print_plan(plan);
	fftwf_execute(plan);
	fftwf_destroy_plan(plan);
}
예제 #3
0
파일: FFT.cpp 프로젝트: pengwg/fastrecon
// Rebuilds the per-thread FFTW work buffers (m_in) and plans (m_plan).
//
// One in-place plan and one buffer of m_size.x * m_size.y * m_size.z complex
// samples are created per thread, for a 2-D or 3-D transform depending on
// m_dim. For any other m_dim a null plan is stored (and a message is
// printed) instead of an indeterminate handle.
//
// NOTE(review): clear() only drops the stored handles; buffers and plans
// created by an earlier call are not released here -- confirm they are
// freed elsewhere before plan() is called again.
void FFT::plan()
{
    m_in.clear();
    m_plan.clear();

    std::cout << "Create CPU FFT plans for " << m_num_threads << " threads" << std::endl;

    const bool supported_dim = (m_dim == 2) || (m_dim == 3);
    if (!supported_dim)
        std::cerr << "FFT::plan: unsupported dimension, null plans will be stored" << std::endl;

    // Create plans used by each threads
    std::lock_guard<std::mutex> lock(m_mutex);

    for (unsigned i = 0; i < m_num_threads; i++)
    {
        auto in = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * m_size.x * m_size.y * m_size.z);

        // Start from a null handle so that an unsupported m_dim never pushes
        // an uninitialized value into m_plan.
        fftwf_plan plan = nullptr;
        if (m_dim == 2)
            plan = fftwf_plan_dft_2d(m_size.y, m_size.x, in, in, m_sign, FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
        else if (m_dim == 3)
            plan = fftwf_plan_dft_3d(m_size.z, m_size.y, m_size.x, in, in, m_sign, FFTW_ESTIMATE | FFTW_DESTROY_INPUT);

        m_in.push_back(in);
        m_plan.push_back(plan);
    }
}
예제 #4
0
#ifdef SF_HAS_FFTW
/* Forward-transform one real wavefield component.
   Loads p (m samples) into xin with zero imaginary part, clears p so the
   caller can accumulate results into it, runs the forward plan xp
   (xin -> xout) and copies the n output samples into spec. */
static void decomplowrank3ds_forward(float *p, sf_complex *spec,
                                     sf_complex *xin, sf_complex *xout,
                                     fftwf_plan xp, int m, int n)
{
       int i;

       for(i=0;i<m;i++){
           xin[i]=sf_cmplx(p[i], 0.);
           p[i] = 0.0;
       }
       fftwf_execute(xp);
       for(i=0;i<n;i++) spec[i] = xout[i];
}

/* Apply one low-rank operator term to a spectrum and combine it into dst.
   For each of the n2 columns jn2:
     xin[i]      = rdata[jn2*n + ijky[iky]*nx*nz + ijkx[ikx]*nz + ijkz[ikz]] * spec[i]
     wp[jn2][im] = real part of the backward transform (plan xpi) of xin, divided by n
   then for each of the m output samples:
     t[im] = sum_im2 ldata[im*m2+im2] * ( sum_jn2 fmid[im2*n2+jn2] * wp[jn2][im] )
   and dst[im] = t[im] when subtract is 0, dst[im] -= t[im] otherwise.
   xin/xout are the work arrays bound to plan xpi. */
static void decomplowrank3ds_term(const float *ldata, const float *rdata, const float *fmid,
                                  const sf_complex *spec, float *dst, int subtract,
                                  sf_complex *xin, sf_complex *xout, fftwf_plan xpi,
                                  const int *ijkx, const int *ijky, const int *ijkz,
                                  int nx, int ny, int nz, int m, int n, int m2, int n2)
{
       int   i, im, im2, jn2, ikx, iky, ikz;
       float sum1, sum2, *wp;
       const int nxz = nx*nz;

       /* n2 IFFT from (ky, kx, kz) to (y, x, z) domain */
       wp = sf_floatalloc(m*n2);
       for(jn2=0;jn2<n2;jn2++)
       {
           const int jn2n=jn2*n;
           i=0;
           for(iky=0;iky<ny;iky++)
           {
              const int ii=jn2n+ijky[iky]*nxz;
              for(ikx=0;ikx<nx;ikx++)
              {
                 const int iii=ii+ijkx[ikx]*nz;
                 for(ikz=0;ikz<nz;ikz++)
                 {
                   xin[i]=rdata[iii+ijkz[ikz]]*spec[i];
                   i++;
                 }
              }
           }
           fftwf_execute(xpi);

           for(im=0;im<m;im++)
               wp[jn2*m+im] = creal(xout[im])/n;
       }

       /* Matrix multiplication in space-domain */
       for(im=0;im<m;im++)
       {
         sum1=0.0;
         for(im2=0;im2<m2;im2++)
         {
           sum2=0.0;
           for(jn2=0;jn2<n2;jn2++)
              sum2 += fmid[im2*n2+jn2]*wp[jn2*m+im];

           sum1 += ldata[im*m2+im2]*sum2;
         }/*im2 loop*/
         if(subtract) dst[im] -= sum1;
         else         dst[im]  = sum1;
       }
       free(wp);
}
#endif

void decomplowrank3ds(float *ldataxx,float *rdataxx,float *fmidxx,
                      float *ldatayy,float *rdatayy,float *fmidyy,
                      float *ldatazz,float *rdatazz,float *fmidzz,
                      float *ldataxy,float *rdataxy,float *fmidxy,
                      float *ldataxz,float *rdataxz,float *fmidxz,
                      float *ldatayz,float *rdatayz,float *fmidyz,
                      float *px, float *py, float *pz, int *ijkx, int *ijky, int *ijkz,
                      int nx, int ny, int nz, int m, int n,
                      int m2xx, int n2xx, int m2yy, int n2yy, int m2zz, int n2zz,
                      int m2xy, int n2xy, int m2xz, int n2xz, int m2yz, int n2yz)
/*< decomplowrank3ds: S-wave vector decomposition based on low-rank decomposition >*/
{
       /* px, py, pz are replaced in place (m samples each):
            px = T(xx,px) - T(xy,py) - T(xz,pz)
            py = T(yy,py) - T(yz,pz) - T(xy,px)
            pz = T(zz,pz) - T(yz,py) - T(xz,px)
          where T(ab,q) is the low-rank term built from ldataab/fmidab/rdataab
          applied to the forward transform of the input q.
          Without SF_HAS_FFTW only a warning is printed and nothing is computed.
          NOTE(review): xin holds m samples and xout n samples while both plans
          transform ny*nx*nz samples -- presumably m == n == ny*nx*nz; confirm
          against callers. */
#ifdef SF_HAS_FFTW  /* using FFTW in Madagascar */

       sf_complex *xin, *xout;
       sf_complex *pxx, *pyy, *pzz;

       fftwf_plan xp;
       fftwf_plan xpi;

       sf_warning("============= using SF_HAS_FFTW ====");

       xin=sf_complexalloc(m);
       xout=sf_complexalloc(n);
       pxx=sf_complexalloc(n);
       pyy=sf_complexalloc(n);
       pzz=sf_complexalloc(n);

       xp=fftwf_plan_dft_3d(ny,nx,nz, (fftwf_complex *) xin, (fftwf_complex *) xout,
			    FFTW_FORWARD,FFTW_ESTIMATE);

       xpi=fftwf_plan_dft_3d(ny,nx,nz,(fftwf_complex *) xin, (fftwf_complex *) xout,
			    FFTW_BACKWARD,FFTW_ESTIMATE);

       /* executing a NULL plan would crash, so stop here if planning failed */
       if (NULL == xp || NULL == xpi) sf_error("FFTW failure.");

       /* FFT: from (y,x,z) to (ky, kx, kz) domain; px, py, pz are zeroed */
       decomplowrank3ds_forward(px, pxx, xin, xout, xp, m, n);
       decomplowrank3ds_forward(py, pyy, xin, xout, xp, m, n);
       decomplowrank3ds_forward(pz, pzz, xin, xout, xp, m, n);

       /* x component */
       decomplowrank3ds_term(ldataxx, rdataxx, fmidxx, pxx, px, 0, xin, xout, xpi,
                             ijkx, ijky, ijkz, nx, ny, nz, m, n, m2xx, n2xx);
       /* Bxy of qS-wave is negative of Bxy of qP-wave*/
       decomplowrank3ds_term(ldataxy, rdataxy, fmidxy, pyy, px, 1, xin, xout, xpi,
                             ijkx, ijky, ijkz, nx, ny, nz, m, n, m2xy, n2xy);
       decomplowrank3ds_term(ldataxz, rdataxz, fmidxz, pzz, px, 1, xin, xout, xpi,
                             ijkx, ijky, ijkz, nx, ny, nz, m, n, m2xz, n2xz);

       /* y component */
       decomplowrank3ds_term(ldatayy, rdatayy, fmidyy, pyy, py, 0, xin, xout, xpi,
                             ijkx, ijky, ijkz, nx, ny, nz, m, n, m2yy, n2yy);
       decomplowrank3ds_term(ldatayz, rdatayz, fmidyz, pzz, py, 1, xin, xout, xpi,
                             ijkx, ijky, ijkz, nx, ny, nz, m, n, m2yz, n2yz);
       decomplowrank3ds_term(ldataxy, rdataxy, fmidxy, pxx, py, 1, xin, xout, xpi,
                             ijkx, ijky, ijkz, nx, ny, nz, m, n, m2xy, n2xy);

       /* z component */
       decomplowrank3ds_term(ldatazz, rdatazz, fmidzz, pzz, pz, 0, xin, xout, xpi,
                             ijkx, ijky, ijkz, nx, ny, nz, m, n, m2zz, n2zz);
       decomplowrank3ds_term(ldatayz, rdatayz, fmidyz, pyy, pz, 1, xin, xout, xpi,
                             ijkx, ijky, ijkz, nx, ny, nz, m, n, m2yz, n2yz);
       decomplowrank3ds_term(ldataxz, rdataxz, fmidxz, pxx, pz, 1, xin, xout, xpi,
                             ijkx, ijky, ijkz, nx, ny, nz, m, n, m2xz, n2xz);

       fftwf_destroy_plan(xp);
       fftwf_destroy_plan(xpi);

       free(pxx);
       free(pyy);
       free(pzz);
       free(xin);
       free(xout);
#else  /* using FFTW in user's own computer */
       sf_warning("============= using user installed FFTW ====");
#endif
}
int main(int argc, char **argv)
{
  size_t size;
  fftwf_complex *data;
  fftwf_plan plan;

  if(argc >= 2)
  {
    size = atoi(argv[1]);
    if (size <= 0)
    {
      fprintf(stderr, "ERROR, matrix size <= 0 !\n");
      return EXIT_FAILURE;
    }
  }
  else
  {
    fprintf(stderr, "ERROR, pass matrix size as 1st parameter !\n");
    return EXIT_FAILURE;
  }

  const size_t N = size * size * size;

  data = (fftwf_complex*)_mm_malloc(sizeof(fftw_complex) * N, 64);
  if (data == NULL)
  {
    fprintf(stderr, "ERROR, _mm_malloc() !\n");
    return EXIT_FAILURE;
  }

  PapiCounterList papi_routines;
  papi_routines.AddRoutine("fftw");

  // NUMA First touch
  #pragma omp parallel for
  for (size_t i = 0; i < N; ++i)
    data[i][0] = data[i][1] = 1.0;


  fprintf(stdout, "** FFTW 3D OMP **\n");
  fprintf(stdout, "* OMP_NUM_THREADS: %d\n", omp_get_max_threads());
  fprintf(stdout, "* Size of Matrix: %dx%dx%d\n", (int)size, (int)size, (int)size);

  // fftw threads plan
  fftwf_plan_with_nthreads(omp_get_max_threads());
  // fftw compute plan
  plan = fftwf_plan_dft_3d(size, size, size,
                                      data, data,
                                      FFTW_FORWARD, FFTW_MEASURE);
  papi_routines["fftw"].Start();
  // compute results
  const double tstart = omp_get_wtime();
  fftwf_execute(plan);
  const double tend = omp_get_wtime();
  papi_routines["fftw"].Stop();

  printf("* Wall time: %fs\n\n", tend - tstart);
  papi_routines.PrintScreen();
  // free memory
  _mm_free(data);
  fftwf_destroy_plan(plan);

  return EXIT_SUCCESS;
}
예제 #6
0
파일: fft3.c 프로젝트: 1014511134/src
void fft3(float *inp      /* [n1*n2*n3] */, 
	  sf_complex *out /* [nk*n2*n3] */)
/*< 3-D FFT >*/
{
    int j1, j2, j3;
    float sample;

#ifdef SF_HAS_FFTW
    /* The FFTW plan is created on first use only and then reused.
       NOTE(review): it is created with the `out` pointer of that first call;
       confirm callers always pass the same output array. */
    if (cfg == NULL) {
	if (cmplx) {
	    cfg = fftwf_plan_dft_3d(n3,n2,n1,
				    (fftwf_complex *) cc[0][0],
				    (fftwf_complex *) out,
				    FFTW_FORWARD, FFTW_MEASURE);
	} else {
	    cfg = fftwf_plan_dft_r2c_3d(n3,n2,n1,
					ff[0][0], (fftwf_complex *) out,
					FFTW_MEASURE);
	}
	if (cfg == NULL) sf_error("FFTW failure.");
    }
#endif

    /* FFT centering: copy inp into the work array, flipping the sign of
       samples according to index parity (all three axes in the complex case,
       axes 2 and 3 only in the real case) */
    for (j3=0; j3 < n3; j3++) {
	for (j2=0; j2 < n2; j2++) {
	    const int same32 = ((j3%2==0)==(j2%2==0));

	    for (j1=0; j1 < n1; j1++) {
		sample = inp[(j3*n2+j2)*n1+j1];
		if (cmplx) {
		    cc[j3][j2][j1] = sf_cmplx((same32==(j1%2==0))? sample:-sample,0.);
		} else {
		    ff[j3][j2][j1] = same32? sample:-sample;
		}
	    }
	}
    }

#ifdef SF_HAS_FFTW
    fftwf_execute(cfg);
#else

    /* Transform along axis 1: every (j3,j2) trace goes into tmp */
    for (j3=0; j3 < n3; j3++) {
	for (j2=0; j2 < n2; j2++) {
	    if (cmplx) {
		kiss_fft_stride(cfg1,(kiss_fft_cpx *) cc[j3][j2],tmp[j3][j2],1);
	    } else {
		kiss_fftr (cfg,ff[j3][j2],tmp[j3][j2]);
	    }
	}
    }

    /* Transform along axis 2: strided read from tmp, result copied back */
    for (j3=0; j3 < n3; j3++) {
	for (j1=0; j1 < nk; j1++) {
	    kiss_fft_stride(cfg2,tmp[j3][0]+j1,ctrace2,nk);
	    for (j2=0; j2 < n2; j2++) {
		tmp[j3][j2][j1]=ctrace2[j2];
	    }
	}
    }

    /* Transform along axis 3: strided read from tmp, result stored in out */
    for (j2=0; j2 < n2; j2++) {
	for (j1=0; j1 < nk; j1++) {
	    kiss_fft_stride(cfg3,tmp[0][0]+j2*nk+j1,ctrace3,nk*n2);
	    for (j3=0; j3 < n3; j3++) {
		out[(j3*n2+j2)*nk+j1] = trace3[j3];
	    }
	}
    }

#endif

}
예제 #7
0
void CLSimulator::initializeFFTW()
{
    _distances_split = (fftwf_complex *)fftwf_malloc(_nFFT * sizeof(fftwf_complex));
    _sVals_split = (fftwf_complex *)fftwf_malloc(_nFFT * sizeof(fftwf_complex));
    _convolution_split = (fftwf_complex *)fftwf_malloc(_nFFT * sizeof(fftwf_complex));
    _distances_f_split = (fftwf_complex *)fftwf_malloc(_nFFT * sizeof(fftwf_complex));
    _sVals_f_split = (fftwf_complex *)fftwf_malloc(_nFFT * sizeof(fftwf_complex));
    _convolution_f_split = (fftwf_complex *)fftwf_malloc(_nFFT * sizeof(fftwf_complex));

    assert(_nX >= 1 && _nY >= 1 && _nZ >= 1);
    assert((_nX >= _nY) && (_nY >= _nZ));

    if (_nY == 1)
    {
        _p_distances_fftw = fftwf_plan_dft_1d(_nFFT, _distances_split, _distances_f_split, FFTW_FORWARD, FFTW_ESTIMATE);
        _p_sVals_fftw = fftwf_plan_dft_1d(_nFFT, _sVals_split, _sVals_f_split, FFTW_FORWARD, FFTW_ESTIMATE);
        _p_inv_fftw = fftwf_plan_dft_1d(_nFFT, _convolution_f_split, _convolution_split, FFTW_BACKWARD, FFTW_ESTIMATE);
    } else if (_nZ == 1)
    {
        _p_distances_fftw = fftwf_plan_dft_2d(_nFFTx, _nFFTy, _distances_split, _distances_f_split, FFTW_FORWARD, FFTW_ESTIMATE);
        _p_sVals_fftw = fftwf_plan_dft_2d(_nFFTx, _nFFTy, _sVals_split, _sVals_f_split, FFTW_FORWARD, FFTW_ESTIMATE);
        _p_inv_fftw = fftwf_plan_dft_2d(_nFFTx, _nFFTy, _convolution_f_split, _convolution_split, FFTW_BACKWARD, FFTW_ESTIMATE);
    } else
    {
        _p_distances_fftw = fftwf_plan_dft_3d(_nFFTx, _nFFTy, _nFFTz, _distances_split, _distances_f_split, FFTW_FORWARD, FFTW_ESTIMATE);
        _p_sVals_fftw = fftwf_plan_dft_3d(_nFFTx, _nFFTy, _nFFTz, _sVals_split, _sVals_f_split, FFTW_FORWARD, FFTW_ESTIMATE);
        _p_inv_fftw = fftwf_plan_dft_3d(_nFFTx, _nFFTy, _nFFTz, _convolution_f_split, _convolution_split, FFTW_BACKWARD, FFTW_ESTIMATE);
    }

    for (size_t i = 0; i < _nFFT; ++i)
    {
        _sVals_split[i][0] = 0;
        _sVals_split[i][1] = 0;
    }

    for (size_t x_idx = 0, x_val = _nX - 1; x_idx < _nX; ++x_idx, --x_val) {
        for (size_t y_idx = 0, y_val = _nY - 1; y_idx < _nY; ++y_idx, --y_val) {
            float distance = sqrt(pow(float(x_val), 2.0f) + pow(float(y_val), 2.0f));
            _distances_split[x_idx + y_idx * _nFFTx][0] = _f_w_EE((float(distance)));
            _distances_split[x_idx + y_idx * _nFFTx][1] = 0;
        }
    }

    for (size_t x_idx = 0, x_val = _nX - 1; x_idx < _nX; ++x_idx, --x_val) {
        for (size_t y_idx = _nY, y_val = 1; y_idx < _nFFTy - 1; ++y_idx, ++y_val) {
            float distance = sqrt(pow(float(x_val), 2.0f) + pow(float(y_val), 2.0f));
            _distances_split[x_idx + y_idx * _nFFTx][0] = _f_w_EE((float(distance)));
            _distances_split[x_idx + y_idx * _nFFTx][1] = 0;
        }
    }

    if (_nY > 1)
    {
        for (size_t x_idx = 0; x_idx < _nFFTx; ++x_idx) {
            _distances_split[x_idx + (_nFFTy - 1) * _nFFTx][0] = 0;
            _distances_split[x_idx + (_nFFTy - 1) * _nFFTx][1] = 0;
        }
    }

    for (size_t x_idx = _nX, x_val = 1; x_idx < _nFFTx - 1; ++x_idx, ++x_val) {
        for (size_t y_idx = 0, y_val = _nY - 1; y_idx < _nY; ++y_idx, --y_val) {
            float distance = sqrt(pow(float(x_val), 2.0f) + pow(float(y_val), 2.0f));
            _distances_split[x_idx + y_idx * _nFFTx][0] = _f_w_EE((float(distance)));
            _distances_split[x_idx + y_idx * _nFFTx][1] = 0;
        }
    }

    for (size_t y_idx = 0; y_idx < _nFFTy; ++y_idx) {
        _distances_split[(_nFFTx - 1) + y_idx * _nFFTx][0] = 0;
        _distances_split[(_nFFTx - 1) + y_idx * _nFFTx][1] = 0;
    }

    for (size_t x_idx = _nX, x_val = 1; x_idx < _nFFTx - 1; ++x_idx, ++x_val) {
        for (size_t y_idx = _nY, y_val = 1; y_idx < _nFFTy - 1; ++y_idx, ++y_val) {
            float distance = sqrt(pow(float(x_val), 2.0f) + pow(float(y_val), 2.0f));
            _distances_split[x_idx + y_idx * _nFFTx][0] = _f_w_EE((float(distance)));
            _distances_split[x_idx + y_idx * _nFFTx][1] = 0;
        }
    }

    fftwf_execute(_p_distances_fftw);
}
예제 #8
0
파일: seplowrank.c 프로젝트: 1014511134/src
void seplowrank3d(float *ldata,float *rdata,float *fmid, float *p, int *ijkx, int *ijky, int *ijkz,
                      int nx, int ny, int nz, int m, int n, int m2, int n2, int iflag)
/*< seplowrank3d: wave-mode separation based on low-rank decomposition >*/
{
       float *wp;

       /* work array: n2 inverse-transformed fields of m samples each */
       wp = sf_floatalloc(m*n2);

#ifdef SF_HAS_FFTW  /* using FFTW in Madagascar */

       int k, ip, row, col, jx, jy, jz;
       float outer, inner;
       const int plane = nx*nz;
       sf_complex *spec, *xin, *xout;
       fftwf_plan fwd, inv;

       xin  = sf_complexalloc(m);
       xout = sf_complexalloc(n);
       spec = sf_complexalloc(n);

       /* both plans transform xin into xout */
       fwd = fftwf_plan_dft_3d(ny,nx,nz, (fftwf_complex *) xin, (fftwf_complex *) xout,
			       FFTW_FORWARD,FFTW_ESTIMATE);
       inv = fftwf_plan_dft_3d(ny,nx,nz, (fftwf_complex *) xin, (fftwf_complex *) xout,
			       FFTW_BACKWARD,FFTW_ESTIMATE);

       /* forward FFT of p: loaded as the real part when iflag is 1,
          as the imaginary part otherwise */
       for (k=0; k<m; k++)
           xin[k] = (iflag==1)? sf_cmplx(p[k], 0.) : sf_cmplx(0.0, p[k]);

       fftwf_execute(fwd);

       for (k=0; k<n; k++)
           spec[k] = xout[k];

       /* one inverse FFT per column of rdata; ijky/ijkx/ijkz remap the
          wavenumber indices used to address rdata */
       for (col=0; col<n2; col++)
       {
           const int base = col*n;

           k = 0;
           for (jy=0; jy<ny; jy++)
           {
              const int offy = base + ijky[jy]*plane;
              for (jx=0; jx<nx; jx++)
              {
                 const int offyx = offy + ijkx[jx]*nz;
                 for (jz=0; jz<nz; jz++)
                 {
                    xin[k] = rdata[offyx+ijkz[jz]]*spec[k];
                    k++;
                 }
              }
           }

           fftwf_execute(inv);

           /* keep the real part, scaled by 1/n */
           for (ip=0; ip<m; ip++)
               wp[col*m+ip] = creal(xout[ip])/n;
       }

       fftwf_destroy_plan(fwd);
       fftwf_destroy_plan(inv);

       free(spec);
       free(xin);
       free(xout);

       /* p[ip] = sum_row ldata[ip,row] * ( sum_col fmid[row,col] * wp[col,ip] ) */
       for (ip=0; ip<m; ip++)
       {
           outer = 0.0;
           for (row=0; row<m2; row++)
           {
              inner = 0.0;
              for (col=0; col<n2; col++)
                 inner += fmid[row*n2+col]*wp[col*m+ip];

              outer += ldata[ip*m2+row]*inner;
           }
           p[ip] = outer;
       }

#endif

       free(wp);
}
예제 #9
0
파일: sepdivcurl.c 프로젝트: 1014511134/src
void sepdiv3dD(double *rk, float *x, int *ijkx, int *ijky, int *ijkz, int nx,int ny,int nz,int m,int n, int iflag)
/*< sepdiv3d: separating wave-modes based on divergence >*/
{
       /* Filters x in place: forward 3-D FFT of x, multiplication of every
          spectral sample by the operator value rk (addressed through the
          ijky/ijkx/ijkz index maps), backward 3-D FFT, and the real part
          divided by n written back to x.
          iflag == 1 feeds x in as a real signal; any other value is
          equivalent to multiplying the spectrum by i (x is loaded as the
          imaginary part in the SF_HAS_FFTW branch, the spectrum is
          multiplied by i in the other branch).
          NOTE(review): xin holds m samples and xout n samples while the
          plans transform ny*nx*nz samples -- presumably m == n == ny*nx*nz;
          confirm against callers. */
       int i, im, ikx, iky, ikz, nxz;

	   nxz=nx*nz;
#ifdef SF_HAS_FFTW  // using FFTW in Madagascar
 
       sf_complex *xin, *xout;

       fftwf_plan xp;
       fftwf_plan xpi;

       xin=sf_complexalloc(m);
       xout=sf_complexalloc(n);

       xp=fftwf_plan_dft_3d(ny,nx,nz, (fftwf_complex *) xin, (fftwf_complex *) xout,
			    FFTW_FORWARD,FFTW_ESTIMATE);

       xpi=fftwf_plan_dft_3d(ny,nx,nz,(fftwf_complex *) xin, (fftwf_complex *) xout,
			    FFTW_BACKWARD,FFTW_ESTIMATE);

       /* FFT: from (y,x,z) to (ky, kx, kz) domain */

       if(iflag==1)
           for(i=0;i<m;i++) xin[i]=sf_cmplx(x[i], 0.);
       else 
           for(i=0;i<m;i++) xin[i]=sf_cmplx(0.0, x[i]);

       fftwf_execute(xp);
           
       /* IFFT from (ky, kx, kz) to (y, x, z) domain*/
       // Note: the operator spectrum is ordered differently from the FFT output
	   i=0;
       for(iky=0;iky<ny;iky++)
       {
          int iyxnz=ijky[iky]*nxz;
          for(ikx=0;ikx<nx;ikx++)
          {
             int ixnz=iyxnz+ijkx[ikx]*nz;
             for(ikz=0;ikz<nz;ikz++)
             {
                xin[i]=(float)rk[ixnz+ijkz[ikz]]*xout[i];          
                i++;
             }
		  }
	   }
       // (ky, kx, kz) to (y, x, z) domain
       fftwf_execute(xpi);
       for(im=0;im<m;im++)
          x[im] = creal(xout[im])/n;

       fftwf_destroy_plan(xp);
       fftwf_destroy_plan(xpi);
       free(xin);
       free(xout);

#else  // using FFTW in user's own computer
       //sf_warning("============= using user installed FFTW ====");

       /* This branch treats fftw_complex as the two-element array type
          ([0] = real part, [1] = imaginary part). */
       fftw_complex *xin, *xout;

       fftw_plan xp;
       fftw_plan xpi;

       /* NOTE(review): fftw_complexalloc is not defined in this block;
          confirm that it pairs with the free() calls below. */
       xin=fftw_complexalloc(m);
       xout=fftw_complexalloc(n);

       /* 3-D plans, matching the ny*nx*nz samples processed below and the
          SF_HAS_FFTW branch */
       xp=fftw_plan_dft_3d(ny,nx,nz, (fftw_complex *) xin, (fftw_complex *) xout,
			    FFTW_FORWARD,FFTW_ESTIMATE);
       xpi=fftw_plan_dft_3d(ny,nx,nz,(fftw_complex *) xin, (fftw_complex *) xout,
			    FFTW_BACKWARD,FFTW_ESTIMATE);

       /* FFT: from (y,x,z) to (ky, kx, kz) domain */
       for(i=0;i<m;i++)
       {
          xin[i][0]=x[i];
          xin[i][1]=0.;
       }

       fftw_execute(xp);
           
       if(iflag!=1)
       {
          /* multiply the spectrum by i: (a + ib)*i = -b + ia */
          for(i=0;i<n;i++)
          {
             const double re=xout[i][0];
             xout[i][0]=-xout[i][1];
             xout[i][1]=re;
          }
       }

       /* Note: the operator spectrum is ordered differently from the FFT output */ 
       /* IFFT from (ky, kx, kz) to (y, x, z) domain*/
	   i=0;
       for(iky=0;iky<ny;iky++)
       {
          int iyxnz=ijky[iky]*nxz;
          for(ikx=0;ikx<nx;ikx++)
          {
              int ixnz=iyxnz+ijkx[ikx]*nz;
              for(ikz=0;ikz<nz;ikz++)
              {
                /* real operator value scales both parts of the sample */
                const double w=(float)rk[ixnz+ijkz[ikz]];
                xin[i][0]=w*xout[i][0];
                xin[i][1]=w*xout[i][1];
                i++;
              }
		  }
	   }
       /* (ky, kx, kz) to (y, x, z) domain */
       fftw_execute(xpi);

       for(im=0;im<m;im++)
          x[im] = xout[im][0]/n;
      
       fftw_destroy_plan(xp);
       fftw_destroy_plan(xpi);
       free(xin);
       free(xout);

#endif
}