Esempio n. 1
0
static void test_cov(){/*not good */
    rand_t rstat;
    int seed=4;
    double r0=0.2;
    double dx=1./64;
    long N=1+1024;
    long nx=N;
    long ny=N;
    long nframe=1;
    seed_rand(&rstat, seed);
    map_t *atm=mapnew(nx, ny, dx,dx, NULL);
    cmat *atmhat=cnew((N+1)*3,(N+1)*3);
    dmat *atmhattot=dnew((N+1)*3,(N+1)*3);
    //cfft2plan(atmhat,-1);
    //cfft2plan(atmhat, 1);
    dset((dmat*)atm,1);
    cembedd(atmhat, (dmat*)atm, 0);
    cfft2(atmhat, -1);
    cabs22d(&atmhattot, 1, atmhat, 1);
    ccpd(&atmhat, atmhattot);
    cfft2i(atmhat, 1);
    cfftshift(atmhat);
    dmat *denom=dnew((N+1)*3,(N+1)*3);
    dmat *cov=dnew((N+1)*3,(N+1)*3);
    creal2d(&denom, 0, atmhat, 1);
    writebin(denom, "denom.bin");
    
    dzero(atmhattot);
    for(long i=0; i<nframe; i++){
	info("%ld of %ld\n", i, nframe);
	
	for(long j=0; j<nx*ny; j++){
	    atm->p[j]=randn(&rstat);
	}
	fractal_do((dmat*)atm, dx, r0,L0,ninit);
	/*mapwrite(atm, "atm_%ld.bin", i); */
	cembedd(atmhat, (dmat*)atm, 0);
	cfft2(atmhat, -1);
	cabs22d(&atmhattot, 1, atmhat, 1);

	if(i==0 || (i+1)%10==0){
	    dscale(atmhattot, 1./(i+1));
	    ccpd(&atmhat, atmhattot);
	    writebin(atmhattot, "atm_psf_%ld.bin",i+1);
	    cfft2i(atmhat, 1);
	    cfftshift(atmhat);
	    creal2d(&cov, 0, atmhat, 1);
	    for(long k=0; k<cov->nx*cov->ny; k++){
		cov->p[k]/=denom->p[k];
	    }
	    writebin(cov, "atm_cov_%ld.bin",i+1);
	}
    }
}
Esempio n. 2
0
/**
   Convert PSD into time series.*/
dmat* psd2time(const dmat *psdin, rand_t *rstat, double dt, int nstepin){
    if(!psdin){
	error("psdin cannot be null\n");
    }
    long nstep=nextpow2(nstepin);
    double df=1./(dt*nstep);
    dmat *fs=dlinspace(0, df, nstep);
    dmat *psd=NULL;
    if(psdin->ny==1){//[alpha, beta, fmin, fmax] discribes power law with cut on/off freq.
	psd=dnew(nstep, 1);
	double alpha=psdin->p[0];
	double beta=psdin->p[1];
	long i0=1, imax=nstep;
	if(psdin->nx>2){
	    i0=(long)round(psdin->p[2]/df);
	    if(i0<1) i0=1;
	}
	if(psdin->nx>3){
	    imax=(long)round(psdin->p[3]/df);
	}
	info("fmin=%g, fmax=%g, df=%g, i0=%ld, imax=%ld\n", 
	     psdin->p[2], psdin->p[3], df, i0, imax);
	for(long i=i0; i<imax; i++){
	    psd->p[i]=beta*pow(i*df, alpha);
	}
    }else if(psdin->ny==2){
	if(psdin->nx<2){ 
	    error("Invalid PSD\n");
	}
	psd=dinterp1(psdin, 0, fs, 1e-40);
	psd->p[0]=0;/*disable pistion. */
    }else{
	error("psdin is invalid format.\n");
    }
    cmat *wshat=cnew(nstep, 1);
    //cfft2plan(wshat, -1);
    for(long i=0; i<nstep; i++){
	wshat->p[i]=sqrt(psd->p[i]*df)*COMPLEX(randn(rstat), randn(rstat));
    }
    cfft2(wshat, -1);
    dmat *out=NULL;
    creal2d(&out, 0, wshat, 1);
    cfree(wshat);
    dfree(psd);
    dfree(fs);
    dresize(out, nstepin, 1);
    return out;
}
Esempio n. 3
0
/*
static int test_fft_speed_small(){
    int nis=128;
    int *is=mycalloc(nis,int);
    dmat *tim=dnew(nis,1);
    for(int ii=0; ii<nis; ii++){
	is[ii]=ii+1;
    }
    ccell *ac=cellnew(nis,1);
    rand_t stat;
    seed_rand(&stat,1);
    for(int ii=0; ii<nis; ii++){
	ac->p[ii]=cnew(is[ii],is[ii]);
	//cfft2plan(ac->p[ii],-1);
	crandn(ac->p[ii],20,&stat);
    }
    TIC;
    for(int ii=0; ii<nis; ii++){
	info2("size %4d: ",is[ii]);
	tic;
	for(int i=0; i<1000; i++){
	    cfft2(ac->p[ii],-1);
	}
	toc2("fft");
	tim->p[ii]=toc3;
    }
    writebin(tim,"fft_timing");
}

static void test_fft_speed(){
    int nis=2048;
    int *is=mycalloc(nis,int);
    dmat *tim=dnew(nis,1);
    for(int ii=0; ii<nis; ii++){
	is[ii]=(ii+1)*2;
    }
    ccell *ac=cellnew(nis,1);
    rand_t stat;
    seed_rand(&stat,1);
    TIC;
    for(int ii=0; ii<nis; ii++){
	info2("size %4d: ",is[ii]);
	tic;
	ac->p[ii]=cnew(is[ii],is[ii]);
	//cfft2plan(ac->p[ii],-1);
	crandn(ac->p[ii],20,&stat);
	toc("plan");
    }
    toc("plan");
    for(int ii=0; ii<nis; ii++){
	info2("size %4d: ",is[ii]);
	tic;
	int nrepeat;
	if(is[ii]<300)
	    nrepeat=100;
	else if(is[ii]<1000)
	    nrepeat=10;
	else
	    nrepeat=1;

	for(int i=0; i<nrepeat; i++){
	    cfft2(ac->p[ii],-1);
	}
	toc2("fft");
	tim->p[ii]=toc3/nrepeat;
    }
    writebin(tim,"fft_timing");
    }*/
static void test_fft_special(){
    int nis=2;
    int *is=mycalloc(nis,int);
    dmat *tim=dnew(nis,1);
    is[0]=3824;
    is[1]=4096;
    ccell *ac=ccellnew(nis,1);
    rand_t rstat;
    seed_rand(&rstat,1);
    TIC;
    for(int ii=0; ii<nis; ii++){
	info2("size %4d: ",is[ii]);
	tic;
	ac->p[ii]=cnew(is[ii],is[ii]);
	//cfft2plan(ac->p[ii],-1);
	//cfft2partialplan(ac->p[ii],512,-1);
	crandn(ac->p[ii],20,&rstat);
	toc("plan");
    }

    for(int ii=0; ii<nis; ii++){
	info2("size %4d: ",is[ii]);
	tic;
	int nrepeat;
	if(is[ii]<300)
	    nrepeat=100;
	else if(is[ii]<1000)
	    nrepeat=10;
	else
	    nrepeat=4;

	for(int i=0; i<nrepeat; i++){
	    cfft2(ac->p[ii],-1);
	}
	toc2("fft");
	for(int i=0; i<nrepeat; i++){
	    cfft2partial(ac->p[ii],512,-1);
	}
	toc2("fft2partial");
	tim->p[ii]=toc3/nrepeat;
    }
    writebin(tim,"fft_timing");

}
Esempio n. 4
0
dmat *psd1d(const dmat *v, /**<[in] The data sequence*/
	    long nseg      /**<[in] Number of overlapping segments*/
    ){
    long nx;
    long ncol;
    if(v->nx==1){
	nx=v->ny;
	ncol=1;
    }else{
	nx=v->nx;
	ncol=v->ny;
    }
    if(nseg<=1) nseg=1;
    const int lseg2=nx/(nseg+1);
    const int lseg=lseg2*2;
    dmat *psd=dnew(lseg2+1, ncol);
    cmat *hat=cnew(lseg, 1);
    //cfft2plan(hat, -1);
    for(long icol=0; icol<ncol; icol++){
	double *ppsd=psd->p+icol*(lseg2+1);
	for(int iseg=0; iseg<nseg; iseg++){
	    double* p=v->p+icol*nx+iseg*lseg2;
	    for(int ix=0; ix<lseg; ix++){
		hat->p[ix]=p[ix]*W_J(ix, lseg2);
	    }
	    cfft2(hat, -1);
	    ppsd[0]+=cabs2(hat->p[0]);
	    for(int ix=1; ix<lseg2; ix++){
		ppsd[ix]+=cabs2(hat->p[ix])+cabs2(hat->p[lseg-ix]);
	    }
	    ppsd[lseg2]+=cabs2(hat->p[lseg2]);
	}
    }
    double sumwt=0;
    for(int ix=0; ix<lseg; ix++){
	sumwt+=pow(W_J(ix, lseg2), 2);
    }
    sumwt*=lseg*nseg;
    dscale(psd, 1./sumwt);
    cfree(hat);
    return psd;
}
Esempio n. 5
0
/*
  Compute cxx on atm to compare against L2, invpsd, fractal.
*/
static void test_cxx(){
    rand_t rstat;
    int seed=4;
    double r0=0.2;
    double dx=1./4;
    long N=16;
    long nx=N;
    long ny=N;
    long nframe=40960;
    seed_rand(&rstat, seed);
    {
	dmat *cxx=dnew(N*N,N*N);
	map_t *atm=mapnew(nx+1, ny+1, dx, dx,NULL);
	for(long i=0; i<nframe; i++){
	    info("%ld of %ld\n", i, nframe);
	    for(long j=0; j<(nx+1)*(ny+1); j++){
		atm->p[j]=randn(&rstat);
	    }
	    fractal_do((dmat*)atm, dx, r0, L0, ninit);
	    dmat *sec=dsub((dmat*)atm, 0, nx, 0, ny);
	    dmat *atmvec=dref_reshape(sec, nx*ny, 1);
	    dmm(&cxx,1, atmvec,atmvec,"nt",1);
	    dfree(atmvec);
	    dfree(sec);
	}
	dscale(cxx, 1./nframe);
	writebin(cxx, "cxx_fractal");
	dfree(cxx);
	mapfree(atm);
    }
    {
	dmat *cxx=dnew(N*N,N*N);
	dmat *spect=turbpsd(nx, ny, dx, r0, 100, 0, 0.5);
	spect->p[0]=spect->p[1];
	cmat *atm=cnew(nx, ny);
	//cfft2plan(atm, -1);
	dmat *atmr=dnew(nx*ny,1);
	dmat *atmi=dnew(nx*ny,1);
	for(long ii=0; ii<nframe; ii+=2){
	    info("%ld of %ld\n", ii, nframe);
	    for(long i=0; i<atm->nx*atm->ny; i++){
		atm->p[i]=COMPLEX(randn(&rstat), randn(&rstat))*spect->p[i];
	    }
	    cfft2(atm, -1);
	    for(long i=0; i<atm->nx*atm->ny; i++){
		atmr->p[i]=creal(atm->p[i]);
		atmi->p[i]=cimag(atm->p[i]);
	    }
	    dmm(&cxx,1, atmr,atmr,"nt",1);
	    dmm(&cxx,1, atmi,atmi,"nt",1);
	}
	dscale(cxx, 1./nframe);
	writebin(cxx, "cxx_fft");
	dfree(cxx);
	dfree(atmr);
	dfree(atmi);
	cfree(atm);
    }
    loc_t *loc=mksqloc_auto(16,16,1./4,1./4);
    locwrite(loc,"loc");
    dmat *B=stfun_kolmogorov(loc, r0);
    writebin(B, "B_theory");
}
Esempio n. 6
0
static void test_psd(){
    rand_t rstat;
    int seed=4;
    double r0=0.2;
    double dx=1./64;
    long N=1024;
    long nx=N;
    long ny=N;
    long ratio=1;
    long xskip=nx*(ratio-1)/2;
    long yskip=ny*(ratio-1)/2;
    long nframe=512;
    seed_rand(&rstat, seed);
    if(1){
	map_t *atm=mapnew(nx+1, ny+1, dx,dx, NULL);
	cmat *hat=cnew(nx*ratio, ny*ratio);
	//cfft2plan(hat, -1);
	dmat *hattot=dnew(nx*ratio, ny*ratio);

	for(long i=0; i<nframe; i++){
	    info2("%ld of %ld\n", i, nframe);
	    for(long j=0; j<(nx+1)*(ny+1); j++){
		atm->p[j]=randn(&rstat);
	    }
	    fractal_do((dmat*)atm, dx, r0,L0,ninit);
	    czero(hat);
	    for(long iy=0; iy<ny; iy++){
		for(long ix=0; ix<nx; ix++){
		    IND(hat,ix+xskip,iy+yskip)=IND(atm,ix,iy);
		}
	    }
	    cfftshift(hat);
	    cfft2i(hat, -1);
	    cabs22d(&hattot, 1, hat, 1);
	}
	dscale(hattot, 1./nframe);
	dfftshift(hattot);
	writebin(hattot, "PSD_fractal");
    }
    {
	dmat *spect=turbpsd(nx, ny, dx, r0, 100, 0, 0.5);
	writebin(spect, "spect");
	cmat *hat=cnew(nx*ratio, ny*ratio);
	//cfft2plan(hat, -1);
	dmat *hattot=dnew(nx*ratio, ny*ratio);
	cmat *atm=cnew(nx, ny);
	//cfft2plan(atm, -1);
	dmat *atmr=dnew(atm->nx, atm->ny);
	dmat *atmi=dnew(atm->nx, atm->ny);
	cmat*  phat=hat;
	dmat*  patmr=atmr;
	dmat*  patmi=atmi;
	for(long ii=0; ii<nframe; ii+=2){
	    info2("%ld of %ld\n", ii, nframe);
	    for(long i=0; i<atm->nx*atm->ny; i++){
		atm->p[i]=COMPLEX(randn(&rstat), randn(&rstat))*spect->p[i];
	    }
	    cfft2(atm, -1);
	    for(long i=0; i<atm->nx*atm->ny; i++){
		atmr->p[i]=creal(atm->p[i]);
		atmi->p[i]=cimag(atm->p[i]);
	    }
	    czero(hat);
	    for(long iy=0; iy<ny; iy++){
		for(long ix=0; ix<nx; ix++){
		    IND(phat,ix+xskip,iy+yskip)=IND(patmr,ix,iy);
		}
	    }
	    cfftshift(hat);
	    cfft2i(hat, -1);
	    cabs22d(&hattot, 1, hat, 1);
	    czero(hat);
	    for(long iy=0; iy<ny; iy++){
		for(long ix=0; ix<nx; ix++){
		    IND(phat,ix+xskip,iy+yskip)=IND(patmi,ix,iy);
		}
	    }
	    cfftshift(hat);
	    cfft2i(hat, -1);
	    cabs22d(&hattot, 1, hat, 1);
	}
	dscale(hattot, 1./nframe);
	dfftshift(hattot);
	writebin(hattot, "PSD_fft");
    }
}
Esempio n. 7
0
int main(int argc, char* argv[])
{
  bool verb,sub,os;
  int it,iz,im,ikx,ikz,ix,i,j;     /* index variables */
  int nt,nz,nx, m2, nk, nzx, nz2, nx2, nzx2, n2, pad1,nth;
  sf_complex c,old;

  /* I/O arrays*/
  sf_complex *ww,*curr,*prev,*cwave,*cwavem,**wave,**lt, **rt;
  sf_complex *snap;
  float *rr;
    
  sf_file Fw,Fr,Fo;    /* I/O files */
  sf_axis at,az,ax;    /* cube axes */
  sf_file left, right;

  /*MPI related*/
  int cpuid,numprocs;
  int provided;
  int n_local, o_local;
  int ozx2;
  sf_complex *sendbuf, *recvbuf;
  int *rcounts, *displs;

  //MPI_Init(&argc,&argv);
  MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED,&provided);
  threads_ok = provided >= MPI_THREAD_FUNNELED;

  sf_init(argc,argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &cpuid);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

  if(!sf_getbool("verb",&verb)) verb=false; /* verbosity */
  if(!sf_getbool("os",&os)) os=true; /* one-step flag */
  if (os) {
    sf_warning("One-step wave extrapolation");
    if(!sf_getbool("sub",&sub)) sub=false; /* subtraction flag */
  } else {
    sf_warning("Two-step wave extrapolation");
    if(!sf_getbool("sub",&sub)) sub=true; /* subtraction flag */
  }

  /* setup I/O files */
  Fw = sf_input ("--input" );
  Fo = sf_output("--output");
  Fr = sf_input ("ref");

  if (SF_COMPLEX != sf_gettype(Fw)) sf_error("Need complex input");
  if (SF_FLOAT != sf_gettype(Fr)) sf_error("Need float ref");

  sf_settype(Fo,SF_COMPLEX);

  /* Read/Write axes */
  at = sf_iaxa(Fw,1); nt = sf_n(at); 
  az = sf_iaxa(Fr,1); nz = sf_n(az); 
  ax = sf_iaxa(Fr,2); nx = sf_n(ax); 

  if (cpuid==0) {
    sf_oaxa(Fo,az,1); 
    sf_oaxa(Fo,ax,2); 
    //sf_setn(at,1);
    sf_oaxa(Fo,at,3);
  }
    
  if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */

#pragma omp parallel
  {
    nth = omp_get_num_threads();
  }
  if (verb) sf_warning(">>>> Using %d threads <<<<<", nth);

  nk = cfft2_init(pad1,nz,nx,&nz2,&nx2,&n_local,&o_local);
  sf_warning("Cpuid=%d,n0=%d,n1=%d,local_n0=%d,local_0_start=%d",cpuid,nz2,nx2,n_local,o_local);

  nzx = nz*nx;
  //  nzx2 = nz2*nx2;
  nzx2 = n_local*nz2;
  ozx2 = o_local*nz2;

  /* propagator matrices */
  left = sf_input("left");
  right = sf_input("right");

  if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
  if (!sf_histint(left,"n2",&m2))  sf_error("Need n2= in left");
    
  if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
  if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);
  
  lt = sf_complexalloc2(nzx,m2);
  rt = sf_complexalloc2(m2,nk);

  sf_complexread(lt[0],nzx*m2,left);
  sf_complexread(rt[0],m2*nk,right);

  sf_fileclose(left);
  sf_fileclose(right);

  /* read wavelet & reflectivity */
  ww=sf_complexalloc(nt);
  sf_complexread(ww,nt ,Fw);

  rr=sf_floatalloc(nzx); 
  sf_floatread(rr,nzx,Fr);

  curr   = sf_complexalloc(nzx2);
  if (!os) prev = sf_complexalloc(nzx2);
  else prev = NULL;

  cwave  = sf_complexalloc(nzx2);
  cwavem = sf_complexalloc(nzx2);
  wave   = sf_complexalloc2(nzx2,m2);

  for (iz=0; iz < nzx2; iz++) {
    curr[iz] = sf_cmplx(0.,0.);
    if (!os) prev[iz] = sf_cmplx(0.,0.);
  }

  sendbuf = curr;
  if (cpuid==0) {
    snap = sf_complexalloc(nz2*nx2);
    recvbuf = snap;
    rcounts = sf_intalloc(numprocs);
    displs  = sf_intalloc(numprocs);
  } else {
    snap = NULL;
    recvbuf = NULL;
    rcounts = NULL;
    displs = NULL;
  }

  MPI_Gather(&nzx2, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Gather(&ozx2, 1, MPI_INT, displs, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* MAIN LOOP */
  for (it=0; it<nt; it++) {
    if(verb) sf_warning("it=%d;",it);

    /* matrix multiplication */
    cfft2(curr,cwave);

    for (im = 0; im < m2; im++) {
      //for (ik = 0; ik < nk; ik++) {
      for (ikx = 0; ikx < n_local; ikx++) {
        for (ikz = 0; ikz < nz2; ikz++) {
          i = ikz + (o_local+ikx)*nz2;
          j = ikz + ikx*nz2;
#ifdef SF_HAS_COMPLEX_H
          cwavem[j] = cwave[j]*rt[i][im];
#else
          cwavem[j] = sf_cmul(cwave[j],rt[i][im]);
#endif
        }
      }
      icfft2(wave[im],cwavem);
    }

    for (ix = 0; ix < n_local && (ix+o_local)<nx ; ix++) {
      for (iz=0; iz < nz; iz++) {
        i = iz + (o_local+ix)*nz;  /* original grid */
        j = iz + ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
        c = ww[it] * rr[i]; // source term
#else
        c = sf_crmul(ww[it], rr[i]); // source term
#endif
        if (sub) c += curr[j];
        if (!os) {
          old = curr[j];
#ifdef SF_HAS_COMPLEX_H
          c += sub? (old-prev[j]) : -prev[j];
#else
          c = sf_cadd(c,sub? sf_csub(old,prev[j]) : sf_cneg(prev[j]));
#endif
          prev[j] = old;
        }
        for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
          c += lt[im][i]*wave[im][j];
#else
          c += sf_cmul(lt[im][i], wave[im][j]);
#endif
        }

        curr[j] = c;
      }
    }

    /* write wavefield to output */
    MPI_Gatherv(sendbuf, nzx2, MPI_COMPLEX, recvbuf, rcounts, displs, MPI_COMPLEX, 0, MPI_COMM_WORLD);

    if (cpuid==0) {
      for (ix = 0; ix < nx; ix++)
        sf_complexwrite(snap+ix*nz2,nz,Fo);
    }

  }

  if(verb) sf_warning("."); 
  cfft2_finalize();

  MPI_Finalize();
  exit (0);
}
Esempio n. 8
0
int main()
{
/* 
   SSE version of cfft2 - uses INTEL intrinsics
       W. Petersen, SAM. Math. ETHZ 2 May, 2002 
*/
   int first,i,icase,it,n;
   float seed,error,fnm1,sign,z0,z1,ggl();
   float t1,ln2,mflops;
   void cffti(),cfft2();

   first = 1;
   seed  = 331.0;
   for(icase=0;icase<2;icase++){
   if(first){
      for(i=0;i<2*N;i+=2){
         z0 = ggl(&seed);     /* real part of array */
         z1 = ggl(&seed);     /* imaginary part of array */
         x[i] = z0;
         z[i] = z0;           /* copy of initial real data */
         x[i+1] = z1;
         z[i+1] = z1;         /* copy of initial imag. data */
      }
   } else {
      for(i=0;i<2*N;i+=2){
         z0 = 0;              /* real part of array */
         z1 = 0;              /* imaginary part of array */
         x[i] = z0;
         z[i] = z0;           /* copy of initial real data */
         x[i+1] = z1;
         z[i+1] = z1;         /* copy of initial imag. data */
      }
   }
/* initialize sine/cosine tables */
   n = N;
   cffti(n,w);
/* transform forward, back */
   if(first){
      sign = 1.0;
      cfft2(n,x,y,w,sign);
      sign = -1.0;
      cfft2(n,y,x,w,sign);
/* results should be same as initial multiplied by N */
      fnm1 = 1.0/((float) n);
      error = 0.0;
      for(i=0;i<2*N;i+=2){
         error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
                  (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
      }
      error = sqrt(fnm1*error);
      printf(" for n=%d, fwd/bck error=%e\n",N,error);
      first = 0;
   } else {
      unsigned j = 0;
      for(it=0;it<20000;it++){
         sign = +1.0;
         cfft2(n,x,y,w,sign);
         sign = -1.0;
         cfft2(n,y,x,w,sign);
      }
      printf(" for n=%d\n",n);
      for (i = 0; i<N; ++i) {
        printf("%g  ", w[i]);
        j++;
        if (j == 4) {
          printf("\n");
          j = 0;
        }
      }
   }
   }
   return 0;
}
Esempio n. 9
0
int propnewc(sf_complex **ini, sf_complex **lt, sf_complex **rt, int nz, int nx, int nt, int m2, int nkzx, char *mode, int pad1, int snap, sf_complex **cc, sf_complex ***wvfld, bool verb, bool correct, sf_complex *alpha, sf_complex *beta)
/*^*/
{
    /* index variables */
    int it,iz,ix,im,ik,i,j,wfit;
    int nz2,nx2,nk,nzx2;
    sf_complex c;
    /* wavefield */
    sf_complex **wave,**wave2, *curr, *currm, *cwave, *cwavem, *curr1, *curr2;

    nk = cfft2_init(pad1,nz,nx,&nz2,&nx2);
    nzx2 = nz2*nx2;

    if (nk!=nkzx) sf_error("nk discrepancy!");

    curr = sf_complexalloc(nzx2);
    if (correct) {
	curr1 = sf_complexalloc(nzx2);
	curr2 = sf_complexalloc(nzx2);
    }
    currm  = sf_complexalloc(nzx2);
    
    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    wave   = sf_complexalloc2(nk,m2);
    wave2  = sf_complexalloc2(nzx2,m2);

    icfft2_allocate(cwavem);

    /* initialization */
    for (ix = 0; ix < nx2; ix++) {
	for (iz=0; iz < nz2; iz++) {
	    j = iz+ix*nz2;
	    if (ix<nx && iz<nz)
		curr[j] = ini[ix][iz];
	    else 
		curr[j] = sf_cmplx(0.,0.);
	}
    }
    wfit = 0;

    /* MAIN LOOP */
    for (it=0; it<nt; it++) {
	if(verb) sf_warning("it=%d;",it);
	
	/* outout wavefield */
	if(snap>0) {
	    if(it%snap==0 && wfit<=(int)(nt-1)/snap) {
		for (ix=0; ix<nx; ix++)
		    for (iz=0; iz<nz; iz++)
			wvfld[wfit][ix][iz] = curr[iz+ix*nz2];
		wfit++;
	    }
	}

	if (mode[0]=='m') {

	    /* matrix multiplication */
	    for (im = 0; im < m2; im++) {
		for (ix = 0; ix < nx; ix++) {
		    for (iz=0; iz < nz; iz++) {
			i = iz+ix*nz;  /* original grid */
			j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
			currm[j] = lt[im][i]*curr[j];
#else
			currm[j] = sf_cmul(lt[im][i], curr[j]);
#endif
		    }
		}
		cfft2(currm,wave[im]);
	    }
	    
	    for (ik = 0; ik < nk; ik++) {
		c = sf_cmplx(0.,0.);
		for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
		    c += wave[im][ik]*rt[ik][im];
#else
		    c += sf_cmul(wave[im][ik],rt[ik][im]); //complex multiplies complex
#endif
		}
		cwave[ik] = c;
	    }

	    /* matrix multiplication */
	    for (im = 0; im < m2; im++) {
		for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		    cwavem[ik] = cwave[ik]*rt[ik][im];
#else
		    cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex
#endif
		}
		icfft2(wave2[im],cwavem);
	    }
	    
	    for (ix = 0; ix < nx; ix++) {
		for (iz=0; iz < nz; iz++) {
		    i = iz+ix*nz;  /* original grid */
		    j = iz+ix*nz2; /* padded grid */
		    c = sf_cmplx(0.,0.);
		    for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
			c += lt[im][i]*wave2[im][j];
#else
			c += sf_cmul(lt[im][i], wave2[im][j]);
#endif
		    }
		    curr[j] = c;
		}
	    }

	    if (correct) {
		for (ix = 0; ix < nx2; ix++) {
		    for (iz=0; iz < nz2; iz++) {
			i = iz+ix*nz;  /* original grid */
			j = iz+ix*nz2; /* padded grid */
			if (ix<nx && iz<nz) {
#ifdef SF_HAS_COMPLEX_H
			    currm[j] = curr[j]/alpha[i];
#else
			    currm[j] = sf_cdiv(curr[j],alpha[i]);
#endif
			} else {
			    currm[j] = sf_cmplx(0.,0.);
			}
		    }
		}
		cfft2(currm,cwave);
		
		for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		    cwavem[ik] = cwave[ik]/beta[ik];
#else
		    cwavem[ik] = sf_cdiv(cwave[ik],beta[ik]);
#endif
		}
		icfft2(curr1,cwavem);
		
		for (ix = nx; ix < nx2; ix++) {
		    for (iz=nz; iz < nz2; iz++) {
			j = iz+ix*nz2; /* padded grid */	
			curr1[j] = sf_cmplx(0.,0.);
		    }
		}

		/**/
		cfft2(curr,cwave);
		
		for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		    cwavem[ik] = cwave[ik]/conjf(beta[ik]);
#else
		    cwavem[ik] = sf_cdiv(cwave[ik],conjf(beta[ik]));
#endif
		}
		icfft2(curr,cwavem);
		
		for (ix = 0; ix < nx2; ix++) {
		    for (iz=0; iz < nz2; iz++) {
			i = iz+ix*nz;  /* original grid */
			j = iz+ix*nz2; /* padded grid */
			if (ix<nx && iz<nz) {
#ifdef SF_HAS_COMPLEX_H
			    curr2[j] = curr[j]/conjf(alpha[i]);
#else
			    curr2[j] = sf_cdiv(curr[j],conjf(alpha[i]));
#endif
			} else {
			    curr2[j] = sf_cmplx(0.,0.);
			}
		    }
		}

		for (ix = 0; ix < nx2; ix++) {
		    for (iz=0; iz < nz2; iz++) {
			j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
			curr[j] = (curr1[j] + curr2[j])/2.;
#else
			curr[j] = sf_crmul(curr1[j]+curr2[j],0.5);
#endif
		    }
		}
	    }
	    
	} else if (mode[0]=='x') {
	 
	    cfft2(curr,cwave);

	    /* matrix multiplication */
	    for (im = 0; im < m2; im++) {
		for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		    cwavem[ik] = cwave[ik]*rt[ik][im];
#else
		    cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex
#endif
		}
		icfft2(wave2[im],cwavem);
	    }
	    
	    for (ix = 0; ix < nx; ix++) {
		for (iz=0; iz < nz; iz++) {
		    i = iz+ix*nz;  /* original grid */
		    j = iz+ix*nz2; /* padded grid */
		    c = sf_cmplx(0.,0.);
		    for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
			c += lt[im][i]*wave2[im][j];
#else
			c += sf_cmul(lt[im][i], wave2[im][j]);
#endif
		    }
		    curr[j] = c;
		}
	    }

	    /* matrix multiplication */
	    for (im = 0; im < m2; im++) {
		for (ix = 0; ix < nx; ix++) {
		    for (iz=0; iz < nz; iz++) {
			i = iz+ix*nz;  /* original grid */
			j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
			currm[j] = lt[im][i]*curr[j];
#else
			currm[j] = sf_cmul(lt[im][i], curr[j]);
#endif
		    }
		}
		cfft2(currm,wave[im]);
	    }
	    
	    for (ik = 0; ik < nk; ik++) {
		c = sf_cmplx(0.,0.);
		for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
		    c += wave[im][ik]*rt[ik][im];
#else
		    c += sf_cmul(wave[im][ik],rt[ik][im]); //complex multiplies complex
#endif
		}
		cwavem[ik] = c;
	    }
	    
	    icfft2(curr,cwavem);

	    if (correct) {
		for (ix = 0; ix < nx2; ix++) {
		    for (iz=0; iz < nz2; iz++) {
			i = iz+ix*nz;  /* original grid */
			j = iz+ix*nz2; /* padded grid */
			if (ix<nx && iz<nz) {
#ifdef SF_HAS_COMPLEX_H
			    currm[j] = curr[j]/alpha[i];
#else
			    currm[j] = sf_cdiv(curr[j],alpha[i]);
#endif
			} else {
			    currm[j] = sf_cmplx(0.,0.);
			}
		    }
		}
		cfft2(currm,cwave);
		
		for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		    cwavem[ik] = cwave[ik]/beta[ik];
#else
		    cwavem[ik] = sf_cdiv(cwave[ik],beta[ik]);
#endif
		}
		icfft2(curr,cwavem);
		
		for (ix = nx; ix < nx2; ix++) {
		    for (iz=nz; iz < nz2; iz++) {
			j = iz+ix*nz2; /* padded grid */	
			curr[j] = sf_cmplx(0.,0.);
		    }
		}
	    }
	    
	} else if (mode[0]=='n') {

	    /* matrix multiplication */
	    for (im = 0; im < m2; im++) {
		for (ix = 0; ix < nx; ix++) {
		    for (iz=0; iz < nz; iz++) {
			i = iz+ix*nz;  /* original grid */
			j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
			currm[j] = lt[im][i]*curr[j];
#else
			currm[j] = sf_cmul(lt[im][i], curr[j]);
#endif
		    }
		}
		cfft2(currm,wave[im]);
	    }
	    
	    for (ik = 0; ik < nk; ik++) {
		c = sf_cmplx(0.,0.);
		for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
		    c += wave[im][ik]*rt[ik][im];
#else
		    c += sf_cmul(wave[im][ik],rt[ik][im]); //complex multiplies complex
#endif
		}
		cwavem[ik] = c;
	    }
	    icfft2(curr,cwavem);

	    /* matrix multiplication */
	    for (im = 0; im < m2; im++) {
		for (ix = 0; ix < nx; ix++) {
		    for (iz=0; iz < nz; iz++) {
			i = iz+ix*nz;  /* original grid */
			j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
			currm[j] = lt[im][i]*curr[j];
#else
			currm[j] = sf_cmul(lt[im][i], curr[j]);
#endif
		    }
		}
		cfft2(currm,wave[im]);
	    }
	    
	    for (ik = 0; ik < nk; ik++) {
		c = sf_cmplx(0.,0.);
		for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
		    c += wave[im][ik]*rt[ik][im];
#else
		    c += sf_cmul(wave[im][ik],rt[ik][im]); //complex multiplies complex
#endif
		}
		cwavem[ik] = c;
	    }
	    icfft2(curr,cwavem);
	    
	} else if (mode[0]=='p') {

	    cfft2(curr,cwave);

	    /* matrix multiplication */
	    for (im = 0; im < m2; im++) {
		for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		    cwavem[ik] = cwave[ik]*rt[ik][im];
#else
		    cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex
#endif
		}
		icfft2(wave2[im],cwavem);
	    }
	    
	    for (ix = 0; ix < nx; ix++) {
		for (iz=0; iz < nz; iz++) {
		    i = iz+ix*nz;  /* original grid */
		    j = iz+ix*nz2; /* padded grid */
		    c = sf_cmplx(0.,0.);
		    for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
			c += lt[im][i]*wave2[im][j];
#else
			c += sf_cmul(lt[im][i], wave2[im][j]);
#endif
		    }
		    curr[j] = c;
		}
	    }

	    cfft2(curr,cwave);

	    /* matrix multiplication */
	    for (im = 0; im < m2; im++) {
		for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		    cwavem[ik] = cwave[ik]*rt[ik][im];
#else
		    cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex
#endif
		}
		icfft2(wave2[im],cwavem);
	    }
	    
	    for (ix = 0; ix < nx; ix++) {
		for (iz=0; iz < nz; iz++) {
		    i = iz+ix*nz;  /* original grid */
		    j = iz+ix*nz2; /* padded grid */
		    c = sf_cmplx(0.,0.);
		    for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
			c += lt[im][i]*wave2[im][j];
#else
			c += sf_cmul(lt[im][i], wave2[im][j]);
#endif
		    }
		    curr[j] = c;
		}
	    }

	} else sf_error("Check mode parameter!");

    } /* time stepping */
    if(verb) sf_warning("."); 
    /* output final result*/
    for (ix=0; ix<nx; ix++)
	for (iz=0; iz<nz; iz++)
	    cc[ix][iz] = curr[iz+ix*nz2];
    
    cfft2_finalize();
    return 0;
}
Esempio n. 10
0
int lrosfor2(sf_complex ***wavfld, float **sill, sf_complex **rcd, bool verb,
	     sf_complex **lt, sf_complex **rt, int m2,
	     geopar geop, sf_complex *ww, float *rr, int pad1)
/*< low-rank one-step forward modeling >*/
{
    int it,iz,im,ik,ix,i,j;     /* index variables */
    int nxb,nzb,dx,dz,spx,spz,gpz,gpx,gpl,snpint,dt,nth=1,wfit;
    int nt,nz,nx, nk, nzx, nz2, nx2, nzx2;
    sf_complex c;
    sf_complex *cwave, *cwavem;
    sf_complex **wave, *curr;

    nx = geop->nx;
    nz = geop->nz;
    nxb = geop->nxb;
    nzb = geop->nzb;
    dx = geop->dx;
    dz = geop->dz;

    spx = geop->spx;
    spz = geop->spz;
    gpz  = geop->gpz;
    gpx  = geop->gpx;
    gpl  = geop->gpl;
    snpint = geop->snpint;
    
    nt = geop->nt;
    dt = geop->dt;

#ifdef _OPENMP
#pragma omp parallel  
{
    nth = omp_get_num_threads();
}
    sf_warning(">>>> Using %d threads <<<<<", nth);
#endif
    
    /*Matrix dimensions*/
    nk = cfft2_init(pad1,nzb,nxb,&nz2,&nx2);
    nzx = nzb*nxb;
    nzx2 = nz2*nx2;

    curr   = sf_complexalloc(nzx2);
    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    wave   = sf_complexalloc2(nzx2,m2);

    icfft2_allocate(cwavem);

#ifdef _OPENMP
#pragma omp parallel for private(iz)
#endif
    for (iz=0; iz < nzx2; iz++) {
	curr[iz] = sf_cmplx(0.,0.);
    }

    /*Main loop*/
    wfit = 0;
    for (it = 0; it < nt; it++) {
	if (verb) sf_warning("Forward source it=%d/%d;", it, nt-1);
	
	/*matrix multiplication*/
	cfft2(curr,cwave);

	for (im = 0; im < m2; im++) {
#ifdef _OPENMP
#pragma omp parallel for private(ik)
#endif
	    for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		cwavem[ik] = cwave[ik]*rt[ik][im];
#else
		cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]);
#endif
	    }
	    icfft2(wave[im],cwavem);
	}

#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,i,j,im,c) shared(curr,lt,wave)
#endif
	for (ix = 0; ix < nxb; ix++) {
	    for (iz=0; iz < nzb; iz++) {
		i = iz+ix*nzb;  /* original grid */
		j = iz+ix*nz2; /* padded grid */
		if ((it*dt)<=geop->trunc) {
#ifdef SF_HAS_COMPLEX_H
		  c = ww[it] * rr[i]; // source term
#else
		  c = sf_crmul(ww[it], rr[i]); // source term
#endif
		} else {
		  c = sf_cmplx(0.,0.);
		}
		for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
		    c += lt[im][i]*wave[im][j];
#else
		    c += sf_cmul(lt[im][i], wave[im][j]);
#endif
		}
		curr[j] = c;
	    }
	}

#ifdef _OPENMP
#pragma omp parallel for private(ix,j)
#endif	 
	for ( ix =0 ; ix < gpl; ix++) {
	    j = (gpz+geop->top)+(ix+gpx+geop->lft)*nz2; /* padded grid */
	    rcd[ix][it] = curr[j];
	}
	
	if ( it%snpint == 0 ) {
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,j)
#endif
	    for ( ix = 0; ix < nx; ix++) {
		for ( iz = 0; iz<nz; iz++ ) { 
		    j = (iz+geop->top)+(ix+geop->lft)*nz2; /* padded grid */
		    wavfld[wfit][ix][iz] = curr[j];
		    sill[ix][iz] += pow(hypotf(crealf(curr[j]),cimagf(curr[j])),2);
		    //sill[ix][iz] += pow(hypotf(crealf(wavfld[wfit][ix][iz]),cimagf(wavfld[wfit][ix][iz])),2);
		}
	    }
	    wfit++;
	}
    } /*Main loop*/
    if (verb) sf_warning(".");
    cfft2_finalize();
    return wfit;
}
Esempio n. 11
0
int main(int argc, char* argv[])
{
    bool verb,complx,sub,os;
    int it,iz,im,ik,ix,i,j;     /* index variables */
    int nt,nz,nx, m2, nk, nzx, nz2, nx2, nzx2, n2, pad1,nth;
    sf_complex c,old;

    /* I/O arrays*/
    sf_complex *ww,*curr,*prev,*cwave,*cwavem,**wave,**lt, **rt;
    float *rcurr,*rr;

    sf_file Fw,Fr,Fo;    /* I/O files */
    sf_axis at,az,ax;    /* cube axes */
    sf_file left, right;


    sf_init(argc,argv);
    if(!sf_getbool("verb",&verb)) verb=false; /* verbosity */
    if(!sf_getbool("cmplx",&complx)) complx=true; /* outputs complex wavefield */
    if(!sf_getbool("os",&os)) os=true; /* one-step flag */
    if (os) {
        sf_warning("One-step wave extrapolation");
        if(!sf_getbool("sub",&sub)) sub=false; /* subtraction flag */
    } else {
        sf_warning("Two-step wave extrapolation");
        if(!sf_getbool("sub",&sub)) sub=true; /* subtraction flag */
    }

    /* setup I/O files */
    Fw = sf_input ("in" );
    Fo = sf_output("out");
    Fr = sf_input ("ref");

    if (SF_COMPLEX != sf_gettype(Fw)) sf_error("Need complex input");
    if (SF_FLOAT != sf_gettype(Fr)) sf_error("Need float ref");

    if(complx)
        sf_settype(Fo,SF_COMPLEX);
    else
        sf_settype(Fo,SF_FLOAT);

    /* Read/Write axes */
    at = sf_iaxa(Fw,1);
    nt = sf_n(at);
    az = sf_iaxa(Fr,1);
    nz = sf_n(az);
    ax = sf_iaxa(Fr,2);
    nx = sf_n(ax);

    sf_oaxa(Fo,az,1);
    sf_oaxa(Fo,ax,2);
    sf_oaxa(Fo,at,3);

    if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */

#ifdef _OPENMP
    #pragma omp parallel
    {
        nth = omp_get_num_threads();
    }
    if (verb) sf_warning(">>>> Using %d threads <<<<<", nth);
#endif

    nk = cfft2_init(pad1,nz,nx,&nz2,&nx2);

    nzx = nz*nx;
    nzx2 = nz2*nx2;

    /* propagator matrices */
    left = sf_input("left");
    right = sf_input("right");

    if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
    if (!sf_histint(left,"n2",&m2))  sf_error("Need n2= in left");

    if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
    if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);

    lt = sf_complexalloc2(nzx,m2);
    rt = sf_complexalloc2(m2,nk);

    sf_complexread(lt[0],nzx*m2,left);
    sf_complexread(rt[0],m2*nk,right);

    sf_fileclose(left);
    sf_fileclose(right);

    /* read wavelet & reflectivity */
    ww=sf_complexalloc(nt);
    sf_complexread(ww,nt ,Fw);

    rr=sf_floatalloc(nzx);
    sf_floatread(rr,nzx,Fr);

    curr   = sf_complexalloc(nzx2);
    if (!os) prev = sf_complexalloc(nzx2);
    else prev = NULL;
    if(!complx) rcurr  = sf_floatalloc(nzx2);
    else rcurr=NULL;

    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    wave   = sf_complexalloc2(nzx2,m2);

    for (iz=0; iz < nzx2; iz++) {
        curr[iz] = sf_cmplx(0.,0.);
        if (!os) prev[iz] = sf_cmplx(0.,0.);
        if(!complx) rcurr[iz]= 0.;
    }

    /* MAIN LOOP */
    for (it=0; it<nt; it++) {
        if(verb) sf_warning("it=%d;",it);

        /* matrix multiplication */
        cfft2(curr,cwave);

        for (im = 0; im < m2; im++) {
            for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
                cwavem[ik] = cwave[ik]*rt[ik][im];
#else
                cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex
#endif
            }
            icfft2(wave[im],cwavem);
        }

        for (ix = 0; ix < nx; ix++) {
            for (iz=0; iz < nz; iz++) {
                i = iz+ix*nz;  /* original grid */
                j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
                c = ww[it] * rr[i]; // source term
#else
                c = sf_crmul(ww[it], rr[i]); // source term
#endif
                if (sub) c += curr[j];
                if (!os) {
                    old = curr[j];
#ifdef SF_HAS_COMPLEX_H
                    c += sub? (old-prev[j]) : -prev[j];
#else
                    c = sf_cadd(c,sub? sf_csub(old,prev[j]) : sf_cneg(prev[j]));
#endif
                    prev[j] = old;
                }
                for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
                    c += lt[im][i]*wave[im][j];
#else
                    c += sf_cmul(lt[im][i], wave[im][j]);
#endif
                }

                curr[j] = c;
                if (!complx) rcurr[j] = crealf(c);

            }

            /* write wavefield to output */
            if (complx)
                sf_complexwrite(curr+ix*nz2,nz,Fo);
            else
                sf_floatwrite(rcurr+ix*nz2,nz,Fo);
        }
    }
    if(verb) sf_warning(".");
    cfft2_finalize();
    exit (0);
}
Esempio n. 12
0
int main(int argc, char *argv[])
{
	bool wantwf, verb;

	int ix, iz, is, it, wfit, im, ik, i, j, itau;
    int ns, nx, nz, nt, wfnt, rnx, rnz, nzx, rnzx, vnx, ntau, htau, nds;
	int scalet, snap, snapshot, fnx, fnz, fnzx, nk, nb;
	int rectx, rectz, repeat, gpz, n, m, pad1, trunc, spx, spz;

	float dt, t0, z0, dz, x0, dx, s0, ds, wfdt, srctrunc;
    float dtau, tau0, tau;

	int nr, ndr, nr0;

	char *path1, *path2, number[5], *left, *right;

	double tstart, tend;
	struct timeval tim;

	/*wavenumber domain tapering*/
	int taper;
	float *ktp;
	float ktmp,kx_trs,kz_trs,thresh;
	float dkx,dkz,kx0,kz0;
	float kx,kz;
	int nkz;

	sf_complex c, **lt, **rt;
	sf_complex *ww, **dd, ***dd3;
	float ***img1, **img2, ***mig1, **mig2;
    float *rr, **ccr, **sill, ***fwf, ***bwf;
	sf_complex *cwave, *cwavem, **wave, *curr;

	sf_axis at, ax, az, atau;

	sf_file Fdat, Fsrc, Fimg1, Fimg2;
	sf_file Ffwf, Fbwf, Fvel;
	sf_file Fleft, Fright;

	int cpuid, numprocs, nth, nspad, iturn;
    float *sendbuf, *recvbuf;
	sf_complex *sendbufc, *recvbufc;
	MPI_Comm comm=MPI_COMM_WORLD;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(comm, &cpuid);
	MPI_Comm_size(comm, &numprocs);

	sf_init(argc, argv);

#ifdef _OPENMP
#pragma omp parallel
	{
		nth=omp_get_num_threads();
	}
	sf_warning(">>> Using %d threads <<<", nth);
#endif

	gettimeofday(&tim, NULL);
	tstart=tim.tv_sec+(tim.tv_usec/1000000.0);

	if (!sf_getint("taper",&taper)) taper=0; /* tapering in the frequency domain */
	if (!sf_getfloat("thresh",&thresh)) thresh=0.92; /* tapering threshold */

	if(!sf_getbool("wantwf", &wantwf)) wantwf=false;
    if(!sf_getbool("verb", &verb)) verb=false;
	if(!sf_getint("pad1", &pad1)) pad1=1;
	/* padding factor on the first axis */

	if(!sf_getint("nb", &nb)) sf_error("Need nb= ");
	if(!sf_getfloat("srctrunc", &srctrunc)) srctrunc=0.4;
	if(!sf_getint("rectx", &rectx)) rectx=2;
	if(!sf_getint("rectz", &rectz)) rectz=2;
	if(!sf_getint("repeat", &repeat)) repeat=2;

	if(!sf_getint("scalet", &scalet)) scalet=1;
	if(!sf_getint("snap", &snap)) snap=100;
	/* interval of the output wavefield */
	if(!sf_getint("snapshot", &snapshot)) snapshot=0;
	/* print out the wavefield snapshots of this shot */
    if(!sf_getint("nds", &nds)) sf_error("Need nds=!");
    
    /* source and receiver positions */
	if(!sf_getint("gpz", &gpz)) sf_error("Need gpz=");
	if(!sf_getint("spx", &spx)) sf_error("Need spx=");
	if(!sf_getint("spz", &spz)) sf_error("Need spz=");
    
    /* tau parameters */
    if(!sf_getint("ntau", &ntau)) sf_error("Need ntau=");
    if(!sf_getfloat("dtau", &dtau)) sf_error("Need dtau=");
    if(!sf_getfloat("tau0", &tau0)) sf_error("Need tau0=");

	/* geometry parameters */
	if(!sf_getint("rnx", &rnx)) sf_error("Need rnx=");
	if(!sf_getint("ndr", &ndr)) ndr=1;
	if(!sf_getint("nr0", &nr0)) nr0=0;

	/* input/output files */
	Fdat=sf_input("--input");
	Fimg1=sf_output("--output");
    Fimg2=sf_output("Fimg2");
    Fsrc=sf_input("Fsrc");
    Fvel=sf_input("Fpadvel");

	if(wantwf){
		Ffwf=sf_output("Ffwf");
        Fbwf=sf_output("Fbwf");
	}

	at=sf_iaxa(Fsrc, 1); nt=sf_n(at); dt=sf_d(at); t0=sf_o(at);
    ax=sf_iaxa(Fvel, 2); vnx=sf_n(ax); dx=sf_d(ax); x0=sf_o(ax);
	az=sf_iaxa(Fvel, 1); rnz=sf_n(az); dz=sf_d(az); z0=sf_o(az);
    if(!sf_histint(Fdat, "n2", &nr)) sf_error("Need n2= in input!");
    if(!sf_histint(Fdat, "n3", &ns)) sf_error("Need n3= in input!");
    if(!sf_histfloat(Fdat, "d3", &ds)) sf_error("Need d3= in input!");
    if(!sf_histfloat(Fdat, "o3", &s0)) sf_error("Need o3= in input!");
    
    wfnt=(nt-1)/scalet+1;
    wfdt=dt*scalet;
    
    /* double check the geometry parameters */
    if(nds != (int)(ds/dx)) sf_error("Need ds/dx= %d", nds);
	//sf_warning("s0=%g, x0+(rnx-1)*dx/2=%g", s0, x0+(rnx-1)*dx/2);
    //if(s0 != x0+(rnx-1)*dx/2) sf_error("Wrong origin information!");
    if(vnx != nds*(ns-1)+rnx) sf_error("Wrong dimension in x axis!");

    /* set up the output files */
    atau=sf_iaxa(Fsrc, 1);
    sf_setn(atau, ntau);
    sf_setd(atau, dtau);
    sf_seto(atau, tau0);
    sf_setlabel(atau, "Tau");
    sf_setunit(atau, "s");
    
    sf_oaxa(Fimg1, az, 1);
    sf_oaxa(Fimg1, ax, 2);
    sf_oaxa(Fimg1, atau, 3);
    sf_oaxa(Fimg2, az, 1);
    sf_oaxa(Fimg2, ax, 2);
    sf_putint(Fimg2, "n3", 1);
    sf_settype(Fimg1, SF_FLOAT);
    sf_settype(Fimg2, SF_FLOAT);
    
    if(wantwf){
		sf_setn(ax, rnx);
        sf_seto(ax, -(rnx-1)*dx/2.0);
        sf_oaxa(Ffwf, az, 1);
        sf_oaxa(Ffwf, ax, 2);
        sf_putint(Ffwf, "n3", (wfnt-1)/snap+1);
        sf_putfloat(Ffwf, "d3", snap*wfdt);
        sf_putfloat(Ffwf, "o3", t0);
        sf_putstring(Ffwf, "label3", "Time");
        sf_putstring(Ffwf, "unit3", "s");
        sf_settype(Ffwf, SF_FLOAT);
        
        sf_oaxa(Fbwf, az, 1);
        sf_oaxa(Fbwf, ax, 2);
        sf_putint(Fbwf, "n3", (wfnt-1)/snap+1);
        sf_putfloat(Fbwf, "d3", -snap*wfdt);
        sf_putfloat(Fbwf, "o3", (wfnt-1)*wfdt);
        sf_putstring(Fbwf, "label3", "Time");
        sf_putstring(Fbwf, "unit3", "s");
        sf_settype(Fbwf, SF_FLOAT);
	}
	
    nx=rnx+2*nb; nz=rnz+2*nb;
	nzx=nx*nz; rnzx=rnz*rnx;
    nk=cfft2_init(pad1, nz, nx, &fnz, &fnx);
	fnzx=fnz*fnx;

	if(ns%numprocs==0) nspad=ns;
	else nspad=(ns/numprocs+1)*numprocs;
    
	/* print axies parameters for double check */
    sf_warning("cpuid=%d, numprocs=%d, nspad=%d", cpuid, numprocs, nspad);
	sf_warning("nt=%d, dt=%g, scalet=%d, wfnt=%d, wfdt=%g",nt, dt, scalet, wfnt, wfdt);
	sf_warning("vnx=%d, nx=%d, dx=%g, nb=%d, rnx=%d", vnx, nx, dx, nb, rnx);
	sf_warning("nr=%d, ndr=%d, nr0=%g", nr, ndr, nr0);
	sf_warning("nz=%d, rnz=%d, dz=%g, z0=%g", nz, rnz, dz, z0);
	sf_warning("spx=%d, spz=%d, gpz=%d", spx, spz, gpz);
	sf_warning("ns=%d, ds=%g, s0=%g", ns, ds, s0);
    sf_warning("ntau=%d, dtau=%g, tau0=%g", ntau, dtau, tau0);
    sf_warning("nzx=%d, fnzx=%d, nk=%d", nzx, fnzx, nk);

	/* allocate storage and read data */
	ww=sf_complexalloc(nt);
	sf_complexread(ww, nt, Fsrc);
	sf_fileclose(Fsrc);
	
    gpz=gpz+nb;
    spz=spz+nb;
    spx=spx+nb;
	nr0=nr0+nb;
    trunc=srctrunc/dt+0.5;
    
	dd=sf_complexalloc2(nt, nr);
	if(cpuid==0) dd3=sf_complexalloc3(nt, nr, numprocs);
	rr=sf_floatalloc(nzx);
	reflgen(nz, nx, spz, spx, rectz, rectx, repeat, rr);
    
    fwf=sf_floatalloc3(rnz, rnx, wfnt);
    bwf=sf_floatalloc3(rnz, rnx, wfnt);
    img1=sf_floatalloc3(rnz, vnx, ntau);
    img2=sf_floatalloc2(rnz, vnx);
    mig1=sf_floatalloc3(rnz, rnx, ntau);
    mig2=sf_floatalloc2(rnz, rnx);
    
    ccr=sf_floatalloc2(rnz, rnx);
    sill=sf_floatalloc2(rnz, rnx);
    
    curr=sf_complexalloc(fnzx);
	cwave=sf_complexalloc(nk);
	cwavem=sf_complexalloc(nk);
	icfft2_allocate(cwavem);

	if (taper!=0) {
		dkz = 1./(fnz*dz); kz0 = -0.5/dz;
		dkx = 1./(fnx*dx); kx0 = -0.5/dx;
		nkz = fnz;

		sf_warning("dkz=%f,dkx=%f,kz0=%f,kx0=%f",dkz,dkx,kz0,kx0);
		sf_warning("nk=%d,nkz=%d,nkx=%d",nk,nkz,fnx);

		kx_trs = thresh*fabs(0.5/dx);
		kz_trs = thresh*fabs(0.5/dz);
		sf_warning("Applying kz tapering below %f",kz_trs);
		sf_warning("Applying kx tapering below %f",kx_trs);
		ktp = sf_floatalloc(nk);
		/* constructing the tapering op */
		for (ix=0; ix < fnx; ix++) {
			kx = kx0+ix*dkx;
			for (iz=0; iz < nkz; iz++) {
				kz = kz0+iz*dkz;
				ktmp = 1.;
				if (fabs(kx) > kx_trs)
					ktmp *= powf((2*kx_trs - fabs(kx))/(kx_trs),2);
				if (fabs(kz) > kz_trs)
					ktmp *= powf((2*kz_trs - fabs(kz))/(kz_trs),2);
				ktp[iz+ix*nkz] = ktmp;
			}
		}
	}

	/* initialize image tables that would be used for summing images */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz, itau)
#endif
    for(ix=0; ix<vnx; ix++){
        for(iz=0; iz<rnz; iz++){
            img2[ix][iz]=0.;
            for(itau=0; itau<ntau; itau++){
                img1[itau][ix][iz]=0.;
            }
        }
    }

	path1=sf_getstring("path1");
	path2=sf_getstring("path2");
	if(path1==NULL) path1="./mat/left";
	if(path2==NULL) path2="./mat/right";

	/* shot loop */
	for (iturn=0; iturn*numprocs<nspad; iturn++){
		is=iturn*numprocs+cpuid;
        
        /* read data */
		if(cpuid==0){
			sf_seek(Fdat, ((off_t) is)*((off_t) nr)*((off_t) nt)*sizeof(float complex), SEEK_SET);

			if((iturn+1)*numprocs<=ns){
				sf_complexread(dd3[0][0], nr*nt*numprocs, Fdat);
			}else{
				sf_complexread(dd3[0][0], nr*nt*(ns-iturn*numprocs), Fdat);
				for(is=ns; is<nspad; is++)
					for(ix=0; ix<nr; ix++)
						for(it=0; it<nt; it++)
							dd3[is-iturn*numprocs][ix][it]=sf_cmplx(0.,0.);
				is=iturn*numprocs;
			}

			sendbufc=dd3[0][0];
			recvbufc=dd[0];
		}else{
			sendbufc=NULL;
			recvbufc=dd[0];
		}
		MPI_Scatter(sendbufc, nt*nr, MPI_COMPLEX, recvbufc, nt*nr, MPI_COMPLEX, 0, comm);

		if(is<ns){ /* effective shot loop */

			/* construct the names of left and right matrices */
			left=sf_charalloc(strlen(path1));
			right=sf_charalloc(strlen(path2));
			strcpy(left, path1);
			strcpy(right, path2);
			sprintf(number, "%d", is+1);
			strcat(left, number);
			strcat(right, number);

			Fleft=sf_input(left);
			Fright=sf_input(right);

			if(!sf_histint(Fleft, "n1", &n) || n != nzx) sf_error("Need n1=%d in Fleft", nzx);
			if(!sf_histint(Fleft, "n2", &m)) sf_error("No n2 in Fleft");
			if(!sf_histint(Fright, "n1", &n) || n != m) sf_error("Need n1=%d in Fright", m);
			if(!sf_histint(Fright, "n2", &n) || n != nk) sf_error("Need n2=%d in Fright", nk);

			/* allocate storage for each shot migration */
			lt=sf_complexalloc2(nzx, m);
			rt=sf_complexalloc2(m, nk);
			sf_complexread(lt[0], nzx*m, Fleft);
			sf_complexread(rt[0], m*nk, Fright);
			sf_fileclose(Fleft);
			sf_fileclose(Fright);

			/* initialize curr and imaging variables */
#ifdef _OPENMP
#pragma omp parallel for private(iz)
#endif
			for(iz=0; iz<fnzx; iz++){
				curr[iz]=sf_cmplx(0.,0.);
			}
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz, itau)
#endif
			for(ix=0; ix<rnx; ix++){
				for(iz=0; iz<rnz; iz++){
					mig2[ix][iz]=0.;
					ccr[ix][iz]=0.;
					sill[ix][iz]=0.;
					for(itau=0; itau<ntau; itau++){
						mig1[itau][ix][iz]=0.;
					}
				}
			}

			/* wave */
			wave=sf_complexalloc2(fnzx, m);

			/* snapshot */
			if(wantwf && is==snapshot) wantwf=true;
			else wantwf=false;

			/* forward propagation */
			wfit=0;
			for(it=0; it<nt; it++){
				if(verb) sf_warning("Forward propagation it=%d/%d",it+1, nt);

				cfft2(curr, cwave);
				for(im=0; im<m; im++){
#ifdef _OPENMP
#pragma omp parallel for private(ik)
#endif
					for(ik=0; ik<nk; ik++){
#ifdef SF_HAS_COMPLEX_H
						cwavem[ik]=cwave[ik]*rt[ik][im];
#else
						cwavem[ik]=sf_cmul(cwave[ik],rt[ik][im]);
#endif
					}
					icfft2(wave[im],cwavem);
				}

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz, i, j, im, c) shared(curr, it)
#endif
				for(ix=0; ix<nx; ix++){
					for(iz=0; iz<nz; iz++){
						i=iz+ix*nz;
						j=iz+ix*fnz;

						if(it<trunc){
#ifdef SF_HAS_COMPLEX_H
							c=ww[it]*rr[i];
#else
							c=sf_crmul(ww[it],rr[i]);
#endif
						}else{
							c=sf_cmplx(0.,0.);
						}

						//                    c += curr[j];

						for(im=0; im<m; im++){
#ifdef SF_HAS_COMPLEX_H
							c += lt[im][i]*wave[im][j];
#else
							c += sf_cmul(lt[im][i], wave[im][j]);
#endif
						}
						curr[j]=c;
					}
				}

				if (taper!=0) {
					if (it%taper == 0) {
						cfft2(curr,cwave);
						for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
							cwavem[ik] = cwave[ik]*ktp[ik];
#else
							cwavem[ik] = sf_crmul(cwave[ik],ktp[ik]);
#endif
						}
						icfft2(curr,cwavem);
					}
				}

				if(it%scalet==0){
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
					for(ix=0; ix<rnx; ix++){
						for(iz=0; iz<rnz; iz++){
							fwf[wfit][ix][iz]=crealf(curr[(ix+nb)*fnz+(iz+nb)]);
						}
					}
					wfit++;
				}
			} //end of it

			/* check wfnt */
			if(wfit != wfnt) sf_error("At this point, wfit should be equal to wfnt");

			/* backward propagation starts from here... */
#ifdef _OPENMP
#pragma omp parallel for private(iz)
#endif
			for(iz=0; iz<fnzx; iz++){
				curr[iz]=sf_cmplx(0.,0.);
			}

			wfit=wfnt-1;
			for(it=nt-1; it>=0; it--){
				if(verb) sf_warning("Backward propagation it=%d/%d",it+1, nt);
#ifdef _OPENMP
#pragma omp parallel for private(ix)
#endif
				for(ix=0; ix<nr; ix++){
					curr[(nr0+ix*ndr)*fnz+gpz]+=dd[ix][it];
				}

				cfft2(curr, cwave);

				for(im=0; im<m; im++){
#ifdef _OPENMP
#pragma omp parallel for private(ik)
#endif
					for(ik=0; ik<nk; ik++){
#ifdef SF_HAS_COMPLEX_H
						cwavem[ik]=cwave[ik]*conjf(rt[ik][im]);
#else
						cwavem[ik]=sf_cmul(cwave[ik],conjf(rt[ik][im]));
#endif
					}
					icfft2(wave[im],cwavem);
				}

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz, i, j, im, c) shared(curr, it)
#endif
				for(ix=0; ix<nx; ix++){
					for(iz=0; iz<nz; iz++){
						i=iz+ix*nz;
						j=iz+ix*fnz;

						//                    c=curr[j];
						c=sf_cmplx(0.,0.);

						for(im=0; im<m; im++){
#ifdef SF_HAS_COMPLEX_H
							c += conjf(lt[im][i])*wave[im][j];
#else
							c += sf_cmul(conjf(lt[im][i]), wave[im][j]);
#endif
						}
						curr[j]=c;
					}
				}

				if (taper!=0) {
					if (it%taper == 0) {
						cfft2(curr,cwave);
						for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
							cwavem[ik] = cwave[ik]*ktp[ik];
#else
							cwavem[ik] = sf_crmul(cwave[ik],ktp[ik]);
#endif
						}
						icfft2(curr,cwavem);
					}
				}

				if(it%scalet==0){
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
					for(ix=0; ix<rnx; ix++){
						for(iz=0; iz<rnz; iz++){
							bwf[wfit][ix][iz]=crealf(curr[(ix+nb)*fnz+(iz+nb)]);
							ccr[ix][iz] += fwf[wfit][ix][iz]*bwf[wfit][ix][iz];
							sill[ix][iz] += fwf[wfit][ix][iz]*fwf[wfit][ix][iz];
						}
					}
					wfit--;
				}
			} //end of it
			if(wfit != -1) sf_error("Check program! The final wfit should be -1!");

			/* free storage */
			free(*rt); free(rt);
			free(*lt); free(lt);
			free(*wave); free(wave);
			free(left); free(right);

			/* normalized image */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
			for (ix=0; ix<rnx; ix++){
				for(iz=0; iz<rnz; iz++){
					mig2[ix][iz]=ccr[ix][iz]/(sill[ix][iz]+SF_EPS);
					//		sill[ix][iz]=0.;
				}
			}

			/* time-shift imaging condition */
			for(itau=0; itau<ntau; itau++){
				//sf_warning("itau/ntau=%d/%d", itau+1, ntau);
				tau=itau*dtau+tau0;
				htau=tau/wfdt;

				for(it=abs(htau); it<wfnt-abs(htau); it++){
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
					for(ix=0; ix<rnx; ix++){
						for(iz=0; iz<rnz; iz++){
							mig1[itau][ix][iz]+=fwf[it+htau][ix][iz]*bwf[it-htau][ix][iz];
							//	sill[ix][iz]+=fwf[it+htau][ix][iz]*fwf[it+htau][ix][iz];
						} // end of iz
					} // end of ix
				} // end of it


				//#ifdef _OPENMP
				//#pragma omp parallel for private(ix, iz)
				//#endif 
				/* source illumination */
				//	for(ix=0; ix<rnx; ix++){
				//		for(iz=0; iz<rnz; iz++){
				//			mig1[itau][ix][iz] = mig1[itau][ix][iz]/(sill[ix][iz]+SF_EPS);
				//		}
				//	} 
			} //end of itau

			/* output wavefield snapshot */
			if(wantwf){
				for(it=0; it<wfnt; it++){
					if(it%snap==0){
						sf_floatwrite(fwf[it][0], rnzx, Ffwf);
						sf_floatwrite(bwf[wfnt-1-it][0], rnzx, Fbwf);
					}
				}
				sf_fileclose(Ffwf);
				sf_fileclose(Fbwf);
			}

			/* add all the shot images that are on the same node */
#ifdef _OPENMP
#pragma omp parallel for private(itau, ix, iz)
#endif
			for(itau=0; itau<ntau; itau++){
				for(ix=0; ix<rnx; ix++){
					for(iz=0; iz<rnz; iz++){
						img1[itau][ix+is*nds][iz] += mig1[itau][ix][iz];
					}
				}
			}

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
			for(ix=0; ix<rnx; ix++){
				for(iz=0; iz<rnz; iz++){
					img2[ix+is*nds][iz] += mig2[ix][iz];
				}
			}
		} // end of is<ns
	} // end of iturn
	////////////////end of ishot
	MPI_Barrier(comm);

	cfft2_finalize();
    sf_fileclose(Fdat);
    
    free(ww); free(rr);
	free(*dd); free(dd);
	if(cpuid==0) {free(**dd3); free(*dd3); free(dd3);}
	free(cwave); free(cwavem); free(curr);
    free(*ccr); free(ccr);
    free(*sill); free(sill);
    free(**fwf); free(*fwf); free(fwf);
    free(**bwf); free(*bwf); free(bwf);
    free(**mig1); free(*mig1); free(mig1);
    free(*mig2); free(mig2);
    
    /* sum image */
    if(cpuid==0){
        sendbuf=(float *)MPI_IN_PLACE;
        recvbuf=img1[0][0];
    }else{
        sendbuf=img1[0][0];
        recvbuf=NULL;
    }
    MPI_Reduce(sendbuf, recvbuf, ntau*vnx*rnz, MPI_FLOAT, MPI_SUM, 0, comm);
    
    if(cpuid==0){
        sendbuf=MPI_IN_PLACE;
        recvbuf=img2[0];
    }else{
        sendbuf=img2[0];
        recvbuf=NULL;
    }
    MPI_Reduce(sendbuf, recvbuf, vnx*rnz, MPI_FLOAT, MPI_SUM, 0, comm);
    
    /* output image */
    if(cpuid==0){
        sf_floatwrite(img1[0][0], ntau*vnx*rnz, Fimg1);
        sf_floatwrite(img2[0], vnx*rnz, Fimg2);
    }
	MPI_Barrier(comm);

	sf_fileclose(Fimg1);
    sf_fileclose(Fimg2);
    free(**img1); free(*img1); free(img1);
    free(*img2); free(img2);
    
	gettimeofday(&tim, NULL);
	tend=tim.tv_sec+(tim.tv_usec/1000000.0);
	sf_warning(">> The computing time is %.3lf minutes <<", (tend-tstart)/60.);

	MPI_Finalize();
	exit(0);
}
Esempio n. 13
0
int prop1Pa(sf_complex *input, sf_complex *output, sf_complex *lt, sf_complex *rt, int nz, int nx, int nkzx, int m2)
/*< Just nsps(-) >*/
{
    int iz, ix, im, ik, i, j;
    int nz2, nx2, nk, nzx, nzx2;
    int pad1 = 1;
    sf_complex **wave, **wave2, *curr, *currm, *cwave, *cwavem, c;

    nk = cfft2_init(pad1,nz,nx,&nz2,&nx2);
    if (nk!=nkzx) sf_error("nk discrepancy!");
    
    nzx = nz*nx;
    nzx2 = nz2*nx2;

    curr   = sf_complexalloc(nzx2);
    currm  = sf_complexalloc(nzx2);
    
    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    
    wave   = sf_complexalloc2(nk,m2);
    wave2  = sf_complexalloc2(nzx2,m2);

    icfft2_allocate(cwave);

    /* initialization */
    for (ix = 0; ix < nx2; ix++) {
	for (iz=0; iz < nz2; iz++) {
            i = iz+ix*nz;
	    j = iz+ix*nz2;
	    if (ix<nx && iz<nz)
		curr[j] = input[i];
	    else 
		curr[j] = sf_cmplx(0.,0.);
	}
    }
        
        /* nsps(-) */
         
	/* matrix multiplication */
	for (im = 0; im < m2; im++) {
	    for (ix = 0; ix < nx; ix++) {
		for (iz=0; iz < nz; iz++) {
		    i = iz+ix*nz;  /* original grid */
		    j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
		    currm[j] = conjf(lt[im*nzx+i])*curr[j];
#else
		    currm[j] = sf_cmul(conjf(lt[im*nzx+i]), curr[j]);
#endif
		}
	    }
	    cfft2(currm,wave[im]);
	}
	
	for (ik = 0; ik < nk; ik++) {
	    c = sf_cmplx(0.,0.);
	    for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
		c += wave[im][ik]*conjf(rt[ik*m2+im]);
#else
		c += sf_cmul(wave[im][ik],conjf(rt[ik*m2+im]));
#endif
	    }
	    cwave[ik] = c;
	}
	
	/* saving a pair of FFTs */
	icfft2(curr,cwave);

    /* output final result*/
    for (ix = 0; ix < nx; ix++) {
	for (iz=0; iz < nz; iz++) {
            i = iz+ix*nz;
	    j = iz+ix*nz2;
	    output[i] = curr[j];
	}
    }
    
    cfft2_finalize();
    return 0;
}
Esempio n. 14
0
int main(int argc, char* argv[])
{
    bool verb,complx,sub,os;
    int it,iz,im,ik,ix,i,j;     /* index variables */
    int nt,nz,nx, m2, nk, nzx, nz2, nx2, nzx2, n2, pad1,nth;
    sf_complex c,old;
    int snap;

    /* I/O arrays*/
    sf_complex *ww,*curr,*prev,*cwave,*cwavem,**wave,**lt, **rt;
    float *rcurr,*rr;
    
    sf_file Fw,Fr,Fo;    /* I/O files */
    sf_axis at,az,ax;    /* cube axes */
    sf_file left, right;
    sf_file snaps;

    /*for tapering*/
    float dt,dx,dz,dkx,dkz,kx0,kz0,kx,kz,ktmp,kx_trs,kz_trs,thresh;
    float *ktp;
    int taper;

    sf_init(argc,argv);
    if(!sf_getbool("verb",&verb)) verb=false; /* verbosity */
    if(!sf_getbool("cmplx",&complx)) complx=true; /* outputs complex wavefield */
    if(!sf_getbool("os",&os)) os=true; /* one-step flag */
    if (os) {
      sf_warning("One-step wave extrapolation");
      if(!sf_getbool("sub",&sub)) sub=false; /* subtraction flag */
    } else {
      sf_warning("Two-step wave extrapolation");
      if(!sf_getbool("sub",&sub)) sub=true; /* subtraction flag */
    }
    if (!sf_getint("taper",&taper)) taper=0; /* tapering in the frequency domain */
    if (!sf_getfloat("thresh",&thresh)) thresh=0.92; /* tapering threshold */

    /* setup I/O files */
    Fw = sf_input ("in" );
    Fo = sf_output("out");
    Fr = sf_input ("ref");

    if (SF_COMPLEX != sf_gettype(Fw)) sf_error("Need complex input");
    if (SF_FLOAT != sf_gettype(Fr)) sf_error("Need float ref");

    if(complx)
	sf_settype(Fo,SF_COMPLEX);
    else
	sf_settype(Fo,SF_FLOAT);

    /* Read/Write axes */
    at = sf_iaxa(Fw,1); nt = sf_n(at); dt = sf_d(at);
    az = sf_iaxa(Fr,1); nz = sf_n(az); dz = sf_d(az);
    ax = sf_iaxa(Fr,2); nx = sf_n(ax); dx = sf_d(ax);

    sf_oaxa(Fo,az,1); 
    sf_oaxa(Fo,ax,2); 

    if (!sf_getint("snap",&snap)) snap=0;
    /* interval for snapshots */

    if (snap > 0) {
	snaps = sf_output("snaps");
	/* (optional) snapshot file */
	
	sf_oaxa(snaps,az,1); 
	sf_oaxa(snaps,ax,2);
	sf_oaxa(snaps,at,3);

	sf_putint(snaps,"n3",nt/snap);
	sf_putfloat(snaps,"d3",dt*snap);
	sf_putfloat(snaps,"o3",0.);

        if(complx)
          sf_settype(snaps,SF_COMPLEX);
        else
          sf_settype(snaps,SF_FLOAT);
    } else {
	snaps = NULL;
    }
    
    if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */

#ifdef _OPENMP
#pragma omp parallel
    {
      nth = omp_get_num_threads();
    }
    if (verb) sf_warning(">>>> Using %d threads <<<<<", nth);
#endif

    nk = cfft2_init(pad1,nz,nx,&nz2,&nx2);

    nzx = nz*nx;
    nzx2 = nz2*nx2;

    /* propagator matrices */
    left = sf_input("left");
    right = sf_input("right");

    if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
    if (!sf_histint(left,"n2",&m2))  sf_error("Need n2= in left");
    
    if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
    if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);
  
    lt = sf_complexalloc2(nzx,m2);
    rt = sf_complexalloc2(m2,nk);

    sf_complexread(lt[0],nzx*m2,left);
    sf_complexread(rt[0],m2*nk,right);

    sf_fileclose(left);
    sf_fileclose(right);

    /* read wavelet & reflectivity */
    ww=sf_complexalloc(nt);  
    sf_complexread(ww,nt ,Fw);

    rr=sf_floatalloc(nzx); 
    sf_floatread(rr,nzx,Fr);

    curr   = sf_complexalloc(nzx2);
    if (!os) prev = sf_complexalloc(nzx2);
    else prev = NULL;
    if(!complx) rcurr  = sf_floatalloc(nzx2);
    else rcurr=NULL;

    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    wave   = sf_complexalloc2(nzx2,m2);

    /*icfft2_allocate(cwavem);*/

    for (iz=0; iz < nzx2; iz++) {
	curr[iz] = sf_cmplx(0.,0.);
	if (!os) prev[iz] = sf_cmplx(0.,0.);
	if(!complx) rcurr[iz]= 0.;
    }

    if (taper!=0) {
      dkz = 1./(nz2*dz); kz0 = -0.5/dz;
      dkx = 1./(nx2*dx); kx0 = -0.5/dx;
      kx_trs = thresh*fabs(0.5/dx);
      kz_trs = thresh*fabs(0.5/dz);
      sf_warning("dkz=%f,dkx=%f,kz0=%f,kx0=%f",dkz,dkx,kz0,kx0);
      sf_warning("nk=%d,nkz=%d,nkx=%d",nk,nz2,nx2);
      sf_warning("Applying kz tapering below %f",kz_trs);
      sf_warning("Applying kx tapering below %f",kx_trs);
      ktp = sf_floatalloc(nk);
      /* constructing the tapering op */
      for (ix=0; ix < nx2; ix++) {
	kx = kx0+ix*dkx;
	for (iz=0; iz < nz2; iz++) {
	  kz = kz0+iz*dkz;
	  ktmp = 1.;
	  if (fabs(kx) > kx_trs)
	    ktmp *= (fabs(kx)>kx_trs)? powf((fabs(kx0)-fabs(kx)+kx_trs)/kx0,2) : 1.;
	  if (fabs(kz) > kz_trs)
	    ktmp *= (fabs(kz)>kz_trs)? powf((fabs(kz0)-fabs(kz)+kz_trs)/kz0,2) : 1.;
     	  ktp[iz+ix*nz2] = ktmp;
	}
      }
    }

    /* MAIN LOOP */
    for (it=0; it<nt; it++) {
	if(verb) sf_warning("it=%d;",it);

	/* matrix multiplication */
	cfft2(curr,cwave);

	for (im = 0; im < m2; im++) {
	    for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		cwavem[ik] = cwave[ik]*rt[ik][im];
#else
		cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex
#endif
	    }
	    icfft2(wave[im],cwavem);
	}

	for (ix = 0; ix < nx; ix++) {
	    for (iz=0; iz < nz; iz++) {
	        i = iz+ix*nz;  /* original grid */
		j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
		c = ww[it] * rr[i]; // source term
#else
		c = sf_crmul(ww[it], rr[i]); // source term
#endif
		if (sub) c += curr[j];
		if (!os) {
		  old = curr[j];
#ifdef SF_HAS_COMPLEX_H
		  c += sub? (old-prev[j]) : -prev[j];
#else
		  c = sf_cadd(c,sub? sf_csub(old,prev[j]) : sf_cneg(prev[j]));
#endif
		  prev[j] = old;
		}
		for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
		    c += lt[im][i]*wave[im][j];
#else
		    c += sf_cmul(lt[im][i], wave[im][j]);
#endif
		}

		curr[j] = c;
		if (!complx) rcurr[j] = crealf(c);

	    }

	    if (NULL != snaps && 0 == it%snap) {
              /* write wavefield snapshots */
	    if (complx)
              sf_complexwrite(curr+ix*nz2,nz,snaps);
            else
              sf_floatwrite(rcurr+ix*nz2,nz,snaps);
	    }
	}

	if (taper!=0) {
	  if (it%taper == 0) {
	    cfft2(curr,cwave);
	    for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
	      cwavem[ik] = cwave[ik]*ktp[ik];
#else
	      cwavem[ik] = sf_crmul(cwave[ik],ktp[ik]);
#endif
	    }
	    icfft2(curr,cwavem);
            if (!os) {
              cfft2(prev,cwave);
              for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
                cwavem[ik] = cwave[ik]*ktp[ik];
#else
                cwavem[ik] = sf_crmul(cwave[ik],ktp[ik]);
#endif
              }
              icfft2(prev,cwavem);
            }
	  }
	}

    }
    if(verb) sf_warning("."); 

    /* write final wavefield to output */
    for (ix = 0; ix < nx; ix++) {
      if (complx)
        sf_complexwrite(curr+ix*nz2,nz,Fo);
      else
        sf_floatwrite(rcurr+ix*nz2,nz,Fo);
    }

    cfft2_finalize();
    exit (0);
}
Esempio n. 15
0
/**
   Time domain physical simulation.
   
   noisy: 
   - 0: no noise at all; 
   - 1: poisson and read out noise. 
   - 2: only poisson noise.   
*/
dmat *skysim_sim(dmat **mresout, const dmat *mideal, const dmat *mideal_oa, double ngsol, 
		 ASTER_S *aster, const POWFS_S *powfs, 
		 const PARMS_S *parms, int idtratc, int noisy, int phystart){
    int dtratc=0;
    if(!parms->skyc.multirate){
	dtratc=parms->skyc.dtrats->p[idtratc];
    }
    int hasphy;
    if(phystart>-1 && phystart<aster->nstep){
	hasphy=1;
    }else{
	hasphy=0;
    }
    const int nmod=mideal->nx;
    dmat *res=dnew(6,1);/*Results. 1-2: NGS and TT modes., 
			  3-4:On axis NGS and TT modes,
			  4-6: On axis NGS and TT wihtout considering un-orthogonality.*/
    dmat *mreal=NULL;/*modal correction at this step. */
    dmat *merr=dnew(nmod,1);/*modal error */
    dcell *merrm=dcellnew(1,1);dcell *pmerrm=NULL;
    const int nstep=aster->nstep?aster->nstep:parms->maos.nstep;
    dmat *mres=dnew(nmod,nstep);
    dmat* rnefs=parms->skyc.rnefs;
    dcell *zgradc=dcellnew3(aster->nwfs, 1, aster->ngs, 0);
    dcell *gradout=dcellnew3(aster->nwfs, 1, aster->ngs, 0);
    dmat *gradsave=0;
    if(parms->skyc.dbg){
	gradsave=dnew(aster->tsa*2,nstep);
    }
   
    
    SERVO_T *st2t=0;
    kalman_t *kalman=0;
    dcell *mpsol=0;
    dmat *pgm=0;
    dmat *dtrats=0;
    int multirate=parms->skyc.multirate;
    if(multirate){
	kalman=aster->kalman[0];
	dtrats=aster->dtrats;
    }else{
	if(parms->skyc.servo>0){
	    const double dtngs=parms->maos.dt*dtratc;
	    st2t=servo_new(merrm, NULL, 0, dtngs, aster->gain->p[idtratc]);
	    pgm=aster->pgm->p[idtratc];
	}else{
	    kalman=aster->kalman[idtratc];
	}
    }
    if(kalman){
	kalman_init(kalman);
	mpsol=dcellnew(aster->nwfs, 1); //for psol grad.
    }
    const long nwvl=parms->maos.nwvl;
    dcell **psf=0, **mtche=0, **ints=0;
    ccell *wvf=0,*wvfc=0, *otf=0;
    if(hasphy){
	psf=mycalloc(aster->nwfs,dcell*);
	wvf=ccellnew(aster->nwfs,1);
	wvfc=ccellnew(aster->nwfs,1);
	mtche=mycalloc(aster->nwfs,dcell*);
	ints=mycalloc(aster->nwfs,dcell*);
	otf=ccellnew(aster->nwfs,1);
    
	for(long iwfs=0; iwfs<aster->nwfs; iwfs++){
	    const int ipowfs=aster->wfs[iwfs].ipowfs;
	    const long ncomp=parms->maos.ncomp[ipowfs];
	    const long nsa=parms->maos.nsa[ipowfs];
	    wvf->p[iwfs]=cnew(ncomp,ncomp);
	    wvfc->p[iwfs]=NULL;
	    psf[iwfs]=dcellnew(nsa,nwvl);
	    //cfft2plan(wvf->p[iwfs], -1);
	    if(parms->skyc.multirate){
		mtche[iwfs]=aster->wfs[iwfs].pistat->mtche[(int)aster->idtrats->p[iwfs]];
	    }else{
		mtche[iwfs]=aster->wfs[iwfs].pistat->mtche[idtratc];
	    }
	    otf->p[iwfs]=cnew(ncomp,ncomp);
	    //cfft2plan(otf->p[iwfs],-1);
	    //cfft2plan(otf->p[iwfs],1);
	    ints[iwfs]=dcellnew(nsa,1);
	    int pixpsa=parms->skyc.pixpsa[ipowfs];
	    for(long isa=0; isa<nsa; isa++){
		ints[iwfs]->p[isa]=dnew(pixpsa,pixpsa);
	    }
	}
    }
    for(int irep=0; irep<parms->skyc.navg; irep++){
	if(kalman){
	    kalman_init(kalman);
	}else{
	    servo_reset(st2t);
	}
	dcellzero(zgradc);
	dcellzero(gradout);
	if(ints){
	    for(int iwfs=0; iwfs<aster->nwfs; iwfs++){
		dcellzero(ints[iwfs]);
	    }
	}
	for(int istep=0; istep<nstep; istep++){
	    memcpy(merr->p, PCOL(mideal,istep), nmod*sizeof(double));
	    dadd(&merr, 1, mreal, -1);/*form NGS mode error; */
	    memcpy(PCOL(mres,istep),merr->p,sizeof(double)*nmod);
	    if(mpsol){//collect averaged modes for PSOL.
		for(long iwfs=0; iwfs<aster->nwfs; iwfs++){
		    dadd(&mpsol->p[iwfs], 1, mreal, 1);
		}
	    }
	    pmerrm=0;
	    if(istep>=parms->skyc.evlstart){/*performance evaluation*/
		double res_ngs=dwdot(merr->p,parms->maos.mcc,merr->p);
		if(res_ngs>ngsol*100){
		    dfree(res); res=NULL;
		    break;
		}
		{
		    res->p[0]+=res_ngs;
		    res->p[1]+=dwdot2(merr->p,parms->maos.mcc_tt,merr->p);
		    double dot_oa=dwdot(merr->p, parms->maos.mcc_oa, merr->p);
		    double dot_res_ideal=dwdot(merr->p, parms->maos.mcc_oa, PCOL(mideal,istep));
		    double dot_res_oa=0;
		    for(int imod=0; imod<nmod; imod++){
			dot_res_oa+=merr->p[imod]*IND(mideal_oa,imod,istep);
		    }
		    res->p[2]+=dot_oa-2*dot_res_ideal+2*dot_res_oa;
		    res->p[4]+=dot_oa;
		}
		{
		    double dot_oa_tt=dwdot2(merr->p, parms->maos.mcc_oa_tt, merr->p);
		    /*Notice that mcc_oa_tt2 is 2x5 marix. */
		    double dot_res_ideal_tt=dwdot(merr->p, parms->maos.mcc_oa_tt2, PCOL(mideal,istep));
		    double dot_res_oa_tt=0;
		    for(int imod=0; imod<2; imod++){
			dot_res_oa_tt+=merr->p[imod]*IND(mideal_oa,imod,istep);
		    }
		    res->p[3]+=dot_oa_tt-2*dot_res_ideal_tt+2*dot_res_oa_tt;
		    res->p[5]+=dot_oa_tt;
		}
	    }//if evl

	    if(istep<phystart || phystart<0){
		/*Ztilt, noise free simulation for acquisition. */
		dmm(&zgradc->m, 1, aster->gm, merr, "nn", 1);/*grad due to residual NGS mode. */
		for(int iwfs=0; iwfs<aster->nwfs; iwfs++){
		    const int ipowfs=aster->wfs[iwfs].ipowfs;
		    const long ng=parms->maos.nsa[ipowfs]*2;
		    for(long ig=0; ig<ng; ig++){
			zgradc->p[iwfs]->p[ig]+=aster->wfs[iwfs].ztiltout->p[istep*ng+ig];
		    }
		}
	
		for(int iwfs=0; iwfs<aster->nwfs; iwfs++){
		    int dtrati=(multirate?(int)dtrats->p[iwfs]:dtratc);
		    if((istep+1) % dtrati==0){
			dadd(&gradout->p[iwfs], 0, zgradc->p[iwfs], 1./dtrati);
			dzero(zgradc->p[iwfs]);
			if(noisy){
			    int idtrati=(multirate?(int)aster->idtrats->p[iwfs]:idtratc);
			    dmat *nea=aster->wfs[iwfs].pistat->sanea->p[idtrati];
			    for(int i=0; i<nea->nx; i++){
				gradout->p[iwfs]->p[i]+=nea->p[i]*randn(&aster->rand);
			    }
			}
			pmerrm=merrm;//record output.
		    }
		}
	    }else{
		/*Accumulate PSF intensities*/
		for(long iwfs=0; iwfs<aster->nwfs; iwfs++){
		    const double thetax=aster->wfs[iwfs].thetax;
		    const double thetay=aster->wfs[iwfs].thetay;
		    const int ipowfs=aster->wfs[iwfs].ipowfs;
		    const long nsa=parms->maos.nsa[ipowfs];
		    ccell* wvfout=aster->wfs[iwfs].wvfout[istep];
		    for(long iwvl=0; iwvl<nwvl; iwvl++){
			double wvl=parms->maos.wvl[iwvl];
			for(long isa=0; isa<nsa; isa++){
			    ccp(&wvfc->p[iwfs], IND(wvfout,isa,iwvl));
			    /*Apply NGS mode error to PSF. */
			    ngsmod2wvf(wvfc->p[iwfs], wvl, merr, powfs+ipowfs, isa,
				       thetax, thetay, parms);
			    cembedc(wvf->p[iwfs],wvfc->p[iwfs],0,C_FULL);
			    cfft2(wvf->p[iwfs],-1);
			    /*peak in corner. */
			    cabs22d(&psf[iwfs]->p[isa+nsa*iwvl], 1., wvf->p[iwfs], 1.);
			}/*isa */
		    }/*iwvl */
		}/*iwfs */
	
		/*Form detector image from accumulated PSFs*/
		double igrad[2];
		for(long iwfs=0; iwfs<aster->nwfs; iwfs++){
		    int dtrati=dtratc, idtrat=idtratc;
		    if(multirate){//multirate
			idtrat=aster->idtrats->p[iwfs];
			dtrati=dtrats->p[iwfs];
		    }
		    if((istep+1) % dtrati == 0){/*has output */
			dcellzero(ints[iwfs]);
			const int ipowfs=aster->wfs[iwfs].ipowfs;
			const long nsa=parms->maos.nsa[ipowfs];
			for(long isa=0; isa<nsa; isa++){
			    for(long iwvl=0; iwvl<nwvl; iwvl++){
				double siglev=aster->wfs[iwfs].siglev->p[iwvl];
				ccpd(&otf->p[iwfs],psf[iwfs]->p[isa+nsa*iwvl]);
				cfft2i(otf->p[iwfs], 1); /*turn to OTF, peak in corner */
				ccwm(otf->p[iwfs], powfs[ipowfs].dtf[iwvl].nominal);
				cfft2(otf->p[iwfs], -1);
				dspmulcreal(ints[iwfs]->p[isa]->p, powfs[ipowfs].dtf[iwvl].si, 
					   otf->p[iwfs]->p, siglev);
			    }
		
			    /*Add noise and apply matched filter. */
#if _OPENMP >= 200805 
#pragma omp critical 
#endif
			    switch(noisy){
			    case 0:/*no noise at all. */
				break;
			    case 1:/*both poisson and read out noise. */
				{
				    double bkgrnd=aster->wfs[iwfs].bkgrnd*dtrati;
				    addnoise(ints[iwfs]->p[isa], &aster->rand, bkgrnd, bkgrnd, 0,0,IND(rnefs,idtrat,ipowfs));
				}
				break;
			    case 2:/*there is still poisson noise. */
				addnoise(ints[iwfs]->p[isa], &aster->rand, 0, 0, 0,0,0);
				break;
			    default:
				error("Invalid noisy\n");
			    }
		
			    igrad[0]=0;
			    igrad[1]=0;
			    double pixtheta=parms->skyc.pixtheta[ipowfs];
			    if(parms->skyc.mtch){
				dmulvec(igrad, mtche[iwfs]->p[isa], ints[iwfs]->p[isa]->p, 1);
			    }
			    if(!parms->skyc.mtch || fabs(igrad[0])>pixtheta || fabs(igrad[1])>pixtheta){
				if(!parms->skyc.mtch){
				    warning2("fall back to cog\n");
				}else{
				    warning_once("mtch is out of range\n");
				}
				dcog(igrad, ints[iwfs]->p[isa], 0, 0, 0, 3*IND(rnefs,idtrat,ipowfs), 0); 
				igrad[0]*=pixtheta;
				igrad[1]*=pixtheta;
			    }
			    gradout->p[iwfs]->p[isa]=igrad[0];
			    gradout->p[iwfs]->p[isa+nsa]=igrad[1];
			}/*isa */
			pmerrm=merrm;
			dcellzero(psf[iwfs]);/*reset accumulation.*/
		    }/*if iwfs has output*/
		}/*for wfs*/
	    }/*if phystart */
	    //output to mreal after using it to ensure two cycle delay.
	    if(st2t){//Type I or II control.
		if(st2t->mint->p[0]){//has output.
		    dcp(&mreal, st2t->mint->p[0]->p[0]);
		}
	    }else{//LQG control
		kalman_output(kalman, &mreal, 0, 1);
	    }
	    if(kalman){//LQG control
		int indk=0;
		//Form PSOL grads and obtain index to LQG M
		for(int iwfs=0; iwfs<aster->nwfs; iwfs++){
		    int dtrati=(multirate?(int)dtrats->p[iwfs]:dtratc);
		    if((istep+1) % dtrati==0){
			indk|=1<<iwfs;
			dmm(&gradout->p[iwfs], 1, aster->g->p[iwfs], mpsol->p[iwfs], "nn", 1./dtrati);
			dzero(mpsol->p[iwfs]);
		    }
		}
		if(indk){
		    kalman_update(kalman, gradout->m, indk-1);
		}
	    }else if(st2t){
		if(pmerrm){
		    dmm(&merrm->p[0], 0, pgm, gradout->m, "nn", 1);	
		}
		servo_filter(st2t, pmerrm);//do even if merrm is zero. to simulate additional latency
	    }
	    if(parms->skyc.dbg){
		memcpy(PCOL(gradsave, istep), gradout->m->p, sizeof(double)*gradsave->nx);
	    }
	}/*istep; */
    }
    if(parms->skyc.dbg){
	int dtrati=(multirate?(int)dtrats->p[0]:dtratc);
	writebin(gradsave,"%s/skysim_grads_aster%d_dtrat%d",dirsetup, aster->iaster,dtrati);
	writebin(mres,"%s/skysim_sim_mres_aster%d_dtrat%d",dirsetup,aster->iaster,dtrati);
    }
  
    dfree(mreal);
    dcellfree(mpsol);
    dfree(merr);
    dcellfree(merrm);
    dcellfree(zgradc);
    dcellfree(gradout);
    dfree(gradsave);
    if(hasphy){
	dcellfreearr(psf, aster->nwfs);
	dcellfreearr(ints, aster->nwfs);
        ccellfree(wvf);
	ccellfree(wvfc);
	ccellfree(otf);
	free(mtche);
    }
    servo_free(st2t);
    /*dfree(mres); */
    if(mresout) {
	*mresout=mres;
    }else{
	dfree(mres);
    }
    dscale(res, 1./((nstep-parms->skyc.evlstart)*parms->skyc.navg));
    return res;
}
Esempio n. 16
0
int main ( void )

/******************************************************************************/
/* 
  Purpose:

    MAIN is the main program for FFT_SERIAL.

  Discussion:

    The "complex" vector A is actually stored as a double vector B.

    The "complex" vector entry A[I] is stored as:

      B[I*2+0], the real part,
      B[I*2+1], the imaginary part.

  Modified:

    23 March 2009

  Author:

    Original C version by Wesley Petersen.
    This C version by John Burkardt.

  Reference:

    Wesley Petersen, Peter Arbenz, 
    Introduction to Parallel Computing - A practical guide with examples in C,
    Oxford University Press,
    ISBN: 0-19-851576-6,
    LC: QA76.58.P47.
*/
{
  double ctime;
  double ctime1;
  double ctime2;
  double error;
  int first;
  double flops;
  double fnm1;
  int i;
  int icase;
  int it;
  int ln2;
  double mflops;
  int n;
  int nits = 10000;
  static double seed;
  double sgn;
  double *w;
  double *x;
  double *y;
  double *z;
  double z0;
  double z1;

  timestamp ( );
  printf ( "\n" );
  printf ( "FFT_SERIAL\n" );
  printf ( "  C version\n" );
  printf ( "\n" );
  printf ( "  Demonstrate an implementation of the Fast Fourier Transform\n" );
  printf ( "  of a complex data vector.\n" );
/*
  Prepare for tests.
*/
  printf ( "\n" );
  printf ( "  Accuracy check:\n" );
  printf ( "\n" );
  printf ( "    FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
  printf ( "\n" );
  printf ( "             N      NITS    Error         Time          Time/Call     MFLOPS\n" );
  printf ( "\n" );

  seed  = 331.0;
  n = 1;
/*
  LN2 is the log base 2 of N.  Each increase of LN2 doubles N.
*/
  for ( ln2 = 1; ln2 <= 20; ln2++ )
  {
    n = 2 * n;
/*
  Allocate storage for the complex arrays W, X, Y, Z.  

  We handle the complex arithmetic,
  and store a complex number as a pair of doubles, a complex vector as a doubly
  dimensioned array whose second dimension is 2. 
*/
    w = ( double * ) malloc (     n * sizeof ( double ) );
    x = ( double * ) malloc ( 2 * n * sizeof ( double ) );
    y = ( double * ) malloc ( 2 * n * sizeof ( double ) );
    z = ( double * ) malloc ( 2 * n * sizeof ( double ) );

    first = 1;

    for ( icase = 0; icase < 2; icase++ )
    {
      if ( first )
      {
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          z0 = ggl ( &seed );
          z1 = ggl ( &seed );
          x[i] = z0;
          z[i] = z0;
          x[i+1] = z1;
          z[i+1] = z1;
        }
      } 
      else
      {
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          z0 = 0.0;              /* real part of array */
          z1 = 0.0;              /* imaginary part of array */
          x[i] = z0;
          z[i] = z0;           /* copy of initial real data */
          x[i+1] = z1;
          z[i+1] = z1;         /* copy of initial imag. data */
        }
      }
/* 
  Initialize the sine and cosine tables.
*/
      cffti ( n, w );
/* 
  Transform forward, back 
*/
      if ( first )
      {
        sgn = + 1.0;
        cfft2 ( n, x, y, w, sgn );
        sgn = - 1.0;
        cfft2 ( n, y, x, w, sgn );
/* 
  Results should be same as the initial data multiplied by N.
*/
        fnm1 = 1.0 / ( double ) n;
        error = 0.0;
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          error = error 
          + pow ( z[i]   - fnm1 * x[i], 2 )
          + pow ( z[i+1] - fnm1 * x[i+1], 2 );
        }
        error = sqrt ( fnm1 * error );
        printf ( "  %12d  %8d  %12e", n, nits, error );
        first = 0;
      }
      else
      {
        ctime1 = cpu_time ( );
        for ( it = 0; it < nits; it++ )
        {
          sgn = + 1.0;
          cfft2 ( n, x, y, w, sgn );
          sgn = - 1.0;
          cfft2 ( n, y, x, w, sgn );
        }
        ctime2 = cpu_time ( );
        ctime = ctime2 - ctime1;

        flops = 2.0 * ( double ) nits * ( 5.0 * ( double ) n * ( double ) ln2 );

        mflops = flops / 1.0E+06 / ctime;

        printf ( "  %12e  %12e  %12f\n", ctime, ctime / ( double ) ( 2 * nits ), mflops );
      }
    }
    if ( ( ln2 % 4 ) == 0 ) 
    {
      nits = nits / 10;
    }
    if ( nits < 1 ) 
    {
      nits = 1;
    }
    free ( w );
    free ( x );
    free ( y );
    free ( z );
  }
  printf ( "\n" );
  printf ( "FFT_SERIAL:\n" );
  printf ( "  Normal end of execution.\n" );
  printf ( "\n" );
  timestamp ( );

  return 0;
}
Esempio n. 17
0
int lrexp(sf_complex **img, sf_complex **dat, bool adj, sf_complex **lt, sf_complex **rt, sf_complex *ww, geopar geop, int pad1, bool verb, int snap, sf_complex ***wvfld)
/*< zero-offset exploding reflector modeling/migration >*/
{
    int it, nt, ix, nx, nx2, iz, nz, nz2, nzx2,  wfnt, wfit;
    int im, i, j, m2, ik, nk;
    float dt, dx, dz, ox;
    sf_complex *curr, **wave, *cwave, *cwavem, c;
    sf_complex *currm;

    nx  = geop->nx;
    nz  = geop->nz;
    dx  = geop->dx;
    dz  = geop->dz;
    ox  = geop->ox;
    nt  = geop->nt;
    dt  = geop->dt;
    snap= geop->snap;
    nzx2= geop->nzx2;
    m2  = geop->m2;
    wfnt= geop->wfnt;

    nk = cfft2_init(pad1,nz,nx,&nz2,&nx2);
    if (nk!=geop->nk) sf_error("nk discrepancy!");

    curr = sf_complexalloc(nzx2);
    cwave  = sf_complexalloc(nk);
    wave = sf_complexalloc2(nzx2,m2);
    if (adj) {
	currm  = sf_complexalloc(nzx2);
	icfft2_allocate(cwave);
    } else {
	cwavem = sf_complexalloc(nk);
	icfft2_allocate(cwavem);
    }

#ifdef _OPENMP
#pragma omp parallel for private(iz)
#endif
    for (iz=0; iz < nzx2; iz++) {
	curr[iz] = sf_cmplx(0.,0.);
    }

    if (adj) { /* migration <- read wavefield */
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
	for (ix=0; ix < nx; ix++) {
	    for (iz=0; iz < nz; iz++) {
		curr[iz+ix*nz2]=dat[ix][iz];
	    }
	}
	wfit = (int)(nt-1)/snap; // wfnt-1
	/* time stepping */
	for (it=nt-1; it > -1; it--) {
	    if (verb) sf_warning("it=%d;",it);
	
	    /* matrix multiplication */
	    for (im = 0; im < m2; im++) {
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,i,j) shared(currm,lt,curr)
#endif
		for (ix = 0; ix < nx; ix++) {
		    for (iz=0; iz < nz; iz++) {
			i = iz+ix*nz;  /* original grid */
			j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
			currm[j] = conjf(lt[im][i])*curr[j];
#else
			currm[j] = sf_cmul(conjf(lt[im][i]), curr[j]);
#endif
		    }
		}
		cfft2(currm,wave[im]);
	    }
#ifdef _OPENMP
#pragma omp parallel for private(ik,im,c) shared(wave,rt,cwave)
#endif	    
	    for (ik = 0; ik < nk; ik++) {
		c = sf_cmplx(0.,0.);
		for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
		    c += wave[im][ik]*conjf(rt[ik][im]);
#else
		    c += sf_cmul(wave[im][ik],conjf(rt[ik][im])); //complex multiplies complex
#endif
		}
		cwave[ik] = c;
	    }

	    icfft2(curr,cwave);

	    if (snap > 0 && it%snap == 0) {
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,j)
#endif
		for ( ix = 0; ix < nx; ix++) {
		    for ( iz = 0; iz<nz; iz++ ) { 
			j = iz+ix*nz2; /* padded grid */
			wvfld[wfit][ix][iz] = curr[j];
		    }
		}
		wfit--;
	    }
	} /*time iteration*/
	/*generate image*/
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
	for (ix=0; ix < nx; ix++) {
	    for (iz=0; iz < nz; iz++) {
		img[ix][iz] = curr[iz+ix*nz2];
	    }
	}
    } else { /* modeling -> write data */
	/*point source*/
	wfit = 0;
	/* time stepping */
	for (it=0; it < nt; it++) {
	    if (verb) sf_warning("it=%d;",it);

	    /* matrix multiplication */
	    cfft2(curr,cwave);
	    
	    for (im = 0; im < m2; im++) {
#ifdef _OPENMP
#pragma omp parallel for private(ik)
#endif
		for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		    cwavem[ik] = cwave[ik]*rt[ik][im];
#else
		    cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]);
#endif
		}
		icfft2(wave[im],cwavem);
	    }
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,i,j,im,c) shared(curr,lt,wave)
#endif
	    for (ix = 0; ix < nx; ix++) {
		for (iz=0; iz < nz; iz++) {
		    i = iz+ix*nz;  /* original grid */
		    j = iz+ix*nz2; /* padded grid */

#ifdef SF_HAS_COMPLEX_H
		c = ww[it] * crealf(img[ix][iz]); // source term
#else
		c = sf_crmul(ww[it], crealf(img[ix][iz])); // source term
#endif
		    
		    for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
			c += lt[im][i]*wave[im][j];
#else
			c += sf_cmul(lt[im][i], wave[im][j]);
#endif	    
		    }
		    curr[j] = c;
		}
	    }
	    /* record wavefield*/
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
	for (ix=0; ix < nx; ix++) {
	    for (iz=0; iz < nz; iz++) {
		dat[ix][iz] = curr[iz+ix*nz2];
	    }
	}
	    if (snap > 0 && it%snap == 0) {
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,j)
#endif
		for ( ix = 0; ix < nx; ix++) {
		    for ( iz = 0; iz<nz; iz++ ) { 
			j = iz+ix*nz2; /* padded grid */
			wvfld[wfit][ix][iz] = curr[j];
		    }
		}
		wfit++;
	    }
	}
    }
    if (verb) sf_warning(".");
    cfft2_finalize();

    return 0;
}
Esempio n. 18
0
/**
   Setup the detector transfer functions. See maos/setup_powfs.c
 */
static void setup_powfs_dtf(POWFS_S *powfs, const PARMS_S* parms){
    const int npowfs=parms->maos.npowfs;
    for(int ipowfs=0; ipowfs<npowfs; ipowfs++){
	const int ncomp=parms->maos.ncomp[ipowfs];
	const int ncomp2=ncomp>>1;
	const int embfac=parms->maos.embfac[ipowfs];
	const int pixpsa=parms->skyc.pixpsa[ipowfs];
	const double pixtheta=parms->skyc.pixtheta[ipowfs];
	const double blur=parms->skyc.pixblur[ipowfs]*pixtheta;
	const double e0=exp(-2*M_PI*M_PI*blur*blur);
	const double dxsa=parms->maos.dxsa[ipowfs];
	const double pixoffx=parms->skyc.pixoffx[ipowfs];
	const double pixoffy=parms->skyc.pixoffy[ipowfs];
	const double pxo=-(pixpsa/2-0.5+pixoffx)*pixtheta;
	const double pyo=-(pixpsa/2-0.5+pixoffy)*pixtheta;
	loc_t *loc_ccd=mksqloc(pixpsa, pixpsa, pixtheta, pixtheta, pxo, pyo);
	powfs[ipowfs].dtf=mycalloc(parms->maos.nwvl,DTF_S);
	for(int iwvl=0; iwvl<parms->maos.nwvl; iwvl++){
	    const double wvl=parms->maos.wvl[iwvl];
	    const double dtheta=wvl/(dxsa*embfac);
	    const double pdtheta=pixtheta*pixtheta/(dtheta*dtheta);
	    const double du=1./(dtheta*ncomp);
	    const double du2=du*du;
	    const double dupth=du*pixtheta;
	    cmat *nominal=cnew(ncomp,ncomp);
	    //cfft2plan(nominal,-1);
	    //cfft2plan(nominal,1);
	    cmat* pn=nominal;
	    const double theta=0;
	    const double ct=cos(theta);
	    const double st=sin(theta);
	    for(int iy=0; iy<ncomp; iy++){
		int jy=iy-ncomp2;
		for(int ix=0; ix<ncomp; ix++){
		    int jx=ix-ncomp2;
		    double ir=ct*jx+st*jy;
		    double ia=-st*jx+ct*jy;
		    IND(pn,ix,iy)=sinc(ir*dupth)*sinc(ia*dupth)
			*pow(e0,ir*ir*du2)*pow(e0,ia*ia*du2)
			*pdtheta;
		}
	    }
	    if(parms->skyc.fnpsf1[ipowfs]){
		warning("powfs %d has additional otf to be multiplied\n", ipowfs);
		dcell *psf1c=dcellread("%s", parms->skyc.fnpsf1[ipowfs]);
		dmat *psf1=NULL;
		if(psf1c->nx == 1){
		    psf1=dref(psf1c->p[0]);
		}else if(psf1c->nx==parms->maos.nwvl){
		    psf1=dref(psf1c->p[iwvl]);
		}else{
		    error("skyc.fnpsf1 has wrong dimension\n");
		}
		dcellfree(psf1c);
		if(psf1->ny!=2){
		    error("skyc.fnpsf1 has wrong dimension\n");
		}
		dmat *psf1x=dnew_ref(psf1->nx, 1, psf1->p);
		dmat *psf1y=dnew_ref(psf1->nx, 1, psf1->p+psf1->nx);
		dmat *psf2x=dnew(ncomp*ncomp, 1);
		for(int iy=0; iy<ncomp; iy++){
		    int jy=iy-ncomp2;
		    for(int ix=0; ix<ncomp; ix++){
			int jx=ix-ncomp2;
			psf2x->p[ix+iy*ncomp]=sqrt(jx*jx+jy*jy)*dtheta;
		    }
		}
		info("powfs %d, iwvl=%d, dtheta=%g\n", ipowfs, iwvl, dtheta*206265000);
		writebin(psf2x, "powfs%d_psf2x_%d", ipowfs,iwvl);
		dmat *psf2=dinterp1(psf1x, psf1y, psf2x, 0);
		normalize_sum(psf2->p, psf2->nx*psf2->ny, 1);
		psf2->nx=ncomp; psf2->ny=ncomp;
		writebin(psf2, "powfs%d_psf2_%d", ipowfs,iwvl);
		cmat *otf2=cnew(ncomp, ncomp);
		//cfft2plan(otf2, -1);
		ccpd(&otf2, psf2);//peak in center
		cfftshift(otf2);//peak in corner
		cfft2(otf2, -1);
		cfftshift(otf2);//peak in center
		writebin(otf2, "powfs%d_otf2_%d", ipowfs, iwvl);
		writebin(nominal, "powfs%d_dtf%d_nominal_0",ipowfs,iwvl);
		for(int i=0; i<ncomp*ncomp; i++){
		    nominal->p[i]*=otf2->p[i];
		}
		writebin(nominal, "powfs%d_dtf%d_nominal_1",ipowfs,iwvl);
		dfree(psf1x);
		dfree(psf1y);
		dfree(psf2x);
		dfree(psf1);
		cfree(otf2);
		dfree(psf2);
	    }
	    cfftshift(nominal);//peak in corner
	    cfft2(nominal,-1);
	    cfftshift(nominal);//peak in center
	    cfft2i(nominal,1);
	    warning_once("double check nominal for off centered skyc.fnpsf1\n");
	    /*This nominal will multiply to OTF with peak in corner. But after
	      inverse fft, peak will be in center*/
	    ccp(&powfs[ipowfs].dtf[iwvl].nominal, nominal);
	    cfree(nominal);

	    loc_t *loc_psf=mksqloc(ncomp, ncomp, dtheta, dtheta, -ncomp2*dtheta, -ncomp2*dtheta);
	    powfs[ipowfs].dtf[iwvl].si=mkh(loc_psf,loc_ccd,0,0,1);
	    locfree(loc_psf);
	    if(parms->skyc.dbg){
		writebin(powfs[ipowfs].dtf[iwvl].nominal,
		       "%s/powfs%d_dtf%d_nominal",dirsetup,ipowfs,iwvl);
		writebin(powfs[ipowfs].dtf[iwvl].si,
			"%s/powfs%d_dtf%d_si",dirsetup,ipowfs,iwvl);
	    }
	    powfs[ipowfs].dtf[iwvl].U=cnew(ncomp,1);
	    dcomplex *U=powfs[ipowfs].dtf[iwvl].U->p;

	    for(int ix=0; ix<ncomp; ix++){
		int jx=ix<ncomp2?ix:(ix-ncomp);
		U[ix]=COMPLEX(0, -2.*M_PI*jx*du);
	    }
	}/*iwvl */
	locfree(loc_ccd);
    }/*ipowfs */
}
Esempio n. 19
0
int lrosback2(sf_complex **img, sf_complex ***wavfld, float **sill, sf_complex **rcd, bool adj,
	      bool verb, bool wantwf, sf_complex **lt, sf_complex **rt, int m2,
              geopar geop, int pad1, sf_complex ***wavfld2)  
/*< low-rank one-step backward propagation + imaging >*/
{
    int it,iz,im,ik,ix,i,j;     /* index variables */
    int nxb,nzb,dx,dz,gpz,gpx,gpl,snpint,dt,wfit;
    int nt,nz,nx, nk, nzx, nz2, nx2, nzx2;
    sf_complex c;
    sf_complex *cwave, *cwavem, *currm;
    sf_complex **wave, *curr;
    sf_complex **ccr;

    nx = geop->nx;
    nz = geop->nz;
    nxb = geop->nxb;
    nzb = geop->nzb;
    dx = geop->dx;
    dz = geop->dz;
    
    gpz  = geop->gpz;
    gpx  = geop->gpx;
    gpl  = geop->gpl;
    snpint = geop->snpint;
    
    nt = geop->nt;
    dt = geop->dt;

    ccr = sf_complexalloc2(nz, nx);

    nk = cfft2_init(pad1,nzb,nxb,&nz2,&nx2);
    nzx = nzb*nxb;
    nzx2 = nz2*nx2;

    curr = sf_complexalloc(nzx2);
    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    wave = sf_complexalloc2(nzx2,m2);

    if (!adj) {
	currm  = sf_complexalloc(nzx2);
	icfft2_allocate(cwave);
    } else {
	cwavem = sf_complexalloc(nk);
	icfft2_allocate(cwavem);
    }

#ifdef _OPENMP
#pragma omp parallel for private(iz)
#endif
    for (iz=0; iz < nzx2; iz++) {
	curr[iz] = sf_cmplx(0.,0.);
    }

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nx; ix++) {
	for (iz = 0; iz < nz; iz++) {
	  ccr[ix][iz] = sf_cmplx(0.,0.);
	}
    }

    if (adj) { /* migration */
      /* step backward in time */
      /*Main loop*/
      wfit = (int)(nt-1)/snpint;
      for (it = nt-1; it>=0; it--) {
	if  (verb) sf_warning("Backward receiver it=%d/%d;", it, nt-1);
#ifdef _OPENMP
#pragma omp parallel for private(ix,j)
#endif
        for (ix=0; ix<gpl; ix++)  {
	  j = (gpz+geop->top)+(ix+gpx+geop->lft)*nz2; /* padded grid */
	  curr[j]+=rcd[ix][it]; /* data injection */
	}
	/*matrix multiplication*/
	cfft2(curr,cwave);
	for (im = 0; im < m2; im++) {
#ifdef _OPENMP
#pragma omp parallel for private(ik)
#endif
	  for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
	    cwavem[ik] = cwave[ik]*rt[ik][im];
#else
	    cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]);
#endif
	  }
	  icfft2(wave[im],cwavem);
	}
	
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,i,j,im,c) shared(curr,lt,wave)
#endif
	for (ix = 0; ix < nxb; ix++) {
	  for (iz=0; iz < nzb; iz++) {
	    i = iz+ix*nzb;  /* original grid */
	    j = iz+ix*nz2; /* padded grid */
	    c = sf_cmplx(0.,0.); // initialize
	    for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
	      c += lt[im][i]*wave[im][j];
#else
	      c += sf_cmul(lt[im][i], wave[im][j]);
#endif
	    }
	    curr[j] = c;
	  }
	}

        if ( wantwf && it%snpint == 0 ) {
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,j)
#endif
	  for ( ix = 0; ix < nx; ix++) {
	    for ( iz = 0; iz<nz; iz++ ) { 
	      j = (iz+geop->top)+(ix+geop->lft)*nz2; /* padded grid */
	      wavfld2[wfit][ix][iz] = curr[j];
	    }
	  }
        }
	/*cross-correlation imaging condition*/
	if (it%snpint == 0 ) {
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,j)
#endif
	  for (ix=0; ix<nx; ix++) {
	    for (iz=0; iz<nz; iz++) {
	      j = (iz+geop->top)+(ix+geop->lft)*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
	      ccr[ix][iz] += conjf(wavfld[wfit][ix][iz])*curr[j];
#else
	      ccr[ix][iz] += sf_cmul(conjf(wavfld[wfit][ix][iz]),curr[j]);
#endif
	    }
	  }
	  wfit--;
	}
      } /*Main loop*/
      if (verb) sf_warning(".");
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif    
      for (ix=0; ix<nx; ix++) {
	for (iz=0; iz<nz; iz++) {
#ifdef SF_HAS_COMPLEX_H
	  img[ix][iz] = ccr[ix][iz]/(sill[ix][iz]+SF_EPS);
#else
	  img[ix][iz] = sf_crmul(ccr[ix][iz],1./(sill[ix][iz]+SF_EPS));
#endif
	}
      } 
    } else { /* modeling */
      /* adjoint of source illumination */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif    
      for (ix=0; ix<nx; ix++) {
	for (iz=0; iz<nz; iz++) {
#ifdef SF_HAS_COMPLEX_H
	  ccr[ix][iz] = img[ix][iz]/(sill[ix][iz]+SF_EPS);
#else
	  ccr[ix][iz] = sf_crmul(img[ix][iz],1./(sill[ix][iz]+SF_EPS));
#endif
	}
      } 
      /* step forward in time */
      /*Main loop*/
      wfit=0;
      for (it=0; it<nt; it++) {
	if (verb) sf_warning("Forward receiver it=%d/%d;", it, nt-1);
        if ( wantwf && it%snpint == 0 ) {
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,j)
#endif
	  for ( ix = 0; ix < nx; ix++) {
	    for ( iz = 0; iz<nz; iz++ ) { 
	      j = (iz+geop->top)+(ix+geop->lft)*nz2; /* padded grid */
	      wavfld2[wfit][ix][iz] = curr[j];
	    }
	  }
        }
	/*adjoint of cross-correlation imaging condition*/
	if (it%snpint == 0 ) {
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,j)
#endif
	  for (ix=0; ix<nx; ix++) {
	    for (iz=0; iz<nz; iz++) {
	      j = (iz+geop->top)+(ix+geop->lft)*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
	      curr[j] += (wavfld[wfit][ix][iz])*ccr[ix][iz];//adjoint of ccr[ix][iz] += conjf(wavfld[wfit][ix][iz])*curr[j]; ???
#else
	      curr[j] += sf_cmul((wavfld[wfit][ix][iz]),ccr[ix][iz]);
#endif
	    }
	  }
	  wfit++;
	}
	/*matrix multiplication*/
	for (im = 0; im < m2; im++) {
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,i,j) shared(currm,lt,curr)
#endif
	  for (ix = 0; ix < nxb; ix++) {
	    for (iz=0; iz < nzb; iz++) {
	      i = iz+ix*nzb;  /* original grid */
	      j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
	      currm[j] = conjf(lt[im][i])*curr[j];
#else
	      currm[j] = sf_cmul(conjf(lt[im][i]), curr[j]);
#endif
	    }
	  }
	  cfft2(currm,wave[im]);
	}
#ifdef _OPENMP
#pragma omp parallel for private(ik,im,c) shared(wave,rt,cwave)
#endif	    
	for (ik = 0; ik < nk; ik++) {
	  c = sf_cmplx(0.,0.);
	  for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
	    c += wave[im][ik]*conjf(rt[ik][im]);
#else
	    c += sf_cmul(wave[im][ik],conjf(rt[ik][im])); //complex multiplies complex
#endif
	  }
	  cwave[ik] = c;
	}
	icfft2(curr,cwave);

#ifdef _OPENMP
#pragma omp parallel for private(ix,j)
#endif
        for (ix=0; ix<gpl; ix++)  {
	  j = (gpz+geop->top)+(ix+gpx+geop->lft)*nz2; /* padded grid */
	  rcd[ix][it]=curr[j];
	}
      } /*Main loop*/
    }
    cfft2_finalize();
    return 0;
}
Esempio n. 20
0
int main(int argc, char* argv[])
{
    bool verb;        
    int it,iz,im,ik,ix,i,j;     /* index variables */
    int nt,nz,nx, m2, nk, nzx, nz2, nx2, nzx2, n2, pad1;
    sf_complex c;

    float  *rr;      /* I/O arrays*/
    sf_complex *ww, *cwave, *cwavem;

    sf_complex **wave, *curr;
    float *rcurr;
    
    sf_file Fw,Fr,Fo;    /* I/O files */
    sf_axis at,az,ax;    /* cube axes */

    sf_complex **lt, **rt;
    sf_file left, right;


    sf_init(argc,argv);
    if(!sf_getbool("verb",&verb)) verb=false; /* verbosity */

    /* setup I/O files */
    Fw = sf_input ("in" );
    Fo = sf_output("out");
    Fr = sf_input ("ref");

    if (SF_COMPLEX != sf_gettype(Fw)) sf_error("Need complex input");
    if (SF_FLOAT != sf_gettype(Fr)) sf_error("Need float ref");

    sf_settype(Fo,SF_FLOAT);

    /* Read/Write axes */
    at = sf_iaxa(Fw,1); nt = sf_n(at); 
    az = sf_iaxa(Fr,1); nz = sf_n(az); 
    ax = sf_iaxa(Fr,2); nx = sf_n(ax); 

    sf_oaxa(Fo,az,1); 
    sf_oaxa(Fo,ax,2); 
    sf_oaxa(Fo,at,3);
    
    if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */

    nk = cfft2_init(pad1,nz,nx,&nz2,&nx2);

    nzx = nz*nx;
    nzx2 = nz2*nx2;

    /* propagator matrices */
    left = sf_input("left");
    right = sf_input("right");

    if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
    if (!sf_histint(left,"n2",&m2))  sf_error("Need n2= in left");
    
    if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
    if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);
//    if (!sf_histint(Fw,"n1",&nxx)) sf_error("No n1= in input");
  
    lt = sf_complexalloc2(nzx,m2);
    rt = sf_complexalloc2(m2,nk);

    sf_complexread(lt[0],nzx*m2,left);
    sf_complexread(rt[0],m2*nk,right);

//    sf_fileclose(left);
//    sf_fileclose(right);

    /* read wavelet & reflectivity */
    ww=sf_complexalloc(nt);  
    sf_complexread(ww,nt ,Fw);

    rr=sf_floatalloc(nzx); 
    sf_floatread(rr,nzx,Fr);

    curr   = sf_complexalloc(nzx2);
    rcurr  = sf_floatalloc(nzx2);

    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    wave   = sf_complexalloc2(nzx2,m2);


    for (iz=0; iz < nzx2; iz++) {
	curr[iz] = sf_cmplx(0.,0.);
	rcurr[iz]= 0.;
    }

    /* MAIN LOOP */
    for (it=0; it<nt; it++) {
	if(verb) sf_warning("it=%d;",it);

	/* matrix multiplication */
	cfft2(curr,cwave);

	for (im = 0; im < m2; im++) {
	    for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		cwavem[ik] = cwave[ik]*rt[ik][im];
#else
		cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex
#endif
//		sf_warning("realcwave=%g, imagcwave=%g", crealf(cwavem[ik]),cimagf(cwavem[ik]));
	    }
	    icfft2(wave[im],cwavem);
	}

	for (ix = 0; ix < nx; ix++) {
	    for (iz=0; iz < nz; iz++) {
		i = iz+ix*nz;  /* original grid */
		j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
		c = ww[it] * rr[i]; // source term
#else
		c = sf_crmul(ww[it], rr[i]); // source term
#endif
		for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
		    c += lt[im][i]*wave[im][j];
#else
		    c = sf_cadd(c,sf_cmul(lt[im][i], wave[im][j]));
#endif
		}

		curr[j] = c;
		rcurr[j] = crealf(c);
//		rcurr[j] = cimagf(c);
	    }

	    /* write wavefield to output */
	    sf_floatwrite(rcurr+ix*nz2,nz,Fo);
	}
    }
    if(verb) sf_warning("."); 
    
    exit (0);
}
Esempio n. 21
0
main()
{
    /*
       Example of Apple Altivec coded binary radix FFT
       using intrinsics from Petersen and Arbenz "Intro.
       to Parallel Computing," Section 3.6

       This is an expanded version of a generic work-space
       FFT: steps are in-line. cfft2(n,x,y,w,sign) takes complex
       n-array "x" (Fortran real,aimag,real,aimag,... order)
       and writes its DFT in "y". Both input "x" and the
       original contents of "y" are destroyed. Initialization
       for array "w" (size n/2 complex of twiddle factors
       (exp(twopi*i*k/n), for k=0..n/2-1)) is computed once
       by cffti(n,w).

                          WPP, SAM. Math. ETHZ, 1 June, 2002
    */

    int first,i,icase,it,ln2,n;
    int nits=1000000;
    static float seed = 331.0;
    float error,fnm1,sign,z0,z1,ggl();
    float *x,*y,*z,*w;
    double t1,mflops;
    /* allocate storage for x,y,z,w on 4-word bndr. */
    x = (float *) malloc(8*N);
    y = (float *) malloc(8*N);
    z = (float *) malloc(8*N);
    w = (float *) malloc(4*N);
    n     = 2;
    for(ln2=1; ln2<21; ln2++) {
        first = 1;
        for(icase=0; icase<2; icase++) {
            if(first) {
                for(i=0; i<2*n; i+=2) {
                    z0 = ggl(&seed);     /* real part of array */
                    z1 = ggl(&seed);     /* imaginary part of array */
                    x[i] = z0;
                    z[i] = z0;           /* copy of initial real data */
                    x[i+1] = z1;
                    z[i+1] = z1;         /* copy of initial imag. data */
                }
            } else {
                for(i=0; i<2*n; i+=2) {
                    z0 = 0;              /* real part of array */
                    z1 = 0;              /* imaginary part of array */
                    x[i] = z0;
                    z[i] = z0;           /* copy of initial real data */
                    x[i+1] = z1;
                    z[i+1] = z1;         /* copy of initial imag. data */
                }
            }
            /* initialize sine/cosine tables */
            cffti(n,w);
            /* transform forward, back */
            if(first) {
                sign = 1.0;
                cfft2(n,x,y,w,sign);
                sign = -1.0;
                cfft2(n,y,x,w,sign);
                /* results should be same as initial multiplied by n */
                fnm1 = 1.0/((float) n);
                error = 0.0;
                for(i=0; i<2*n; i+=2) {
                    error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
                             (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
                }
                error = sqrt(fnm1*error);
                printf(" for n=%d, fwd/bck error=%e\n",n,error);
                first = 0;
            } else {
                for(it=0; it<nits; it++) {
                    sign = +1.0;
                    cfft2(n,x,y,w,sign);
                    sign = -1.0;
                    cfft2(n,y,x,w,sign);
                }
            }
        }
        if((ln2%4)==0) nits /= 10;
        n *= 2;
    }
    return 0;
}
Esempio n. 22
0
static void test_stfun(){
    rand_t rstat;
    int seed=4;
    double r0=0.2;
    double dx=1./16;
    long N=32;
    long nx=N;
    long ny=N;
    long nframe=500;
    seed_rand(&rstat, seed);
    if(L0<9000){
	dmat *rr=dlinspace(0, N*dx, N);
	dmat *covvk=turbcov(rr, sqrt(2)*N*dx, r0, L0);
	writebin(covvk, "cov_vk");
	dfree(rr);
	dfree(covvk);
    }
    /*    return; */
    {
	map_t *atm=mapnew(nx+1, ny+1, dx, dx,NULL);
	stfun_t *data=stfun_init(nx, ny, NULL);
	zfarr *save=zfarr_init(nframe, 1, "fractal_atm.bin");
	for(long i=0; i<nframe; i++){
	    for(long j=0; j<(nx+1)*(ny+1); j++){
		atm->p[j]=randn(&rstat);
	    }
	    fractal_do((dmat*)atm, dx, r0,L0,ninit);
	    stfun_push(data, (dmat*)atm);
	    zfarr_dmat(save, i, (dmat*)atm);
	    if(i%100==0)
		info("%ld of %ld\n", i, nframe);
	}
	zfarr_close(save);
	dmat *st=stfun_finalize(data);
	writebin(st, "stfun_fractal.bin");
	ddraw("fractal", st, NULL,NULL, "Atmosphere","x","y","stfun");
    }
    /*exit(0); */
    {
	stfun_t *data=stfun_init(nx, ny, NULL);
	dmat *spect=turbpsd(nx, ny, dx, r0, 100, 0, 0.5);
	cmat *atm=cnew(nx, ny);
	//cfft2plan(atm, -1);
	dmat *atmr=dnew(atm->nx, atm->ny);
	dmat *atmi=dnew(atm->nx, atm->ny);
	spect->p[0]=0;
	for(long ii=0; ii<nframe; ii+=2){
	    for(long i=0; i<atm->nx*atm->ny; i++){
		atm->p[i]=COMPLEX(randn(&rstat), randn(&rstat))*spect->p[i];
	    }
	    cfft2(atm, -1);
	    for(long i=0; i<atm->nx*atm->ny; i++){
		atmr->p[i]=creal(atm->p[i]);
		atmi->p[i]=cimag(atm->p[i]);
	    }
	    stfun_push(data, atmr);
	    stfun_push(data, atmi);
	    if(ii%100==0)
		info("%ld of %ld\n", ii, nframe);
	}
	dmat *st=stfun_finalize(data);
	writebin(st, "stfun_fft.bin");
	ddraw("fft", st, NULL,NULL, "Atmosphere","x","y","stfun");
    }
	
}
Esempio n. 23
0
Datum 
fft_main(PG_FUNCTION_ARGS)
{
  int i,n;
  double sgn;
  double *w;
  double wtime;
  double *x,*y,*z;
  int32 arg = PG_GETARG_INT32(0);

  timestamp();

  ereport(INFO,(errmsg("  Number of processors available = %d\n", omp_get_num_procs())));
  ereport(INFO,(errmsg("  Number of threads =              %d\n", omp_get_max_threads())));

  //Prepare for tests.
  ereport(INFO,(errmsg("             N      Time\n")));

  n = 4;
  w = (double *) malloc(    n * sizeof(double));
  x = (double *) malloc(2 * n * sizeof(double));
  y = (double *) malloc(2 * n * sizeof(double));
  z = (double *) malloc(2 * n * sizeof(double));

  //初始化数据
  x[0]=1.0; x[1]=0.0;
  x[2]=2.0; x[3]=0.0;
  x[4]=4.0; x[5]=0.0;
  x[6]=3.0; x[7]=0.0;

  ereport(INFO,(errmsg("x=")));
  for(i=0; i<2*n; i++){
    ereport(INFO,(errmsg("%f,",x[i])));
  }

  //Initialize the sine and cosine tables.
  cffti(n, w);

  wtime = omp_get_wtime();

  //Transform forward
  sgn = + 1.0;

  //fft计算
  cfft2( n, x, y, w, sgn );
    
  //输出结果
  ereport(INFO,(errmsg("y=")));
  for(i=0; i<2*n; i++){
    ereport(INFO,(errmsg("%f,",y[i])));
  }

  //元素个数
  ereport(INFO,(errmsg("  %12d", n)));
  //运行时间
  wtime = omp_get_wtime() - wtime;
  ereport(INFO,(errmsg("  %12e\n", wtime)));

  free(w);
  free(x);
  free(y);

  //Terminate.
  ereport(INFO,(errmsg("  Normal end of execution.\n")));
  timestamp();

  PG_RETURN_INT32(arg);
}
Esempio n. 24
0
int main(int argc, char* argv[])
{
    bool mig;
    int it, nt, ix, nx, iz, nz, nx2, nz2, nzx, nzx2, pad1;
    int im, i, j, m2, it1, it2, its, ik, n2, nk;
    float dt, dx, dz,x0;
    sf_complex *curr, **img, *dat, **lft, **rht, **wave, *cwave, *cwavem, c;
    sf_file data, image, left, right;

    sf_init(argc,argv);

    if (!sf_getbool("mig",&mig)) mig=false;
    /* if n, modeling; if y, migration */

    if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */

    if (mig) { /* migration */
	data = sf_input("in");
	image = sf_output("out");
	sf_settype(image,SF_COMPLEX);

	if (!sf_histint(data,"n1",&nx)) sf_error("No n1= in input");
	if (!sf_histfloat(data,"d1",&dx)) sf_error("No d1= in input");
	if (!sf_histfloat(data,"o1",&x0)) x0=0.; 

	if (!sf_histint(data,"n2",&nt)) sf_error("No n2= in input");
	if (!sf_histfloat(data,"d2",&dt)) sf_error("No d2= in input");

	if (!sf_getint("nz",&nz)) sf_error("Need nz=");
	/* depth samples (if migration) */
	if (!sf_getfloat("dz",&dz)) sf_error("Need dz=");
	/* depth sampling (if migration) */

	sf_putint(image,"n1",nz);
	sf_putfloat(image,"d1",dz);
	sf_putfloat(image,"o1",0.);
	sf_putstring(image,"label1","Depth");

	sf_putint(image,"n2",nx);
	sf_putfloat(image,"d2",dx);
	sf_putfloat(image,"o2",x0);
	sf_putstring(image,"label2","Distance");
    } else { /* modeling */
	image = sf_input("in");
	data = sf_output("out");
	sf_settype(data,SF_COMPLEX);

	if (!sf_histint(image,"n1",&nz)) sf_error("No n1= in input");
	if (!sf_histfloat(image,"d1",&dz)) sf_error("No d1= in input");

	if (!sf_histint(image,"n2",&nx))  sf_error("No n2= in input");
	if (!sf_histfloat(image,"d2",&dx)) sf_error("No d2= in input");
	if (!sf_histfloat(image,"o2",&x0)) x0=0.; 	

	if (!sf_getint("nt",&nt)) sf_error("Need nt=");
	/* time samples (if modeling) */
	if (!sf_getfloat("dt",&dt)) sf_error("Need dt=");
	/* time sampling (if modeling) */

	sf_putint(data,"n1",nx);
	sf_putfloat(data,"d1",dx);
	sf_putfloat(data,"o1",x0);
	sf_putstring(data,"label1","Distance");

	sf_putint(data,"n2",nt);
	sf_putfloat(data,"d2",dt);
	sf_putfloat(data,"o2",0.);
	sf_putstring(data,"label2","Time");
	sf_putstring(data,"unit2","s");
    }

    nk = cfft2_init(pad1,nx,nz,&nx2,&nz2);

    nzx = nz*nx;
    nzx2 = nz2*nx2;

    img = sf_complexalloc2(nz,nx);
    dat = sf_complexalloc(nx);

    /* propagator matrices */
    left = sf_input("left");
    right = sf_input("right");

    if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
    if (!sf_histint(left,"n2",&m2))  sf_error("No n2= in left");
    
    if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
    if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);
 
    lft = sf_complexalloc2(nzx,m2);
    rht = sf_complexalloc2(m2,nk);

    sf_complexread(lft[0],nzx*m2,left);
    sf_complexread(rht[0],m2*nk,right);

    curr = sf_complexalloc(nzx2);

    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    wave = sf_complexalloc2(nzx2,m2);

    for (iz=0; iz < nzx2; iz++) {
	curr[iz] = sf_cmplx(0.,0.);
    }


    if (mig) { /* migration */
	/* step backward in time */
	it1 = nt-1;
	it2 = -1;
	its = -1;	
    } else { /* modeling */
	sf_complexread(img[0],nzx,image);

	/* transpose */
	for (ix=0; ix < nx; ix++) {
	    for (iz=0; iz < nz; iz++) {
		curr[ix+iz*nx2]=img[ix][iz];
	    }
	}
	
	/* step forward in time */
	it1 = 0;
	it2 = nt;
	its = +1;
    }


    /* time stepping */
    for (it=it1; it != it2; it += its) {
	sf_warning("it=%d;",it);

	if (mig) { /* migration <- read data */
	    sf_complexread(dat,nx,data);
	} else {
	    for (ix=0; ix < nx; ix++) {
		dat[ix] = sf_cmplx(0.,0.);
	    }
	}

	for (ix=0; ix < nx; ix++) {
	    if (mig) {
		curr[ix] += dat[ix];
	    } else {
		dat[ix] = curr[ix];
	    }
	}

	/* matrix multiplication */
	cfft2(curr,cwave);

	for (im = 0; im < m2; im++) {
	    for (ik = 0; ik < nk; ik++) {
#ifdef SF_HAS_COMPLEX_H
		cwavem[ik] = cwave[ik]*rht[ik][im];
#else
		cwavem[ik] = sf_cmul(cwave[ik],rht[ik][im]);
#endif
	    }
	    icfft2(wave[im],cwavem);
	}


#ifdef _OPENMP
#pragma omp parallel for private(ix,iz,i,j,im,c) shared(curr,lft,wave)
#endif
	for (ix = 0; ix < nx; ix++) {
	    for (iz=0; iz < nz; iz++) {
		i = ix+iz*nx;  /* original grid */
		j = ix+iz*nx2; /* padded grid */
		
		c = sf_cmplx(0.,0.); /* initialize */

		for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
		    c += lft[im][i]*wave[im][j];
#else
		    c += sf_cmul(lft[im][i], wave[im][j]);
#endif	    
		}

		curr[j] = c;
	    }
	}
	
	if (!mig) { /* modeling -> write out data */
	    sf_complexwrite(dat,nx,data);
	}
    }
    sf_warning(".");

    if (mig) {
	/* transpose */
	for (ix=0; ix < nx; ix++) {
	    for (iz=0; iz < nz; iz++) {
		img[ix][iz] = curr[ix+iz*nx2];
	    }
	}

	sf_complexwrite(img[0],nzx,image);
    }

    exit(0);
}