static void test_cov(){/*not good */ rand_t rstat; int seed=4; double r0=0.2; double dx=1./64; long N=1+1024; long nx=N; long ny=N; long nframe=1; seed_rand(&rstat, seed); map_t *atm=mapnew(nx, ny, dx,dx, NULL); cmat *atmhat=cnew((N+1)*3,(N+1)*3); dmat *atmhattot=dnew((N+1)*3,(N+1)*3); //cfft2plan(atmhat,-1); //cfft2plan(atmhat, 1); dset((dmat*)atm,1); cembedd(atmhat, (dmat*)atm, 0); cfft2(atmhat, -1); cabs22d(&atmhattot, 1, atmhat, 1); ccpd(&atmhat, atmhattot); cfft2i(atmhat, 1); cfftshift(atmhat); dmat *denom=dnew((N+1)*3,(N+1)*3); dmat *cov=dnew((N+1)*3,(N+1)*3); creal2d(&denom, 0, atmhat, 1); writebin(denom, "denom.bin"); dzero(atmhattot); for(long i=0; i<nframe; i++){ info("%ld of %ld\n", i, nframe); for(long j=0; j<nx*ny; j++){ atm->p[j]=randn(&rstat); } fractal_do((dmat*)atm, dx, r0,L0,ninit); /*mapwrite(atm, "atm_%ld.bin", i); */ cembedd(atmhat, (dmat*)atm, 0); cfft2(atmhat, -1); cabs22d(&atmhattot, 1, atmhat, 1); if(i==0 || (i+1)%10==0){ dscale(atmhattot, 1./(i+1)); ccpd(&atmhat, atmhattot); writebin(atmhattot, "atm_psf_%ld.bin",i+1); cfft2i(atmhat, 1); cfftshift(atmhat); creal2d(&cov, 0, atmhat, 1); for(long k=0; k<cov->nx*cov->ny; k++){ cov->p[k]/=denom->p[k]; } writebin(cov, "atm_cov_%ld.bin",i+1); } } }
/**
   Convert PSD into time series.

   \param psdin   Either a single column [alpha, beta, fmin, fmax] describing
                  the power law beta*f^alpha between fmin and fmax (fmin and
                  fmax are optional), or a two column table handed to
                  dinterp1() together with the frequency grid.
   \param rstat   Random stream used for the complex Gaussian amplitudes.
   \param dt      Sampling period; the frequency step is 1/(dt*nstep).
   \param nstepin Requested number of samples. The series is generated with
                  nextpow2(nstepin) samples and then resized to nstepin.
   \return Newly allocated nstepin x 1 time series owned by the caller.
*/
dmat* psd2time(const dmat *psdin, rand_t *rstat, double dt, int nstepin){
    if(!psdin){
        error("psdin cannot be null\n");
    }
    long nstep=nextpow2(nstepin);
    double df=1./(dt*nstep);
    dmat *fs=dlinspace(0, df, nstep);
    dmat *psd=NULL;
    if(psdin->ny==1){/*[alpha, beta, fmin, fmax] describes power law with cut on/off freq.*/
        if(psdin->nx<2){
            /*alpha and beta are both mandatory.*/
            error("Invalid PSD\n");
        }
        /*Only [i0, imax) is filled below; this assumes dnew() returns zeroed
          memory for the remaining entries -- TODO confirm.*/
        psd=dnew(nstep, 1);
        double alpha=psdin->p[0];
        double beta=psdin->p[1];
        long i0=1, imax=nstep;
        /*Values reported by info(); defaults match i0=1 and imax=nstep.*/
        double fmin=df, fmax=nstep*df;
        if(psdin->nx>2){
            fmin=psdin->p[2];
            i0=(long)round(fmin/df);
            if(i0<1) i0=1;/*never populate the zero frequency*/
        }
        if(psdin->nx>3){
            fmax=psdin->p[3];
            imax=(long)round(fmax/df);
            if(imax>nstep) imax=nstep;/*stay inside psd*/
        }
        /*Print the local copies: psdin->p[2] and p[3] may not exist.*/
        info("fmin=%g, fmax=%g, df=%g, i0=%ld, imax=%ld\n", fmin, fmax, df, i0, imax);
        for(long i=i0; i<imax; i++){
            psd->p[i]=beta*pow(i*df, alpha);
        }
    }else if(psdin->ny==2){
        if(psdin->nx<2){
            error("Invalid PSD\n");
        }
        psd=dinterp1(psdin, 0, fs, 1e-40);
        psd->p[0]=0;/*disable piston. */
    }else{
        error("psdin is invalid format.\n");
    }
    /*Random complex amplitudes with variance psd*df per frequency bin.*/
    cmat *wshat=cnew(nstep, 1);
    for(long i=0; i<nstep; i++){
        wshat->p[i]=sqrt(psd->p[i]*df)*COMPLEX(randn(rstat), randn(rstat));
    }
    cfft2(wshat, -1);
    dmat *out=NULL;
    creal2d(&out, 0, wshat, 1);
    cfree(wshat);
    dfree(psd);
    dfree(fs);
    dresize(out, nstepin, 1);
    return out;
}
/* static int test_fft_speed_small(){ int nis=128; int *is=mycalloc(nis,int); dmat *tim=dnew(nis,1); for(int ii=0; ii<nis; ii++){ is[ii]=ii+1; } ccell *ac=cellnew(nis,1); rand_t stat; seed_rand(&stat,1); for(int ii=0; ii<nis; ii++){ ac->p[ii]=cnew(is[ii],is[ii]); //cfft2plan(ac->p[ii],-1); crandn(ac->p[ii],20,&stat); } TIC; for(int ii=0; ii<nis; ii++){ info2("size %4d: ",is[ii]); tic; for(int i=0; i<1000; i++){ cfft2(ac->p[ii],-1); } toc2("fft"); tim->p[ii]=toc3; } writebin(tim,"fft_timing"); } static void test_fft_speed(){ int nis=2048; int *is=mycalloc(nis,int); dmat *tim=dnew(nis,1); for(int ii=0; ii<nis; ii++){ is[ii]=(ii+1)*2; } ccell *ac=cellnew(nis,1); rand_t stat; seed_rand(&stat,1); TIC; for(int ii=0; ii<nis; ii++){ info2("size %4d: ",is[ii]); tic; ac->p[ii]=cnew(is[ii],is[ii]); //cfft2plan(ac->p[ii],-1); crandn(ac->p[ii],20,&stat); toc("plan"); } toc("plan"); for(int ii=0; ii<nis; ii++){ info2("size %4d: ",is[ii]); tic; int nrepeat; if(is[ii]<300) nrepeat=100; else if(is[ii]<1000) nrepeat=10; else nrepeat=1; for(int i=0; i<nrepeat; i++){ cfft2(ac->p[ii],-1); } toc2("fft"); tim->p[ii]=toc3/nrepeat; } writebin(tim,"fft_timing"); }*/ static void test_fft_special(){ int nis=2; int *is=mycalloc(nis,int); dmat *tim=dnew(nis,1); is[0]=3824; is[1]=4096; ccell *ac=ccellnew(nis,1); rand_t rstat; seed_rand(&rstat,1); TIC; for(int ii=0; ii<nis; ii++){ info2("size %4d: ",is[ii]); tic; ac->p[ii]=cnew(is[ii],is[ii]); //cfft2plan(ac->p[ii],-1); //cfft2partialplan(ac->p[ii],512,-1); crandn(ac->p[ii],20,&rstat); toc("plan"); } for(int ii=0; ii<nis; ii++){ info2("size %4d: ",is[ii]); tic; int nrepeat; if(is[ii]<300) nrepeat=100; else if(is[ii]<1000) nrepeat=10; else nrepeat=4; for(int i=0; i<nrepeat; i++){ cfft2(ac->p[ii],-1); } toc2("fft"); for(int i=0; i<nrepeat; i++){ cfft2partial(ac->p[ii],512,-1); } toc2("fft2partial"); tim->p[ii]=toc3/nrepeat; } writebin(tim,"fft_timing"); }
dmat *psd1d(const dmat *v, /**<[in] The data sequence*/ long nseg /**<[in] Number of overlapping segments*/ ){ long nx; long ncol; if(v->nx==1){ nx=v->ny; ncol=1; }else{ nx=v->nx; ncol=v->ny; } if(nseg<=1) nseg=1; const int lseg2=nx/(nseg+1); const int lseg=lseg2*2; dmat *psd=dnew(lseg2+1, ncol); cmat *hat=cnew(lseg, 1); //cfft2plan(hat, -1); for(long icol=0; icol<ncol; icol++){ double *ppsd=psd->p+icol*(lseg2+1); for(int iseg=0; iseg<nseg; iseg++){ double* p=v->p+icol*nx+iseg*lseg2; for(int ix=0; ix<lseg; ix++){ hat->p[ix]=p[ix]*W_J(ix, lseg2); } cfft2(hat, -1); ppsd[0]+=cabs2(hat->p[0]); for(int ix=1; ix<lseg2; ix++){ ppsd[ix]+=cabs2(hat->p[ix])+cabs2(hat->p[lseg-ix]); } ppsd[lseg2]+=cabs2(hat->p[lseg2]); } } double sumwt=0; for(int ix=0; ix<lseg; ix++){ sumwt+=pow(W_J(ix, lseg2), 2); } sumwt*=lseg*nseg; dscale(psd, 1./sumwt); cfree(hat); return psd; }
/* Compute cxx on atm to compare against L2, invpsd, fractal. */ static void test_cxx(){ rand_t rstat; int seed=4; double r0=0.2; double dx=1./4; long N=16; long nx=N; long ny=N; long nframe=40960; seed_rand(&rstat, seed); { dmat *cxx=dnew(N*N,N*N); map_t *atm=mapnew(nx+1, ny+1, dx, dx,NULL); for(long i=0; i<nframe; i++){ info("%ld of %ld\n", i, nframe); for(long j=0; j<(nx+1)*(ny+1); j++){ atm->p[j]=randn(&rstat); } fractal_do((dmat*)atm, dx, r0, L0, ninit); dmat *sec=dsub((dmat*)atm, 0, nx, 0, ny); dmat *atmvec=dref_reshape(sec, nx*ny, 1); dmm(&cxx,1, atmvec,atmvec,"nt",1); dfree(atmvec); dfree(sec); } dscale(cxx, 1./nframe); writebin(cxx, "cxx_fractal"); dfree(cxx); mapfree(atm); } { dmat *cxx=dnew(N*N,N*N); dmat *spect=turbpsd(nx, ny, dx, r0, 100, 0, 0.5); spect->p[0]=spect->p[1]; cmat *atm=cnew(nx, ny); //cfft2plan(atm, -1); dmat *atmr=dnew(nx*ny,1); dmat *atmi=dnew(nx*ny,1); for(long ii=0; ii<nframe; ii+=2){ info("%ld of %ld\n", ii, nframe); for(long i=0; i<atm->nx*atm->ny; i++){ atm->p[i]=COMPLEX(randn(&rstat), randn(&rstat))*spect->p[i]; } cfft2(atm, -1); for(long i=0; i<atm->nx*atm->ny; i++){ atmr->p[i]=creal(atm->p[i]); atmi->p[i]=cimag(atm->p[i]); } dmm(&cxx,1, atmr,atmr,"nt",1); dmm(&cxx,1, atmi,atmi,"nt",1); } dscale(cxx, 1./nframe); writebin(cxx, "cxx_fft"); dfree(cxx); dfree(atmr); dfree(atmi); cfree(atm); } loc_t *loc=mksqloc_auto(16,16,1./4,1./4); locwrite(loc,"loc"); dmat *B=stfun_kolmogorov(loc, r0); writebin(B, "B_theory"); }
static void test_psd(){ rand_t rstat; int seed=4; double r0=0.2; double dx=1./64; long N=1024; long nx=N; long ny=N; long ratio=1; long xskip=nx*(ratio-1)/2; long yskip=ny*(ratio-1)/2; long nframe=512; seed_rand(&rstat, seed); if(1){ map_t *atm=mapnew(nx+1, ny+1, dx,dx, NULL); cmat *hat=cnew(nx*ratio, ny*ratio); //cfft2plan(hat, -1); dmat *hattot=dnew(nx*ratio, ny*ratio); for(long i=0; i<nframe; i++){ info2("%ld of %ld\n", i, nframe); for(long j=0; j<(nx+1)*(ny+1); j++){ atm->p[j]=randn(&rstat); } fractal_do((dmat*)atm, dx, r0,L0,ninit); czero(hat); for(long iy=0; iy<ny; iy++){ for(long ix=0; ix<nx; ix++){ IND(hat,ix+xskip,iy+yskip)=IND(atm,ix,iy); } } cfftshift(hat); cfft2i(hat, -1); cabs22d(&hattot, 1, hat, 1); } dscale(hattot, 1./nframe); dfftshift(hattot); writebin(hattot, "PSD_fractal"); } { dmat *spect=turbpsd(nx, ny, dx, r0, 100, 0, 0.5); writebin(spect, "spect"); cmat *hat=cnew(nx*ratio, ny*ratio); //cfft2plan(hat, -1); dmat *hattot=dnew(nx*ratio, ny*ratio); cmat *atm=cnew(nx, ny); //cfft2plan(atm, -1); dmat *atmr=dnew(atm->nx, atm->ny); dmat *atmi=dnew(atm->nx, atm->ny); cmat* phat=hat; dmat* patmr=atmr; dmat* patmi=atmi; for(long ii=0; ii<nframe; ii+=2){ info2("%ld of %ld\n", ii, nframe); for(long i=0; i<atm->nx*atm->ny; i++){ atm->p[i]=COMPLEX(randn(&rstat), randn(&rstat))*spect->p[i]; } cfft2(atm, -1); for(long i=0; i<atm->nx*atm->ny; i++){ atmr->p[i]=creal(atm->p[i]); atmi->p[i]=cimag(atm->p[i]); } czero(hat); for(long iy=0; iy<ny; iy++){ for(long ix=0; ix<nx; ix++){ IND(phat,ix+xskip,iy+yskip)=IND(patmr,ix,iy); } } cfftshift(hat); cfft2i(hat, -1); cabs22d(&hattot, 1, hat, 1); czero(hat); for(long iy=0; iy<ny; iy++){ for(long ix=0; ix<nx; ix++){ IND(phat,ix+xskip,iy+yskip)=IND(patmi,ix,iy); } } cfftshift(hat); cfft2i(hat, -1); cabs22d(&hattot, 1, hat, 1); } dscale(hattot, 1./nframe); dfftshift(hattot); writebin(hattot, "PSD_fft"); } }
int main(int argc, char* argv[]) { bool verb,sub,os; int it,iz,im,ikx,ikz,ix,i,j; /* index variables */ int nt,nz,nx, m2, nk, nzx, nz2, nx2, nzx2, n2, pad1,nth; sf_complex c,old; /* I/O arrays*/ sf_complex *ww,*curr,*prev,*cwave,*cwavem,**wave,**lt, **rt; sf_complex *snap; float *rr; sf_file Fw,Fr,Fo; /* I/O files */ sf_axis at,az,ax; /* cube axes */ sf_file left, right; /*MPI related*/ int cpuid,numprocs; int provided; int n_local, o_local; int ozx2; sf_complex *sendbuf, *recvbuf; int *rcounts, *displs; //MPI_Init(&argc,&argv); MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED,&provided); threads_ok = provided >= MPI_THREAD_FUNNELED; sf_init(argc,argv); MPI_Comm_rank(MPI_COMM_WORLD, &cpuid); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); if(!sf_getbool("verb",&verb)) verb=false; /* verbosity */ if(!sf_getbool("os",&os)) os=true; /* one-step flag */ if (os) { sf_warning("One-step wave extrapolation"); if(!sf_getbool("sub",&sub)) sub=false; /* subtraction flag */ } else { sf_warning("Two-step wave extrapolation"); if(!sf_getbool("sub",&sub)) sub=true; /* subtraction flag */ } /* setup I/O files */ Fw = sf_input ("--input" ); Fo = sf_output("--output"); Fr = sf_input ("ref"); if (SF_COMPLEX != sf_gettype(Fw)) sf_error("Need complex input"); if (SF_FLOAT != sf_gettype(Fr)) sf_error("Need float ref"); sf_settype(Fo,SF_COMPLEX); /* Read/Write axes */ at = sf_iaxa(Fw,1); nt = sf_n(at); az = sf_iaxa(Fr,1); nz = sf_n(az); ax = sf_iaxa(Fr,2); nx = sf_n(ax); if (cpuid==0) { sf_oaxa(Fo,az,1); sf_oaxa(Fo,ax,2); //sf_setn(at,1); sf_oaxa(Fo,at,3); } if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */ #pragma omp parallel { nth = omp_get_num_threads(); } if (verb) sf_warning(">>>> Using %d threads <<<<<", nth); nk = cfft2_init(pad1,nz,nx,&nz2,&nx2,&n_local,&o_local); sf_warning("Cpuid=%d,n0=%d,n1=%d,local_n0=%d,local_0_start=%d",cpuid,nz2,nx2,n_local,o_local); nzx = nz*nx; // nzx2 = nz2*nx2; nzx2 = n_local*nz2; ozx2 = o_local*nz2; /* propagator matrices */ left 
= sf_input("left"); right = sf_input("right"); if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx); if (!sf_histint(left,"n2",&m2)) sf_error("Need n2= in left"); if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2); if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk); lt = sf_complexalloc2(nzx,m2); rt = sf_complexalloc2(m2,nk); sf_complexread(lt[0],nzx*m2,left); sf_complexread(rt[0],m2*nk,right); sf_fileclose(left); sf_fileclose(right); /* read wavelet & reflectivity */ ww=sf_complexalloc(nt); sf_complexread(ww,nt ,Fw); rr=sf_floatalloc(nzx); sf_floatread(rr,nzx,Fr); curr = sf_complexalloc(nzx2); if (!os) prev = sf_complexalloc(nzx2); else prev = NULL; cwave = sf_complexalloc(nzx2); cwavem = sf_complexalloc(nzx2); wave = sf_complexalloc2(nzx2,m2); for (iz=0; iz < nzx2; iz++) { curr[iz] = sf_cmplx(0.,0.); if (!os) prev[iz] = sf_cmplx(0.,0.); } sendbuf = curr; if (cpuid==0) { snap = sf_complexalloc(nz2*nx2); recvbuf = snap; rcounts = sf_intalloc(numprocs); displs = sf_intalloc(numprocs); } else { snap = NULL; recvbuf = NULL; rcounts = NULL; displs = NULL; } MPI_Gather(&nzx2, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Gather(&ozx2, 1, MPI_INT, displs, 1, MPI_INT, 0, MPI_COMM_WORLD); /* MAIN LOOP */ for (it=0; it<nt; it++) { if(verb) sf_warning("it=%d;",it); /* matrix multiplication */ cfft2(curr,cwave); for (im = 0; im < m2; im++) { //for (ik = 0; ik < nk; ik++) { for (ikx = 0; ikx < n_local; ikx++) { for (ikz = 0; ikz < nz2; ikz++) { i = ikz + (o_local+ikx)*nz2; j = ikz + ikx*nz2; #ifdef SF_HAS_COMPLEX_H cwavem[j] = cwave[j]*rt[i][im]; #else cwavem[j] = sf_cmul(cwave[j],rt[i][im]); #endif } } icfft2(wave[im],cwavem); } for (ix = 0; ix < n_local && (ix+o_local)<nx ; ix++) { for (iz=0; iz < nz; iz++) { i = iz + (o_local+ix)*nz; /* original grid */ j = iz + ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H c = ww[it] * rr[i]; // source term #else c = sf_crmul(ww[it], rr[i]); 
// source term #endif if (sub) c += curr[j]; if (!os) { old = curr[j]; #ifdef SF_HAS_COMPLEX_H c += sub? (old-prev[j]) : -prev[j]; #else c = sf_cadd(c,sub? sf_csub(old,prev[j]) : sf_cneg(prev[j])); #endif prev[j] = old; } for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave[im][j]; #else c += sf_cmul(lt[im][i], wave[im][j]); #endif } curr[j] = c; } } /* write wavefield to output */ MPI_Gatherv(sendbuf, nzx2, MPI_COMPLEX, recvbuf, rcounts, displs, MPI_COMPLEX, 0, MPI_COMM_WORLD); if (cpuid==0) { for (ix = 0; ix < nx; ix++) sf_complexwrite(snap+ix*nz2,nz,Fo); } } if(verb) sf_warning("."); cfft2_finalize(); MPI_Finalize(); exit (0); }
int main() { /* SSE version of cfft2 - uses INTEL intrinsics W. Petersen, SAM. Math. ETHZ 2 May, 2002 */ int first,i,icase,it,n; float seed,error,fnm1,sign,z0,z1,ggl(); float t1,ln2,mflops; void cffti(),cfft2(); first = 1; seed = 331.0; for(icase=0;icase<2;icase++){ if(first){ for(i=0;i<2*N;i+=2){ z0 = ggl(&seed); /* real part of array */ z1 = ggl(&seed); /* imaginary part of array */ x[i] = z0; z[i] = z0; /* copy of initial real data */ x[i+1] = z1; z[i+1] = z1; /* copy of initial imag. data */ } } else { for(i=0;i<2*N;i+=2){ z0 = 0; /* real part of array */ z1 = 0; /* imaginary part of array */ x[i] = z0; z[i] = z0; /* copy of initial real data */ x[i+1] = z1; z[i+1] = z1; /* copy of initial imag. data */ } } /* initialize sine/cosine tables */ n = N; cffti(n,w); /* transform forward, back */ if(first){ sign = 1.0; cfft2(n,x,y,w,sign); sign = -1.0; cfft2(n,y,x,w,sign); /* results should be same as initial multiplied by N */ fnm1 = 1.0/((float) n); error = 0.0; for(i=0;i<2*N;i+=2){ error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) + (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]); } error = sqrt(fnm1*error); printf(" for n=%d, fwd/bck error=%e\n",N,error); first = 0; } else { unsigned j = 0; for(it=0;it<20000;it++){ sign = +1.0; cfft2(n,x,y,w,sign); sign = -1.0; cfft2(n,y,x,w,sign); } printf(" for n=%d\n",n); for (i = 0; i<N; ++i) { printf("%g ", w[i]); j++; if (j == 4) { printf("\n"); j = 0; } } } } return 0; }
int propnewc(sf_complex **ini, sf_complex **lt, sf_complex **rt, int nz, int nx, int nt, int m2, int nkzx, char *mode, int pad1, int snap, sf_complex **cc, sf_complex ***wvfld, bool verb, bool correct, sf_complex *alpha, sf_complex *beta) /*^*/ { /* index variables */ int it,iz,ix,im,ik,i,j,wfit; int nz2,nx2,nk,nzx2; sf_complex c; /* wavefield */ sf_complex **wave,**wave2, *curr, *currm, *cwave, *cwavem, *curr1, *curr2; nk = cfft2_init(pad1,nz,nx,&nz2,&nx2); nzx2 = nz2*nx2; if (nk!=nkzx) sf_error("nk discrepancy!"); curr = sf_complexalloc(nzx2); if (correct) { curr1 = sf_complexalloc(nzx2); curr2 = sf_complexalloc(nzx2); } currm = sf_complexalloc(nzx2); cwave = sf_complexalloc(nk); cwavem = sf_complexalloc(nk); wave = sf_complexalloc2(nk,m2); wave2 = sf_complexalloc2(nzx2,m2); icfft2_allocate(cwavem); /* initialization */ for (ix = 0; ix < nx2; ix++) { for (iz=0; iz < nz2; iz++) { j = iz+ix*nz2; if (ix<nx && iz<nz) curr[j] = ini[ix][iz]; else curr[j] = sf_cmplx(0.,0.); } } wfit = 0; /* MAIN LOOP */ for (it=0; it<nt; it++) { if(verb) sf_warning("it=%d;",it); /* outout wavefield */ if(snap>0) { if(it%snap==0 && wfit<=(int)(nt-1)/snap) { for (ix=0; ix<nx; ix++) for (iz=0; iz<nz; iz++) wvfld[wfit][ix][iz] = curr[iz+ix*nz2]; wfit++; } } if (mode[0]=='m') { /* matrix multiplication */ for (im = 0; im < m2; im++) { for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H currm[j] = lt[im][i]*curr[j]; #else currm[j] = sf_cmul(lt[im][i], curr[j]); #endif } } cfft2(currm,wave[im]); } for (ik = 0; ik < nk; ik++) { c = sf_cmplx(0.,0.); for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += wave[im][ik]*rt[ik][im]; #else c += sf_cmul(wave[im][ik],rt[ik][im]); //complex multiplies complex #endif } cwave[ik] = c; } /* matrix multiplication */ for (im = 0; im < m2; im++) { for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rt[ik][im]; #else cwavem[ik] = 
sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex #endif } icfft2(wave2[im],cwavem); } for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ c = sf_cmplx(0.,0.); for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave2[im][j]; #else c += sf_cmul(lt[im][i], wave2[im][j]); #endif } curr[j] = c; } } if (correct) { for (ix = 0; ix < nx2; ix++) { for (iz=0; iz < nz2; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ if (ix<nx && iz<nz) { #ifdef SF_HAS_COMPLEX_H currm[j] = curr[j]/alpha[i]; #else currm[j] = sf_cdiv(curr[j],alpha[i]); #endif } else { currm[j] = sf_cmplx(0.,0.); } } } cfft2(currm,cwave); for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]/beta[ik]; #else cwavem[ik] = sf_cdiv(cwave[ik],beta[ik]); #endif } icfft2(curr1,cwavem); for (ix = nx; ix < nx2; ix++) { for (iz=nz; iz < nz2; iz++) { j = iz+ix*nz2; /* padded grid */ curr1[j] = sf_cmplx(0.,0.); } } /**/ cfft2(curr,cwave); for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]/conjf(beta[ik]); #else cwavem[ik] = sf_cdiv(cwave[ik],conjf(beta[ik])); #endif } icfft2(curr,cwavem); for (ix = 0; ix < nx2; ix++) { for (iz=0; iz < nz2; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ if (ix<nx && iz<nz) { #ifdef SF_HAS_COMPLEX_H curr2[j] = curr[j]/conjf(alpha[i]); #else curr2[j] = sf_cdiv(curr[j],conjf(alpha[i])); #endif } else { curr2[j] = sf_cmplx(0.,0.); } } } for (ix = 0; ix < nx2; ix++) { for (iz=0; iz < nz2; iz++) { j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H curr[j] = (curr1[j] + curr2[j])/2.; #else curr[j] = sf_crmul(curr1[j]+curr2[j],0.5); #endif } } } } else if (mode[0]=='x') { cfft2(curr,cwave); /* matrix multiplication */ for (im = 0; im < m2; im++) { for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rt[ik][im]; #else cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); 
//complex multiplies complex #endif } icfft2(wave2[im],cwavem); } for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ c = sf_cmplx(0.,0.); for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave2[im][j]; #else c += sf_cmul(lt[im][i], wave2[im][j]); #endif } curr[j] = c; } } /* matrix multiplication */ for (im = 0; im < m2; im++) { for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H currm[j] = lt[im][i]*curr[j]; #else currm[j] = sf_cmul(lt[im][i], curr[j]); #endif } } cfft2(currm,wave[im]); } for (ik = 0; ik < nk; ik++) { c = sf_cmplx(0.,0.); for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += wave[im][ik]*rt[ik][im]; #else c += sf_cmul(wave[im][ik],rt[ik][im]); //complex multiplies complex #endif } cwavem[ik] = c; } icfft2(curr,cwavem); if (correct) { for (ix = 0; ix < nx2; ix++) { for (iz=0; iz < nz2; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ if (ix<nx && iz<nz) { #ifdef SF_HAS_COMPLEX_H currm[j] = curr[j]/alpha[i]; #else currm[j] = sf_cdiv(curr[j],alpha[i]); #endif } else { currm[j] = sf_cmplx(0.,0.); } } } cfft2(currm,cwave); for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]/beta[ik]; #else cwavem[ik] = sf_cdiv(cwave[ik],beta[ik]); #endif } icfft2(curr,cwavem); for (ix = nx; ix < nx2; ix++) { for (iz=nz; iz < nz2; iz++) { j = iz+ix*nz2; /* padded grid */ curr[j] = sf_cmplx(0.,0.); } } } } else if (mode[0]=='n') { /* matrix multiplication */ for (im = 0; im < m2; im++) { for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H currm[j] = lt[im][i]*curr[j]; #else currm[j] = sf_cmul(lt[im][i], curr[j]); #endif } } cfft2(currm,wave[im]); } for (ik = 0; ik < nk; ik++) { c = sf_cmplx(0.,0.); for (im = 0; im < m2; im++) { #ifdef 
SF_HAS_COMPLEX_H c += wave[im][ik]*rt[ik][im]; #else c += sf_cmul(wave[im][ik],rt[ik][im]); //complex multiplies complex #endif } cwavem[ik] = c; } icfft2(curr,cwavem); /* matrix multiplication */ for (im = 0; im < m2; im++) { for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H currm[j] = lt[im][i]*curr[j]; #else currm[j] = sf_cmul(lt[im][i], curr[j]); #endif } } cfft2(currm,wave[im]); } for (ik = 0; ik < nk; ik++) { c = sf_cmplx(0.,0.); for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += wave[im][ik]*rt[ik][im]; #else c += sf_cmul(wave[im][ik],rt[ik][im]); //complex multiplies complex #endif } cwavem[ik] = c; } icfft2(curr,cwavem); } else if (mode[0]=='p') { cfft2(curr,cwave); /* matrix multiplication */ for (im = 0; im < m2; im++) { for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rt[ik][im]; #else cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex #endif } icfft2(wave2[im],cwavem); } for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ c = sf_cmplx(0.,0.); for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave2[im][j]; #else c += sf_cmul(lt[im][i], wave2[im][j]); #endif } curr[j] = c; } } cfft2(curr,cwave); /* matrix multiplication */ for (im = 0; im < m2; im++) { for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rt[ik][im]; #else cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex #endif } icfft2(wave2[im],cwavem); } for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ c = sf_cmplx(0.,0.); for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave2[im][j]; #else c += sf_cmul(lt[im][i], wave2[im][j]); #endif } curr[j] = c; } } } else sf_error("Check mode parameter!"); } /* time stepping */ 
if(verb) sf_warning("."); /* output final result*/ for (ix=0; ix<nx; ix++) for (iz=0; iz<nz; iz++) cc[ix][iz] = curr[iz+ix*nz2]; cfft2_finalize(); return 0; }
int lrosfor2(sf_complex ***wavfld, float **sill, sf_complex **rcd, bool verb, sf_complex **lt, sf_complex **rt, int m2, geopar geop, sf_complex *ww, float *rr, int pad1) /*< low-rank one-step forward modeling >*/ { int it,iz,im,ik,ix,i,j; /* index variables */ int nxb,nzb,dx,dz,spx,spz,gpz,gpx,gpl,snpint,dt,nth=1,wfit; int nt,nz,nx, nk, nzx, nz2, nx2, nzx2; sf_complex c; sf_complex *cwave, *cwavem; sf_complex **wave, *curr; nx = geop->nx; nz = geop->nz; nxb = geop->nxb; nzb = geop->nzb; dx = geop->dx; dz = geop->dz; spx = geop->spx; spz = geop->spz; gpz = geop->gpz; gpx = geop->gpx; gpl = geop->gpl; snpint = geop->snpint; nt = geop->nt; dt = geop->dt; #ifdef _OPENMP #pragma omp parallel { nth = omp_get_num_threads(); } sf_warning(">>>> Using %d threads <<<<<", nth); #endif /*Matrix dimensions*/ nk = cfft2_init(pad1,nzb,nxb,&nz2,&nx2); nzx = nzb*nxb; nzx2 = nz2*nx2; curr = sf_complexalloc(nzx2); cwave = sf_complexalloc(nk); cwavem = sf_complexalloc(nk); wave = sf_complexalloc2(nzx2,m2); icfft2_allocate(cwavem); #ifdef _OPENMP #pragma omp parallel for private(iz) #endif for (iz=0; iz < nzx2; iz++) { curr[iz] = sf_cmplx(0.,0.); } /*Main loop*/ wfit = 0; for (it = 0; it < nt; it++) { if (verb) sf_warning("Forward source it=%d/%d;", it, nt-1); /*matrix multiplication*/ cfft2(curr,cwave); for (im = 0; im < m2; im++) { #ifdef _OPENMP #pragma omp parallel for private(ik) #endif for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rt[ik][im]; #else cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); #endif } icfft2(wave[im],cwavem); } #ifdef _OPENMP #pragma omp parallel for private(ix,iz,i,j,im,c) shared(curr,lt,wave) #endif for (ix = 0; ix < nxb; ix++) { for (iz=0; iz < nzb; iz++) { i = iz+ix*nzb; /* original grid */ j = iz+ix*nz2; /* padded grid */ if ((it*dt)<=geop->trunc) { #ifdef SF_HAS_COMPLEX_H c = ww[it] * rr[i]; // source term #else c = sf_crmul(ww[it], rr[i]); // source term #endif } else { c = sf_cmplx(0.,0.); } for (im = 0; im < m2; im++) { 
#ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave[im][j]; #else c += sf_cmul(lt[im][i], wave[im][j]); #endif } curr[j] = c; } } #ifdef _OPENMP #pragma omp parallel for private(ix,j) #endif for ( ix =0 ; ix < gpl; ix++) { j = (gpz+geop->top)+(ix+gpx+geop->lft)*nz2; /* padded grid */ rcd[ix][it] = curr[j]; } if ( it%snpint == 0 ) { #ifdef _OPENMP #pragma omp parallel for private(ix,iz,j) #endif for ( ix = 0; ix < nx; ix++) { for ( iz = 0; iz<nz; iz++ ) { j = (iz+geop->top)+(ix+geop->lft)*nz2; /* padded grid */ wavfld[wfit][ix][iz] = curr[j]; sill[ix][iz] += pow(hypotf(crealf(curr[j]),cimagf(curr[j])),2); //sill[ix][iz] += pow(hypotf(crealf(wavfld[wfit][ix][iz]),cimagf(wavfld[wfit][ix][iz])),2); } } wfit++; } } /*Main loop*/ if (verb) sf_warning("."); cfft2_finalize(); return wfit; }
int main(int argc, char* argv[]) { bool verb,complx,sub,os; int it,iz,im,ik,ix,i,j; /* index variables */ int nt,nz,nx, m2, nk, nzx, nz2, nx2, nzx2, n2, pad1,nth; sf_complex c,old; /* I/O arrays*/ sf_complex *ww,*curr,*prev,*cwave,*cwavem,**wave,**lt, **rt; float *rcurr,*rr; sf_file Fw,Fr,Fo; /* I/O files */ sf_axis at,az,ax; /* cube axes */ sf_file left, right; sf_init(argc,argv); if(!sf_getbool("verb",&verb)) verb=false; /* verbosity */ if(!sf_getbool("cmplx",&complx)) complx=true; /* outputs complex wavefield */ if(!sf_getbool("os",&os)) os=true; /* one-step flag */ if (os) { sf_warning("One-step wave extrapolation"); if(!sf_getbool("sub",&sub)) sub=false; /* subtraction flag */ } else { sf_warning("Two-step wave extrapolation"); if(!sf_getbool("sub",&sub)) sub=true; /* subtraction flag */ } /* setup I/O files */ Fw = sf_input ("in" ); Fo = sf_output("out"); Fr = sf_input ("ref"); if (SF_COMPLEX != sf_gettype(Fw)) sf_error("Need complex input"); if (SF_FLOAT != sf_gettype(Fr)) sf_error("Need float ref"); if(complx) sf_settype(Fo,SF_COMPLEX); else sf_settype(Fo,SF_FLOAT); /* Read/Write axes */ at = sf_iaxa(Fw,1); nt = sf_n(at); az = sf_iaxa(Fr,1); nz = sf_n(az); ax = sf_iaxa(Fr,2); nx = sf_n(ax); sf_oaxa(Fo,az,1); sf_oaxa(Fo,ax,2); sf_oaxa(Fo,at,3); if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */ #ifdef _OPENMP #pragma omp parallel { nth = omp_get_num_threads(); } if (verb) sf_warning(">>>> Using %d threads <<<<<", nth); #endif nk = cfft2_init(pad1,nz,nx,&nz2,&nx2); nzx = nz*nx; nzx2 = nz2*nx2; /* propagator matrices */ left = sf_input("left"); right = sf_input("right"); if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx); if (!sf_histint(left,"n2",&m2)) sf_error("Need n2= in left"); if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2); if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk); lt = sf_complexalloc2(nzx,m2); rt = sf_complexalloc2(m2,nk); 
sf_complexread(lt[0],nzx*m2,left); sf_complexread(rt[0],m2*nk,right); sf_fileclose(left); sf_fileclose(right); /* read wavelet & reflectivity */ ww=sf_complexalloc(nt); sf_complexread(ww,nt ,Fw); rr=sf_floatalloc(nzx); sf_floatread(rr,nzx,Fr); curr = sf_complexalloc(nzx2); if (!os) prev = sf_complexalloc(nzx2); else prev = NULL; if(!complx) rcurr = sf_floatalloc(nzx2); else rcurr=NULL; cwave = sf_complexalloc(nk); cwavem = sf_complexalloc(nk); wave = sf_complexalloc2(nzx2,m2); for (iz=0; iz < nzx2; iz++) { curr[iz] = sf_cmplx(0.,0.); if (!os) prev[iz] = sf_cmplx(0.,0.); if(!complx) rcurr[iz]= 0.; } /* MAIN LOOP */ for (it=0; it<nt; it++) { if(verb) sf_warning("it=%d;",it); /* matrix multiplication */ cfft2(curr,cwave); for (im = 0; im < m2; im++) { for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rt[ik][im]; #else cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex #endif } icfft2(wave[im],cwavem); } for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H c = ww[it] * rr[i]; // source term #else c = sf_crmul(ww[it], rr[i]); // source term #endif if (sub) c += curr[j]; if (!os) { old = curr[j]; #ifdef SF_HAS_COMPLEX_H c += sub? (old-prev[j]) : -prev[j]; #else c = sf_cadd(c,sub? sf_csub(old,prev[j]) : sf_cneg(prev[j])); #endif prev[j] = old; } for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave[im][j]; #else c += sf_cmul(lt[im][i], wave[im][j]); #endif } curr[j] = c; if (!complx) rcurr[j] = crealf(c); } /* write wavefield to output */ if (complx) sf_complexwrite(curr+ix*nz2,nz,Fo); else sf_floatwrite(rcurr+ix*nz2,nz,Fo); } } if(verb) sf_warning("."); cfft2_finalize(); exit (0); }
int main(int argc, char *argv[]) { bool wantwf, verb; int ix, iz, is, it, wfit, im, ik, i, j, itau; int ns, nx, nz, nt, wfnt, rnx, rnz, nzx, rnzx, vnx, ntau, htau, nds; int scalet, snap, snapshot, fnx, fnz, fnzx, nk, nb; int rectx, rectz, repeat, gpz, n, m, pad1, trunc, spx, spz; float dt, t0, z0, dz, x0, dx, s0, ds, wfdt, srctrunc; float dtau, tau0, tau; int nr, ndr, nr0; char *path1, *path2, number[5], *left, *right; double tstart, tend; struct timeval tim; /*wavenumber domain tapering*/ int taper; float *ktp; float ktmp,kx_trs,kz_trs,thresh; float dkx,dkz,kx0,kz0; float kx,kz; int nkz; sf_complex c, **lt, **rt; sf_complex *ww, **dd, ***dd3; float ***img1, **img2, ***mig1, **mig2; float *rr, **ccr, **sill, ***fwf, ***bwf; sf_complex *cwave, *cwavem, **wave, *curr; sf_axis at, ax, az, atau; sf_file Fdat, Fsrc, Fimg1, Fimg2; sf_file Ffwf, Fbwf, Fvel; sf_file Fleft, Fright; int cpuid, numprocs, nth, nspad, iturn; float *sendbuf, *recvbuf; sf_complex *sendbufc, *recvbufc; MPI_Comm comm=MPI_COMM_WORLD; MPI_Init(&argc, &argv); MPI_Comm_rank(comm, &cpuid); MPI_Comm_size(comm, &numprocs); sf_init(argc, argv); #ifdef _OPENMP #pragma omp parallel { nth=omp_get_num_threads(); } sf_warning(">>> Using %d threads <<<", nth); #endif gettimeofday(&tim, NULL); tstart=tim.tv_sec+(tim.tv_usec/1000000.0); if (!sf_getint("taper",&taper)) taper=0; /* tapering in the frequency domain */ if (!sf_getfloat("thresh",&thresh)) thresh=0.92; /* tapering threshold */ if(!sf_getbool("wantwf", &wantwf)) wantwf=false; if(!sf_getbool("verb", &verb)) verb=false; if(!sf_getint("pad1", &pad1)) pad1=1; /* padding factor on the first axis */ if(!sf_getint("nb", &nb)) sf_error("Need nb= "); if(!sf_getfloat("srctrunc", &srctrunc)) srctrunc=0.4; if(!sf_getint("rectx", &rectx)) rectx=2; if(!sf_getint("rectz", &rectz)) rectz=2; if(!sf_getint("repeat", &repeat)) repeat=2; if(!sf_getint("scalet", &scalet)) scalet=1; if(!sf_getint("snap", &snap)) snap=100; /* interval of the output wavefield */ 
if(!sf_getint("snapshot", &snapshot)) snapshot=0; /* print out the wavefield snapshots of this shot */ if(!sf_getint("nds", &nds)) sf_error("Need nds=!"); /* source and receiver positions */ if(!sf_getint("gpz", &gpz)) sf_error("Need gpz="); if(!sf_getint("spx", &spx)) sf_error("Need spx="); if(!sf_getint("spz", &spz)) sf_error("Need spz="); /* tau parameters */ if(!sf_getint("ntau", &ntau)) sf_error("Need ntau="); if(!sf_getfloat("dtau", &dtau)) sf_error("Need dtau="); if(!sf_getfloat("tau0", &tau0)) sf_error("Need tau0="); /* geometry parameters */ if(!sf_getint("rnx", &rnx)) sf_error("Need rnx="); if(!sf_getint("ndr", &ndr)) ndr=1; if(!sf_getint("nr0", &nr0)) nr0=0; /* input/output files */ Fdat=sf_input("--input"); Fimg1=sf_output("--output"); Fimg2=sf_output("Fimg2"); Fsrc=sf_input("Fsrc"); Fvel=sf_input("Fpadvel"); if(wantwf){ Ffwf=sf_output("Ffwf"); Fbwf=sf_output("Fbwf"); } at=sf_iaxa(Fsrc, 1); nt=sf_n(at); dt=sf_d(at); t0=sf_o(at); ax=sf_iaxa(Fvel, 2); vnx=sf_n(ax); dx=sf_d(ax); x0=sf_o(ax); az=sf_iaxa(Fvel, 1); rnz=sf_n(az); dz=sf_d(az); z0=sf_o(az); if(!sf_histint(Fdat, "n2", &nr)) sf_error("Need n2= in input!"); if(!sf_histint(Fdat, "n3", &ns)) sf_error("Need n3= in input!"); if(!sf_histfloat(Fdat, "d3", &ds)) sf_error("Need d3= in input!"); if(!sf_histfloat(Fdat, "o3", &s0)) sf_error("Need o3= in input!"); wfnt=(nt-1)/scalet+1; wfdt=dt*scalet; /* double check the geometry parameters */ if(nds != (int)(ds/dx)) sf_error("Need ds/dx= %d", nds); //sf_warning("s0=%g, x0+(rnx-1)*dx/2=%g", s0, x0+(rnx-1)*dx/2); //if(s0 != x0+(rnx-1)*dx/2) sf_error("Wrong origin information!"); if(vnx != nds*(ns-1)+rnx) sf_error("Wrong dimension in x axis!"); /* set up the output files */ atau=sf_iaxa(Fsrc, 1); sf_setn(atau, ntau); sf_setd(atau, dtau); sf_seto(atau, tau0); sf_setlabel(atau, "Tau"); sf_setunit(atau, "s"); sf_oaxa(Fimg1, az, 1); sf_oaxa(Fimg1, ax, 2); sf_oaxa(Fimg1, atau, 3); sf_oaxa(Fimg2, az, 1); sf_oaxa(Fimg2, ax, 2); sf_putint(Fimg2, "n3", 1); 
sf_settype(Fimg1, SF_FLOAT); sf_settype(Fimg2, SF_FLOAT); if(wantwf){ sf_setn(ax, rnx); sf_seto(ax, -(rnx-1)*dx/2.0); sf_oaxa(Ffwf, az, 1); sf_oaxa(Ffwf, ax, 2); sf_putint(Ffwf, "n3", (wfnt-1)/snap+1); sf_putfloat(Ffwf, "d3", snap*wfdt); sf_putfloat(Ffwf, "o3", t0); sf_putstring(Ffwf, "label3", "Time"); sf_putstring(Ffwf, "unit3", "s"); sf_settype(Ffwf, SF_FLOAT); sf_oaxa(Fbwf, az, 1); sf_oaxa(Fbwf, ax, 2); sf_putint(Fbwf, "n3", (wfnt-1)/snap+1); sf_putfloat(Fbwf, "d3", -snap*wfdt); sf_putfloat(Fbwf, "o3", (wfnt-1)*wfdt); sf_putstring(Fbwf, "label3", "Time"); sf_putstring(Fbwf, "unit3", "s"); sf_settype(Fbwf, SF_FLOAT); } nx=rnx+2*nb; nz=rnz+2*nb; nzx=nx*nz; rnzx=rnz*rnx; nk=cfft2_init(pad1, nz, nx, &fnz, &fnx); fnzx=fnz*fnx; if(ns%numprocs==0) nspad=ns; else nspad=(ns/numprocs+1)*numprocs; /* print axies parameters for double check */ sf_warning("cpuid=%d, numprocs=%d, nspad=%d", cpuid, numprocs, nspad); sf_warning("nt=%d, dt=%g, scalet=%d, wfnt=%d, wfdt=%g",nt, dt, scalet, wfnt, wfdt); sf_warning("vnx=%d, nx=%d, dx=%g, nb=%d, rnx=%d", vnx, nx, dx, nb, rnx); sf_warning("nr=%d, ndr=%d, nr0=%g", nr, ndr, nr0); sf_warning("nz=%d, rnz=%d, dz=%g, z0=%g", nz, rnz, dz, z0); sf_warning("spx=%d, spz=%d, gpz=%d", spx, spz, gpz); sf_warning("ns=%d, ds=%g, s0=%g", ns, ds, s0); sf_warning("ntau=%d, dtau=%g, tau0=%g", ntau, dtau, tau0); sf_warning("nzx=%d, fnzx=%d, nk=%d", nzx, fnzx, nk); /* allocate storage and read data */ ww=sf_complexalloc(nt); sf_complexread(ww, nt, Fsrc); sf_fileclose(Fsrc); gpz=gpz+nb; spz=spz+nb; spx=spx+nb; nr0=nr0+nb; trunc=srctrunc/dt+0.5; dd=sf_complexalloc2(nt, nr); if(cpuid==0) dd3=sf_complexalloc3(nt, nr, numprocs); rr=sf_floatalloc(nzx); reflgen(nz, nx, spz, spx, rectz, rectx, repeat, rr); fwf=sf_floatalloc3(rnz, rnx, wfnt); bwf=sf_floatalloc3(rnz, rnx, wfnt); img1=sf_floatalloc3(rnz, vnx, ntau); img2=sf_floatalloc2(rnz, vnx); mig1=sf_floatalloc3(rnz, rnx, ntau); mig2=sf_floatalloc2(rnz, rnx); ccr=sf_floatalloc2(rnz, rnx); 
sill=sf_floatalloc2(rnz, rnx); curr=sf_complexalloc(fnzx); cwave=sf_complexalloc(nk); cwavem=sf_complexalloc(nk); icfft2_allocate(cwavem); if (taper!=0) { dkz = 1./(fnz*dz); kz0 = -0.5/dz; dkx = 1./(fnx*dx); kx0 = -0.5/dx; nkz = fnz; sf_warning("dkz=%f,dkx=%f,kz0=%f,kx0=%f",dkz,dkx,kz0,kx0); sf_warning("nk=%d,nkz=%d,nkx=%d",nk,nkz,fnx); kx_trs = thresh*fabs(0.5/dx); kz_trs = thresh*fabs(0.5/dz); sf_warning("Applying kz tapering below %f",kz_trs); sf_warning("Applying kx tapering below %f",kx_trs); ktp = sf_floatalloc(nk); /* constructing the tapering op */ for (ix=0; ix < fnx; ix++) { kx = kx0+ix*dkx; for (iz=0; iz < nkz; iz++) { kz = kz0+iz*dkz; ktmp = 1.; if (fabs(kx) > kx_trs) ktmp *= powf((2*kx_trs - fabs(kx))/(kx_trs),2); if (fabs(kz) > kz_trs) ktmp *= powf((2*kz_trs - fabs(kz))/(kz_trs),2); ktp[iz+ix*nkz] = ktmp; } } } /* initialize image tables that would be used for summing images */ #ifdef _OPENMP #pragma omp parallel for private(ix, iz, itau) #endif for(ix=0; ix<vnx; ix++){ for(iz=0; iz<rnz; iz++){ img2[ix][iz]=0.; for(itau=0; itau<ntau; itau++){ img1[itau][ix][iz]=0.; } } } path1=sf_getstring("path1"); path2=sf_getstring("path2"); if(path1==NULL) path1="./mat/left"; if(path2==NULL) path2="./mat/right"; /* shot loop */ for (iturn=0; iturn*numprocs<nspad; iturn++){ is=iturn*numprocs+cpuid; /* read data */ if(cpuid==0){ sf_seek(Fdat, ((off_t) is)*((off_t) nr)*((off_t) nt)*sizeof(float complex), SEEK_SET); if((iturn+1)*numprocs<=ns){ sf_complexread(dd3[0][0], nr*nt*numprocs, Fdat); }else{ sf_complexread(dd3[0][0], nr*nt*(ns-iturn*numprocs), Fdat); for(is=ns; is<nspad; is++) for(ix=0; ix<nr; ix++) for(it=0; it<nt; it++) dd3[is-iturn*numprocs][ix][it]=sf_cmplx(0.,0.); is=iturn*numprocs; } sendbufc=dd3[0][0]; recvbufc=dd[0]; }else{ sendbufc=NULL; recvbufc=dd[0]; } MPI_Scatter(sendbufc, nt*nr, MPI_COMPLEX, recvbufc, nt*nr, MPI_COMPLEX, 0, comm); if(is<ns){ /* effective shot loop */ /* construct the names of left and right matrices */ 
left=sf_charalloc(strlen(path1)); right=sf_charalloc(strlen(path2)); strcpy(left, path1); strcpy(right, path2); sprintf(number, "%d", is+1); strcat(left, number); strcat(right, number); Fleft=sf_input(left); Fright=sf_input(right); if(!sf_histint(Fleft, "n1", &n) || n != nzx) sf_error("Need n1=%d in Fleft", nzx); if(!sf_histint(Fleft, "n2", &m)) sf_error("No n2 in Fleft"); if(!sf_histint(Fright, "n1", &n) || n != m) sf_error("Need n1=%d in Fright", m); if(!sf_histint(Fright, "n2", &n) || n != nk) sf_error("Need n2=%d in Fright", nk); /* allocate storage for each shot migration */ lt=sf_complexalloc2(nzx, m); rt=sf_complexalloc2(m, nk); sf_complexread(lt[0], nzx*m, Fleft); sf_complexread(rt[0], m*nk, Fright); sf_fileclose(Fleft); sf_fileclose(Fright); /* initialize curr and imaging variables */ #ifdef _OPENMP #pragma omp parallel for private(iz) #endif for(iz=0; iz<fnzx; iz++){ curr[iz]=sf_cmplx(0.,0.); } #ifdef _OPENMP #pragma omp parallel for private(ix, iz, itau) #endif for(ix=0; ix<rnx; ix++){ for(iz=0; iz<rnz; iz++){ mig2[ix][iz]=0.; ccr[ix][iz]=0.; sill[ix][iz]=0.; for(itau=0; itau<ntau; itau++){ mig1[itau][ix][iz]=0.; } } } /* wave */ wave=sf_complexalloc2(fnzx, m); /* snapshot */ if(wantwf && is==snapshot) wantwf=true; else wantwf=false; /* forward propagation */ wfit=0; for(it=0; it<nt; it++){ if(verb) sf_warning("Forward propagation it=%d/%d",it+1, nt); cfft2(curr, cwave); for(im=0; im<m; im++){ #ifdef _OPENMP #pragma omp parallel for private(ik) #endif for(ik=0; ik<nk; ik++){ #ifdef SF_HAS_COMPLEX_H cwavem[ik]=cwave[ik]*rt[ik][im]; #else cwavem[ik]=sf_cmul(cwave[ik],rt[ik][im]); #endif } icfft2(wave[im],cwavem); } #ifdef _OPENMP #pragma omp parallel for private(ix, iz, i, j, im, c) shared(curr, it) #endif for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ i=iz+ix*nz; j=iz+ix*fnz; if(it<trunc){ #ifdef SF_HAS_COMPLEX_H c=ww[it]*rr[i]; #else c=sf_crmul(ww[it],rr[i]); #endif }else{ c=sf_cmplx(0.,0.); } // c += curr[j]; for(im=0; im<m; im++){ #ifdef 
SF_HAS_COMPLEX_H c += lt[im][i]*wave[im][j]; #else c += sf_cmul(lt[im][i], wave[im][j]); #endif } curr[j]=c; } } if (taper!=0) { if (it%taper == 0) { cfft2(curr,cwave); for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*ktp[ik]; #else cwavem[ik] = sf_crmul(cwave[ik],ktp[ik]); #endif } icfft2(curr,cwavem); } } if(it%scalet==0){ #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<rnx; ix++){ for(iz=0; iz<rnz; iz++){ fwf[wfit][ix][iz]=crealf(curr[(ix+nb)*fnz+(iz+nb)]); } } wfit++; } } //end of it /* check wfnt */ if(wfit != wfnt) sf_error("At this point, wfit should be equal to wfnt"); /* backward propagation starts from here... */ #ifdef _OPENMP #pragma omp parallel for private(iz) #endif for(iz=0; iz<fnzx; iz++){ curr[iz]=sf_cmplx(0.,0.); } wfit=wfnt-1; for(it=nt-1; it>=0; it--){ if(verb) sf_warning("Backward propagation it=%d/%d",it+1, nt); #ifdef _OPENMP #pragma omp parallel for private(ix) #endif for(ix=0; ix<nr; ix++){ curr[(nr0+ix*ndr)*fnz+gpz]+=dd[ix][it]; } cfft2(curr, cwave); for(im=0; im<m; im++){ #ifdef _OPENMP #pragma omp parallel for private(ik) #endif for(ik=0; ik<nk; ik++){ #ifdef SF_HAS_COMPLEX_H cwavem[ik]=cwave[ik]*conjf(rt[ik][im]); #else cwavem[ik]=sf_cmul(cwave[ik],conjf(rt[ik][im])); #endif } icfft2(wave[im],cwavem); } #ifdef _OPENMP #pragma omp parallel for private(ix, iz, i, j, im, c) shared(curr, it) #endif for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ i=iz+ix*nz; j=iz+ix*fnz; // c=curr[j]; c=sf_cmplx(0.,0.); for(im=0; im<m; im++){ #ifdef SF_HAS_COMPLEX_H c += conjf(lt[im][i])*wave[im][j]; #else c += sf_cmul(conjf(lt[im][i]), wave[im][j]); #endif } curr[j]=c; } } if (taper!=0) { if (it%taper == 0) { cfft2(curr,cwave); for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*ktp[ik]; #else cwavem[ik] = sf_crmul(cwave[ik],ktp[ik]); #endif } icfft2(curr,cwavem); } } if(it%scalet==0){ #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<rnx; ix++){ 
for(iz=0; iz<rnz; iz++){ bwf[wfit][ix][iz]=crealf(curr[(ix+nb)*fnz+(iz+nb)]); ccr[ix][iz] += fwf[wfit][ix][iz]*bwf[wfit][ix][iz]; sill[ix][iz] += fwf[wfit][ix][iz]*fwf[wfit][ix][iz]; } } wfit--; } } //end of it if(wfit != -1) sf_error("Check program! The final wfit should be -1!"); /* free storage */ free(*rt); free(rt); free(*lt); free(lt); free(*wave); free(wave); free(left); free(right); /* normalized image */ #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for (ix=0; ix<rnx; ix++){ for(iz=0; iz<rnz; iz++){ mig2[ix][iz]=ccr[ix][iz]/(sill[ix][iz]+SF_EPS); // sill[ix][iz]=0.; } } /* time-shift imaging condition */ for(itau=0; itau<ntau; itau++){ //sf_warning("itau/ntau=%d/%d", itau+1, ntau); tau=itau*dtau+tau0; htau=tau/wfdt; for(it=abs(htau); it<wfnt-abs(htau); it++){ #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<rnx; ix++){ for(iz=0; iz<rnz; iz++){ mig1[itau][ix][iz]+=fwf[it+htau][ix][iz]*bwf[it-htau][ix][iz]; // sill[ix][iz]+=fwf[it+htau][ix][iz]*fwf[it+htau][ix][iz]; } // end of iz } // end of ix } // end of it //#ifdef _OPENMP //#pragma omp parallel for private(ix, iz) //#endif /* source illumination */ // for(ix=0; ix<rnx; ix++){ // for(iz=0; iz<rnz; iz++){ // mig1[itau][ix][iz] = mig1[itau][ix][iz]/(sill[ix][iz]+SF_EPS); // } // } } //end of itau /* output wavefield snapshot */ if(wantwf){ for(it=0; it<wfnt; it++){ if(it%snap==0){ sf_floatwrite(fwf[it][0], rnzx, Ffwf); sf_floatwrite(bwf[wfnt-1-it][0], rnzx, Fbwf); } } sf_fileclose(Ffwf); sf_fileclose(Fbwf); } /* add all the shot images that are on the same node */ #ifdef _OPENMP #pragma omp parallel for private(itau, ix, iz) #endif for(itau=0; itau<ntau; itau++){ for(ix=0; ix<rnx; ix++){ for(iz=0; iz<rnz; iz++){ img1[itau][ix+is*nds][iz] += mig1[itau][ix][iz]; } } } #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<rnx; ix++){ for(iz=0; iz<rnz; iz++){ img2[ix+is*nds][iz] += mig2[ix][iz]; } } } // end of is<ns } // end of iturn 
////////////////end of ishot MPI_Barrier(comm); cfft2_finalize(); sf_fileclose(Fdat); free(ww); free(rr); free(*dd); free(dd); if(cpuid==0) {free(**dd3); free(*dd3); free(dd3);} free(cwave); free(cwavem); free(curr); free(*ccr); free(ccr); free(*sill); free(sill); free(**fwf); free(*fwf); free(fwf); free(**bwf); free(*bwf); free(bwf); free(**mig1); free(*mig1); free(mig1); free(*mig2); free(mig2); /* sum image */ if(cpuid==0){ sendbuf=(float *)MPI_IN_PLACE; recvbuf=img1[0][0]; }else{ sendbuf=img1[0][0]; recvbuf=NULL; } MPI_Reduce(sendbuf, recvbuf, ntau*vnx*rnz, MPI_FLOAT, MPI_SUM, 0, comm); if(cpuid==0){ sendbuf=MPI_IN_PLACE; recvbuf=img2[0]; }else{ sendbuf=img2[0]; recvbuf=NULL; } MPI_Reduce(sendbuf, recvbuf, vnx*rnz, MPI_FLOAT, MPI_SUM, 0, comm); /* output image */ if(cpuid==0){ sf_floatwrite(img1[0][0], ntau*vnx*rnz, Fimg1); sf_floatwrite(img2[0], vnx*rnz, Fimg2); } MPI_Barrier(comm); sf_fileclose(Fimg1); sf_fileclose(Fimg2); free(**img1); free(*img1); free(img1); free(*img2); free(img2); gettimeofday(&tim, NULL); tend=tim.tv_sec+(tim.tv_usec/1000000.0); sf_warning(">> The computing time is %.3lf minutes <<", (tend-tstart)/60.); MPI_Finalize(); exit(0); }
int prop1Pa(sf_complex *input, sf_complex *output, sf_complex *lt, sf_complex *rt, int nz, int nx, int nkzx, int m2)
/*< Just nsps(-) >*/
{
    /*
     * Applies one lowrank step of the form
     *     output = IFFT( sum_m conj(rt[k][m]) * FFT( conj(lt[m][x]) * input ) ).
     *
     * input, output : nz*nx samples, first axis fastest
     * lt            : flat array indexed lt[im*nz*nx + i]
     * rt            : flat array indexed rt[ik*m2 + im]
     * nkzx          : expected wavenumber count; must equal what cfft2_init
     *                 reports, otherwise sf_error is raised
     * Returns 0.
     *
     * Fixes relative to the previous revision:
     *  - all work buffers are released before returning (they leaked on
     *    every call); the two buffers that were allocated but never used
     *    (wave2, cwavem) are gone;
     *  - currm is zeroed once, so the padded samples handed to cfft2 are
     *    defined instead of uninitialised heap memory;
     *  - the fallback branch for builds without <complex.h> accumulates
     *    with sf_cadd instead of "+=" on sf_complex.
     */
    int iz, ix, im, ik, i, j;
    int nz2, nx2, nk, nzx, nzx2;
    int pad1 = 1;
    sf_complex **wave, *curr, *currm, *cwave, c;

    nk = cfft2_init(pad1,nz,nx,&nz2,&nx2);
    if (nk!=nkzx) sf_error("nk discrepancy!");

    nzx = nz*nx;
    nzx2 = nz2*nx2;

    curr  = sf_complexalloc(nzx2);
    currm = sf_complexalloc(nzx2);

    cwave = sf_complexalloc(nk);
    wave  = sf_complexalloc2(nk,m2);

    icfft2_allocate(cwave);

    /* initialization: copy the input into the padded grid, zero elsewhere */
    for (ix = 0; ix < nx2; ix++) {
        for (iz=0; iz < nz2; iz++) {
            i = iz+ix*nz;
            j = iz+ix*nz2;
            if (ix<nx && iz<nz)
                curr[j] = input[i];
            else
                curr[j] = sf_cmplx(0.,0.);
        }
    }

    /* the loops below only write the unpadded part of currm */
    for (j = 0; j < nzx2; j++) {
        currm[j] = sf_cmplx(0.,0.);
    }

    /* nsps(-) */
    /* matrix multiplication */
    for (im = 0; im < m2; im++) {
        for (ix = 0; ix < nx; ix++) {
            for (iz=0; iz < nz; iz++) {
                i = iz+ix*nz;  /* original grid */
                j = iz+ix*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H
                currm[j] = conjf(lt[im*nzx+i])*curr[j];
#else
                currm[j] = sf_cmul(conjf(lt[im*nzx+i]), curr[j]);
#endif
            }
        }
        cfft2(currm,wave[im]);
    }

    for (ik = 0; ik < nk; ik++) {
        c = sf_cmplx(0.,0.);
        for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
            c += wave[im][ik]*conjf(rt[ik*m2+im]);
#else
            c = sf_cadd(c, sf_cmul(wave[im][ik],conjf(rt[ik*m2+im])));
#endif
        }
        cwave[ik] = c;
    }
    /* saving a pair of FFTs */
    icfft2(curr,cwave);

    /* output final result*/
    for (ix = 0; ix < nx; ix++) {
        for (iz=0; iz < nz; iz++) {
            i = iz+ix*nz;
            j = iz+ix*nz2;
            output[i] = curr[j];
        }
    }

    cfft2_finalize();

    /* cwave was registered with icfft2_allocate, so release the buffers only
     * after the FFT module has been finalized */
    free(*wave); free(wave);
    free(cwave);
    free(currm);
    free(curr);

    return 0;
}
int main(int argc, char* argv[]) { bool verb,complx,sub,os; int it,iz,im,ik,ix,i,j; /* index variables */ int nt,nz,nx, m2, nk, nzx, nz2, nx2, nzx2, n2, pad1,nth; sf_complex c,old; int snap; /* I/O arrays*/ sf_complex *ww,*curr,*prev,*cwave,*cwavem,**wave,**lt, **rt; float *rcurr,*rr; sf_file Fw,Fr,Fo; /* I/O files */ sf_axis at,az,ax; /* cube axes */ sf_file left, right; sf_file snaps; /*for tapering*/ float dt,dx,dz,dkx,dkz,kx0,kz0,kx,kz,ktmp,kx_trs,kz_trs,thresh; float *ktp; int taper; sf_init(argc,argv); if(!sf_getbool("verb",&verb)) verb=false; /* verbosity */ if(!sf_getbool("cmplx",&complx)) complx=true; /* outputs complex wavefield */ if(!sf_getbool("os",&os)) os=true; /* one-step flag */ if (os) { sf_warning("One-step wave extrapolation"); if(!sf_getbool("sub",&sub)) sub=false; /* subtraction flag */ } else { sf_warning("Two-step wave extrapolation"); if(!sf_getbool("sub",&sub)) sub=true; /* subtraction flag */ } if (!sf_getint("taper",&taper)) taper=0; /* tapering in the frequency domain */ if (!sf_getfloat("thresh",&thresh)) thresh=0.92; /* tapering threshold */ /* setup I/O files */ Fw = sf_input ("in" ); Fo = sf_output("out"); Fr = sf_input ("ref"); if (SF_COMPLEX != sf_gettype(Fw)) sf_error("Need complex input"); if (SF_FLOAT != sf_gettype(Fr)) sf_error("Need float ref"); if(complx) sf_settype(Fo,SF_COMPLEX); else sf_settype(Fo,SF_FLOAT); /* Read/Write axes */ at = sf_iaxa(Fw,1); nt = sf_n(at); dt = sf_d(at); az = sf_iaxa(Fr,1); nz = sf_n(az); dz = sf_d(az); ax = sf_iaxa(Fr,2); nx = sf_n(ax); dx = sf_d(ax); sf_oaxa(Fo,az,1); sf_oaxa(Fo,ax,2); if (!sf_getint("snap",&snap)) snap=0; /* interval for snapshots */ if (snap > 0) { snaps = sf_output("snaps"); /* (optional) snapshot file */ sf_oaxa(snaps,az,1); sf_oaxa(snaps,ax,2); sf_oaxa(snaps,at,3); sf_putint(snaps,"n3",nt/snap); sf_putfloat(snaps,"d3",dt*snap); sf_putfloat(snaps,"o3",0.); if(complx) sf_settype(snaps,SF_COMPLEX); else sf_settype(snaps,SF_FLOAT); } else { snaps = NULL; } if 
(!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */ #ifdef _OPENMP #pragma omp parallel { nth = omp_get_num_threads(); } if (verb) sf_warning(">>>> Using %d threads <<<<<", nth); #endif nk = cfft2_init(pad1,nz,nx,&nz2,&nx2); nzx = nz*nx; nzx2 = nz2*nx2; /* propagator matrices */ left = sf_input("left"); right = sf_input("right"); if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx); if (!sf_histint(left,"n2",&m2)) sf_error("Need n2= in left"); if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2); if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk); lt = sf_complexalloc2(nzx,m2); rt = sf_complexalloc2(m2,nk); sf_complexread(lt[0],nzx*m2,left); sf_complexread(rt[0],m2*nk,right); sf_fileclose(left); sf_fileclose(right); /* read wavelet & reflectivity */ ww=sf_complexalloc(nt); sf_complexread(ww,nt ,Fw); rr=sf_floatalloc(nzx); sf_floatread(rr,nzx,Fr); curr = sf_complexalloc(nzx2); if (!os) prev = sf_complexalloc(nzx2); else prev = NULL; if(!complx) rcurr = sf_floatalloc(nzx2); else rcurr=NULL; cwave = sf_complexalloc(nk); cwavem = sf_complexalloc(nk); wave = sf_complexalloc2(nzx2,m2); /*icfft2_allocate(cwavem);*/ for (iz=0; iz < nzx2; iz++) { curr[iz] = sf_cmplx(0.,0.); if (!os) prev[iz] = sf_cmplx(0.,0.); if(!complx) rcurr[iz]= 0.; } if (taper!=0) { dkz = 1./(nz2*dz); kz0 = -0.5/dz; dkx = 1./(nx2*dx); kx0 = -0.5/dx; kx_trs = thresh*fabs(0.5/dx); kz_trs = thresh*fabs(0.5/dz); sf_warning("dkz=%f,dkx=%f,kz0=%f,kx0=%f",dkz,dkx,kz0,kx0); sf_warning("nk=%d,nkz=%d,nkx=%d",nk,nz2,nx2); sf_warning("Applying kz tapering below %f",kz_trs); sf_warning("Applying kx tapering below %f",kx_trs); ktp = sf_floatalloc(nk); /* constructing the tapering op */ for (ix=0; ix < nx2; ix++) { kx = kx0+ix*dkx; for (iz=0; iz < nz2; iz++) { kz = kz0+iz*dkz; ktmp = 1.; if (fabs(kx) > kx_trs) ktmp *= (fabs(kx)>kx_trs)? 
powf((fabs(kx0)-fabs(kx)+kx_trs)/kx0,2) : 1.; if (fabs(kz) > kz_trs) ktmp *= (fabs(kz)>kz_trs)? powf((fabs(kz0)-fabs(kz)+kz_trs)/kz0,2) : 1.; ktp[iz+ix*nz2] = ktmp; } } } /* MAIN LOOP */ for (it=0; it<nt; it++) { if(verb) sf_warning("it=%d;",it); /* matrix multiplication */ cfft2(curr,cwave); for (im = 0; im < m2; im++) { for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rt[ik][im]; #else cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex #endif } icfft2(wave[im],cwavem); } for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H c = ww[it] * rr[i]; // source term #else c = sf_crmul(ww[it], rr[i]); // source term #endif if (sub) c += curr[j]; if (!os) { old = curr[j]; #ifdef SF_HAS_COMPLEX_H c += sub? (old-prev[j]) : -prev[j]; #else c = sf_cadd(c,sub? sf_csub(old,prev[j]) : sf_cneg(prev[j])); #endif prev[j] = old; } for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave[im][j]; #else c += sf_cmul(lt[im][i], wave[im][j]); #endif } curr[j] = c; if (!complx) rcurr[j] = crealf(c); } if (NULL != snaps && 0 == it%snap) { /* write wavefield snapshots */ if (complx) sf_complexwrite(curr+ix*nz2,nz,snaps); else sf_floatwrite(rcurr+ix*nz2,nz,snaps); } } if (taper!=0) { if (it%taper == 0) { cfft2(curr,cwave); for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*ktp[ik]; #else cwavem[ik] = sf_crmul(cwave[ik],ktp[ik]); #endif } icfft2(curr,cwavem); if (!os) { cfft2(prev,cwave); for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*ktp[ik]; #else cwavem[ik] = sf_crmul(cwave[ik],ktp[ik]); #endif } icfft2(prev,cwavem); } } } } if(verb) sf_warning("."); /* write final wavefield to output */ for (ix = 0; ix < nx; ix++) { if (complx) sf_complexwrite(curr+ix*nz2,nz,Fo); else sf_floatwrite(rcurr+ix*nz2,nz,Fo); } cfft2_finalize(); exit (0); }
/** Time domain physical simulation. noisy: - 0: no noise at all; - 1: poisson and read out noise. - 2: only poisson noise. */ dmat *skysim_sim(dmat **mresout, const dmat *mideal, const dmat *mideal_oa, double ngsol, ASTER_S *aster, const POWFS_S *powfs, const PARMS_S *parms, int idtratc, int noisy, int phystart){ int dtratc=0; if(!parms->skyc.multirate){ dtratc=parms->skyc.dtrats->p[idtratc]; } int hasphy; if(phystart>-1 && phystart<aster->nstep){ hasphy=1; }else{ hasphy=0; } const int nmod=mideal->nx; dmat *res=dnew(6,1);/*Results. 1-2: NGS and TT modes., 3-4:On axis NGS and TT modes, 4-6: On axis NGS and TT wihtout considering un-orthogonality.*/ dmat *mreal=NULL;/*modal correction at this step. */ dmat *merr=dnew(nmod,1);/*modal error */ dcell *merrm=dcellnew(1,1);dcell *pmerrm=NULL; const int nstep=aster->nstep?aster->nstep:parms->maos.nstep; dmat *mres=dnew(nmod,nstep); dmat* rnefs=parms->skyc.rnefs; dcell *zgradc=dcellnew3(aster->nwfs, 1, aster->ngs, 0); dcell *gradout=dcellnew3(aster->nwfs, 1, aster->ngs, 0); dmat *gradsave=0; if(parms->skyc.dbg){ gradsave=dnew(aster->tsa*2,nstep); } SERVO_T *st2t=0; kalman_t *kalman=0; dcell *mpsol=0; dmat *pgm=0; dmat *dtrats=0; int multirate=parms->skyc.multirate; if(multirate){ kalman=aster->kalman[0]; dtrats=aster->dtrats; }else{ if(parms->skyc.servo>0){ const double dtngs=parms->maos.dt*dtratc; st2t=servo_new(merrm, NULL, 0, dtngs, aster->gain->p[idtratc]); pgm=aster->pgm->p[idtratc]; }else{ kalman=aster->kalman[idtratc]; } } if(kalman){ kalman_init(kalman); mpsol=dcellnew(aster->nwfs, 1); //for psol grad. 
} const long nwvl=parms->maos.nwvl; dcell **psf=0, **mtche=0, **ints=0; ccell *wvf=0,*wvfc=0, *otf=0; if(hasphy){ psf=mycalloc(aster->nwfs,dcell*); wvf=ccellnew(aster->nwfs,1); wvfc=ccellnew(aster->nwfs,1); mtche=mycalloc(aster->nwfs,dcell*); ints=mycalloc(aster->nwfs,dcell*); otf=ccellnew(aster->nwfs,1); for(long iwfs=0; iwfs<aster->nwfs; iwfs++){ const int ipowfs=aster->wfs[iwfs].ipowfs; const long ncomp=parms->maos.ncomp[ipowfs]; const long nsa=parms->maos.nsa[ipowfs]; wvf->p[iwfs]=cnew(ncomp,ncomp); wvfc->p[iwfs]=NULL; psf[iwfs]=dcellnew(nsa,nwvl); //cfft2plan(wvf->p[iwfs], -1); if(parms->skyc.multirate){ mtche[iwfs]=aster->wfs[iwfs].pistat->mtche[(int)aster->idtrats->p[iwfs]]; }else{ mtche[iwfs]=aster->wfs[iwfs].pistat->mtche[idtratc]; } otf->p[iwfs]=cnew(ncomp,ncomp); //cfft2plan(otf->p[iwfs],-1); //cfft2plan(otf->p[iwfs],1); ints[iwfs]=dcellnew(nsa,1); int pixpsa=parms->skyc.pixpsa[ipowfs]; for(long isa=0; isa<nsa; isa++){ ints[iwfs]->p[isa]=dnew(pixpsa,pixpsa); } } } for(int irep=0; irep<parms->skyc.navg; irep++){ if(kalman){ kalman_init(kalman); }else{ servo_reset(st2t); } dcellzero(zgradc); dcellzero(gradout); if(ints){ for(int iwfs=0; iwfs<aster->nwfs; iwfs++){ dcellzero(ints[iwfs]); } } for(int istep=0; istep<nstep; istep++){ memcpy(merr->p, PCOL(mideal,istep), nmod*sizeof(double)); dadd(&merr, 1, mreal, -1);/*form NGS mode error; */ memcpy(PCOL(mres,istep),merr->p,sizeof(double)*nmod); if(mpsol){//collect averaged modes for PSOL. 
for(long iwfs=0; iwfs<aster->nwfs; iwfs++){ dadd(&mpsol->p[iwfs], 1, mreal, 1); } } pmerrm=0; if(istep>=parms->skyc.evlstart){/*performance evaluation*/ double res_ngs=dwdot(merr->p,parms->maos.mcc,merr->p); if(res_ngs>ngsol*100){ dfree(res); res=NULL; break; } { res->p[0]+=res_ngs; res->p[1]+=dwdot2(merr->p,parms->maos.mcc_tt,merr->p); double dot_oa=dwdot(merr->p, parms->maos.mcc_oa, merr->p); double dot_res_ideal=dwdot(merr->p, parms->maos.mcc_oa, PCOL(mideal,istep)); double dot_res_oa=0; for(int imod=0; imod<nmod; imod++){ dot_res_oa+=merr->p[imod]*IND(mideal_oa,imod,istep); } res->p[2]+=dot_oa-2*dot_res_ideal+2*dot_res_oa; res->p[4]+=dot_oa; } { double dot_oa_tt=dwdot2(merr->p, parms->maos.mcc_oa_tt, merr->p); /*Notice that mcc_oa_tt2 is 2x5 marix. */ double dot_res_ideal_tt=dwdot(merr->p, parms->maos.mcc_oa_tt2, PCOL(mideal,istep)); double dot_res_oa_tt=0; for(int imod=0; imod<2; imod++){ dot_res_oa_tt+=merr->p[imod]*IND(mideal_oa,imod,istep); } res->p[3]+=dot_oa_tt-2*dot_res_ideal_tt+2*dot_res_oa_tt; res->p[5]+=dot_oa_tt; } }//if evl if(istep<phystart || phystart<0){ /*Ztilt, noise free simulation for acquisition. */ dmm(&zgradc->m, 1, aster->gm, merr, "nn", 1);/*grad due to residual NGS mode. */ for(int iwfs=0; iwfs<aster->nwfs; iwfs++){ const int ipowfs=aster->wfs[iwfs].ipowfs; const long ng=parms->maos.nsa[ipowfs]*2; for(long ig=0; ig<ng; ig++){ zgradc->p[iwfs]->p[ig]+=aster->wfs[iwfs].ztiltout->p[istep*ng+ig]; } } for(int iwfs=0; iwfs<aster->nwfs; iwfs++){ int dtrati=(multirate?(int)dtrats->p[iwfs]:dtratc); if((istep+1) % dtrati==0){ dadd(&gradout->p[iwfs], 0, zgradc->p[iwfs], 1./dtrati); dzero(zgradc->p[iwfs]); if(noisy){ int idtrati=(multirate?(int)aster->idtrats->p[iwfs]:idtratc); dmat *nea=aster->wfs[iwfs].pistat->sanea->p[idtrati]; for(int i=0; i<nea->nx; i++){ gradout->p[iwfs]->p[i]+=nea->p[i]*randn(&aster->rand); } } pmerrm=merrm;//record output. 
} } }else{ /*Accumulate PSF intensities*/ for(long iwfs=0; iwfs<aster->nwfs; iwfs++){ const double thetax=aster->wfs[iwfs].thetax; const double thetay=aster->wfs[iwfs].thetay; const int ipowfs=aster->wfs[iwfs].ipowfs; const long nsa=parms->maos.nsa[ipowfs]; ccell* wvfout=aster->wfs[iwfs].wvfout[istep]; for(long iwvl=0; iwvl<nwvl; iwvl++){ double wvl=parms->maos.wvl[iwvl]; for(long isa=0; isa<nsa; isa++){ ccp(&wvfc->p[iwfs], IND(wvfout,isa,iwvl)); /*Apply NGS mode error to PSF. */ ngsmod2wvf(wvfc->p[iwfs], wvl, merr, powfs+ipowfs, isa, thetax, thetay, parms); cembedc(wvf->p[iwfs],wvfc->p[iwfs],0,C_FULL); cfft2(wvf->p[iwfs],-1); /*peak in corner. */ cabs22d(&psf[iwfs]->p[isa+nsa*iwvl], 1., wvf->p[iwfs], 1.); }/*isa */ }/*iwvl */ }/*iwfs */ /*Form detector image from accumulated PSFs*/ double igrad[2]; for(long iwfs=0; iwfs<aster->nwfs; iwfs++){ int dtrati=dtratc, idtrat=idtratc; if(multirate){//multirate idtrat=aster->idtrats->p[iwfs]; dtrati=dtrats->p[iwfs]; } if((istep+1) % dtrati == 0){/*has output */ dcellzero(ints[iwfs]); const int ipowfs=aster->wfs[iwfs].ipowfs; const long nsa=parms->maos.nsa[ipowfs]; for(long isa=0; isa<nsa; isa++){ for(long iwvl=0; iwvl<nwvl; iwvl++){ double siglev=aster->wfs[iwfs].siglev->p[iwvl]; ccpd(&otf->p[iwfs],psf[iwfs]->p[isa+nsa*iwvl]); cfft2i(otf->p[iwfs], 1); /*turn to OTF, peak in corner */ ccwm(otf->p[iwfs], powfs[ipowfs].dtf[iwvl].nominal); cfft2(otf->p[iwfs], -1); dspmulcreal(ints[iwfs]->p[isa]->p, powfs[ipowfs].dtf[iwvl].si, otf->p[iwfs]->p, siglev); } /*Add noise and apply matched filter. */ #if _OPENMP >= 200805 #pragma omp critical #endif switch(noisy){ case 0:/*no noise at all. */ break; case 1:/*both poisson and read out noise. */ { double bkgrnd=aster->wfs[iwfs].bkgrnd*dtrati; addnoise(ints[iwfs]->p[isa], &aster->rand, bkgrnd, bkgrnd, 0,0,IND(rnefs,idtrat,ipowfs)); } break; case 2:/*there is still poisson noise. 
*/ addnoise(ints[iwfs]->p[isa], &aster->rand, 0, 0, 0,0,0); break; default: error("Invalid noisy\n"); } igrad[0]=0; igrad[1]=0; double pixtheta=parms->skyc.pixtheta[ipowfs]; if(parms->skyc.mtch){ dmulvec(igrad, mtche[iwfs]->p[isa], ints[iwfs]->p[isa]->p, 1); } if(!parms->skyc.mtch || fabs(igrad[0])>pixtheta || fabs(igrad[1])>pixtheta){ if(!parms->skyc.mtch){ warning2("fall back to cog\n"); }else{ warning_once("mtch is out of range\n"); } dcog(igrad, ints[iwfs]->p[isa], 0, 0, 0, 3*IND(rnefs,idtrat,ipowfs), 0); igrad[0]*=pixtheta; igrad[1]*=pixtheta; } gradout->p[iwfs]->p[isa]=igrad[0]; gradout->p[iwfs]->p[isa+nsa]=igrad[1]; }/*isa */ pmerrm=merrm; dcellzero(psf[iwfs]);/*reset accumulation.*/ }/*if iwfs has output*/ }/*for wfs*/ }/*if phystart */ //output to mreal after using it to ensure two cycle delay. if(st2t){//Type I or II control. if(st2t->mint->p[0]){//has output. dcp(&mreal, st2t->mint->p[0]->p[0]); } }else{//LQG control kalman_output(kalman, &mreal, 0, 1); } if(kalman){//LQG control int indk=0; //Form PSOL grads and obtain index to LQG M for(int iwfs=0; iwfs<aster->nwfs; iwfs++){ int dtrati=(multirate?(int)dtrats->p[iwfs]:dtratc); if((istep+1) % dtrati==0){ indk|=1<<iwfs; dmm(&gradout->p[iwfs], 1, aster->g->p[iwfs], mpsol->p[iwfs], "nn", 1./dtrati); dzero(mpsol->p[iwfs]); } } if(indk){ kalman_update(kalman, gradout->m, indk-1); } }else if(st2t){ if(pmerrm){ dmm(&merrm->p[0], 0, pgm, gradout->m, "nn", 1); } servo_filter(st2t, pmerrm);//do even if merrm is zero. 
to simulate additional latency } if(parms->skyc.dbg){ memcpy(PCOL(gradsave, istep), gradout->m->p, sizeof(double)*gradsave->nx); } }/*istep; */ } if(parms->skyc.dbg){ int dtrati=(multirate?(int)dtrats->p[0]:dtratc); writebin(gradsave,"%s/skysim_grads_aster%d_dtrat%d",dirsetup, aster->iaster,dtrati); writebin(mres,"%s/skysim_sim_mres_aster%d_dtrat%d",dirsetup,aster->iaster,dtrati); } dfree(mreal); dcellfree(mpsol); dfree(merr); dcellfree(merrm); dcellfree(zgradc); dcellfree(gradout); dfree(gradsave); if(hasphy){ dcellfreearr(psf, aster->nwfs); dcellfreearr(ints, aster->nwfs); ccellfree(wvf); ccellfree(wvfc); ccellfree(otf); free(mtche); } servo_free(st2t); /*dfree(mres); */ if(mresout) { *mresout=mres; }else{ dfree(mres); } dscale(res, 1./((nstep-parms->skyc.evlstart)*parms->skyc.navg)); return res; }
/******************************************************************************/
/*
  Purpose:

    MAIN is the main program for FFT_SERIAL.

  Discussion:

    The "complex" vector A is actually stored as a double vector B.

    The "complex" vector entry A[I] is stored as:

      B[I*2+0], the real part,
      B[I*2+1], the imaginary part.

    For N = 2, 4, ..., 2^20 the program runs two cases: an accuracy check
    (forward transform followed by backward transform of random data,
    compared against the saved input) and a timing run of NITS
    forward/backward pairs on zero-filled data.

  Modified:

    23 March 2009

  Author:

    Original C version by Wesley Petersen.
    This C version by John Burkardt.

  Reference:

    Wesley Petersen, Peter Arbenz,
    Introduction to Parallel Computing - A practical guide with examples in C,
    Oxford University Press,
    ISBN: 0-19-851576-6,
    LC: QA76.58.P47.

  Return:

    0 on normal completion, 1 if the work arrays cannot be allocated.
*/
int main ( void )
{
  double ctime;
  double ctime1;
  double ctime2;
  double error;
  int first;
  double flops;
  double fnm1;
  int i;
  int icase;
  int it;
  int ln2;
  double mflops;
  int n;
  int nits = 10000;
  double seed;
  double sgn;
  double *w;
  double *x;
  double *y;
  double *z;
  double z0;
  double z1;

  timestamp ( );
  printf ( "\n" );
  printf ( "FFT_SERIAL\n" );
  printf ( " C version\n" );
  printf ( "\n" );
  printf ( " Demonstrate an implementation of the Fast Fourier Transform\n" );
  printf ( " of a complex data vector.\n" );
/*
  Prepare for tests.
*/
  printf ( "\n" );
  printf ( " Accuracy check:\n" );
  printf ( "\n" );
  printf ( " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
  printf ( "\n" );
  printf ( " N NITS Error Time Time/Call MFLOPS\n" );
  printf ( "\n" );

  seed = 331.0;
  n = 1;
/*
  LN2 is the log base 2 of N.  Each increase of LN2 doubles N.
*/
  for ( ln2 = 1; ln2 <= 20; ln2++ )
  {
    n = 2 * n;
/*
  Allocate storage for the complex arrays W, X, Y, Z.

  We handle the complex arithmetic, and store a complex number as a pair
  of doubles, a complex vector as a doubly dimensioned array whose second
  dimension is 2.
*/
    w = malloc ( n * sizeof ( *w ) );
    x = malloc ( 2 * n * sizeof ( *x ) );
    y = malloc ( 2 * n * sizeof ( *y ) );
    z = malloc ( 2 * n * sizeof ( *z ) );
/*
  The largest case needs tens of megabytes; stop cleanly instead of
  dereferencing a null pointer when an allocation fails.
*/
    if ( !w || !x || !y || !z )
    {
      fprintf ( stderr, "\n" );
      fprintf ( stderr, "FFT_SERIAL - Fatal error!\n" );
      fprintf ( stderr, " Unable to allocate work arrays for N = %d\n", n );
      free ( w );
      free ( x );
      free ( y );
      free ( z );
      return 1;
    }

    first = 1;

    for ( icase = 0; icase < 2; icase++ )
    {
      if ( first )
      {
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          z0 = ggl ( &seed );     /* real part of array */
          z1 = ggl ( &seed );     /* imaginary part of array */
          x[i] = z0;
          z[i] = z0;              /* copy of initial real data */
          x[i+1] = z1;
          z[i+1] = z1;            /* copy of initial imag. data */
        }
      }
      else
      {
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          z0 = 0.0;               /* real part of array */
          z1 = 0.0;               /* imaginary part of array */
          x[i] = z0;
          z[i] = z0;              /* copy of initial real data */
          x[i+1] = z1;
          z[i+1] = z1;            /* copy of initial imag. data */
        }
      }
/*
  Initialize the sine and cosine tables.
*/
      cffti ( n, w );
/*
  Transform forward, back
*/
      if ( first )
      {
        sgn = + 1.0;
        cfft2 ( n, x, y, w, sgn );
        sgn = - 1.0;
        cfft2 ( n, y, x, w, sgn );
/*
  Results should be same as the initial data multiplied by N.
*/
        fnm1 = 1.0 / ( double ) n;
        error = 0.0;
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          error = error
            + pow ( z[i] - fnm1 * x[i], 2 )
            + pow ( z[i+1] - fnm1 * x[i+1], 2 );
        }
        error = sqrt ( fnm1 * error );
        printf ( " %12d %8d %12e", n, nits, error );
        first = 0;
      }
      else
      {
        ctime1 = cpu_time ( );
        for ( it = 0; it < nits; it++ )
        {
          sgn = + 1.0;
          cfft2 ( n, x, y, w, sgn );
          sgn = - 1.0;
          cfft2 ( n, y, x, w, sgn );
        }
        ctime2 = cpu_time ( );
        ctime = ctime2 - ctime1;

        flops = 2.0 * ( double ) nits * ( 5.0 * ( double ) n * ( double ) ln2 );

        mflops = flops / 1.0E+06 / ctime;

        printf ( " %12e %12e %12f\n", ctime, ctime / ( double ) ( 2 * nits ), mflops );
      }
    }
/*
  Fewer repetitions as N grows: divide NITS by 10 every fourth size, but
  never go below one repetition.
*/
    if ( ( ln2 % 4 ) == 0 )
    {
      nits = nits / 10;
    }
    if ( nits < 1 )
    {
      nits = 1;
    }
    free ( w );
    free ( x );
    free ( y );
    free ( z );
  }

  printf ( "\n" );
  printf ( "FFT_SERIAL:\n" );
  printf ( " Normal end of execution.\n" );
  printf ( "\n" );
  timestamp ( );

  return 0;
}
int lrexp(sf_complex **img, sf_complex **dat, bool adj, sf_complex **lt, sf_complex **rt, sf_complex *ww, geopar geop, int pad1, bool verb, int snap, sf_complex ***wvfld) /*< zero-offset exploding reflector modeling/migration >*/ { int it, nt, ix, nx, nx2, iz, nz, nz2, nzx2, wfnt, wfit; int im, i, j, m2, ik, nk; float dt, dx, dz, ox; sf_complex *curr, **wave, *cwave, *cwavem, c; sf_complex *currm; nx = geop->nx; nz = geop->nz; dx = geop->dx; dz = geop->dz; ox = geop->ox; nt = geop->nt; dt = geop->dt; snap= geop->snap; nzx2= geop->nzx2; m2 = geop->m2; wfnt= geop->wfnt; nk = cfft2_init(pad1,nz,nx,&nz2,&nx2); if (nk!=geop->nk) sf_error("nk discrepancy!"); curr = sf_complexalloc(nzx2); cwave = sf_complexalloc(nk); wave = sf_complexalloc2(nzx2,m2); if (adj) { currm = sf_complexalloc(nzx2); icfft2_allocate(cwave); } else { cwavem = sf_complexalloc(nk); icfft2_allocate(cwavem); } #ifdef _OPENMP #pragma omp parallel for private(iz) #endif for (iz=0; iz < nzx2; iz++) { curr[iz] = sf_cmplx(0.,0.); } if (adj) { /* migration <- read wavefield */ #ifdef _OPENMP #pragma omp parallel for private(ix,iz) #endif for (ix=0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { curr[iz+ix*nz2]=dat[ix][iz]; } } wfit = (int)(nt-1)/snap; // wfnt-1 /* time stepping */ for (it=nt-1; it > -1; it--) { if (verb) sf_warning("it=%d;",it); /* matrix multiplication */ for (im = 0; im < m2; im++) { #ifdef _OPENMP #pragma omp parallel for private(ix,iz,i,j) shared(currm,lt,curr) #endif for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H currm[j] = conjf(lt[im][i])*curr[j]; #else currm[j] = sf_cmul(conjf(lt[im][i]), curr[j]); #endif } } cfft2(currm,wave[im]); } #ifdef _OPENMP #pragma omp parallel for private(ik,im,c) shared(wave,rt,cwave) #endif for (ik = 0; ik < nk; ik++) { c = sf_cmplx(0.,0.); for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += wave[im][ik]*conjf(rt[ik][im]); #else c += 
sf_cmul(wave[im][ik],conjf(rt[ik][im])); //complex multiplies complex #endif } cwave[ik] = c; } icfft2(curr,cwave); if (snap > 0 && it%snap == 0) { #ifdef _OPENMP #pragma omp parallel for private(ix,iz,j) #endif for ( ix = 0; ix < nx; ix++) { for ( iz = 0; iz<nz; iz++ ) { j = iz+ix*nz2; /* padded grid */ wvfld[wfit][ix][iz] = curr[j]; } } wfit--; } } /*time iteration*/ /*generate image*/ #ifdef _OPENMP #pragma omp parallel for private(ix,iz) #endif for (ix=0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { img[ix][iz] = curr[iz+ix*nz2]; } } } else { /* modeling -> write data */ /*point source*/ wfit = 0; /* time stepping */ for (it=0; it < nt; it++) { if (verb) sf_warning("it=%d;",it); /* matrix multiplication */ cfft2(curr,cwave); for (im = 0; im < m2; im++) { #ifdef _OPENMP #pragma omp parallel for private(ik) #endif for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rt[ik][im]; #else cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); #endif } icfft2(wave[im],cwavem); } #ifdef _OPENMP #pragma omp parallel for private(ix,iz,i,j,im,c) shared(curr,lt,wave) #endif for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H c = ww[it] * crealf(img[ix][iz]); // source term #else c = sf_crmul(ww[it], crealf(img[ix][iz])); // source term #endif for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave[im][j]; #else c += sf_cmul(lt[im][i], wave[im][j]); #endif } curr[j] = c; } } /* record wavefield*/ #ifdef _OPENMP #pragma omp parallel for private(ix,iz) #endif for (ix=0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { dat[ix][iz] = curr[iz+ix*nz2]; } } if (snap > 0 && it%snap == 0) { #ifdef _OPENMP #pragma omp parallel for private(ix,iz,j) #endif for ( ix = 0; ix < nx; ix++) { for ( iz = 0; iz<nz; iz++ ) { j = iz+ix*nz2; /* padded grid */ wvfld[wfit][ix][iz] = curr[j]; } } wfit++; } } } if (verb) sf_warning("."); cfft2_finalize(); return 0; }
/**
   Setup the detector transfer functions. See maos/setup_powfs.c

   For every powfs and every wavelength this fills powfs[ipowfs].dtf[iwvl]
   with:
   - nominal: the pixel (sinc) and blur (gaussian) transfer function,
     optionally multiplied by the OTF of the extra PSF in skyc.fnpsf1,
     after an fftshift / forward FFT / fftshift / cfft2i sequence;
   - si: the interpolation from the PSF grid to the detector pixel grid;
   - U: a vector of -2*pi*i*frequency values of length ncomp.
*/
static void setup_powfs_dtf(POWFS_S *powfs, const PARMS_S* parms){
    const int npowfs=parms->maos.npowfs;
    for(int ipowfs=0; ipowfs<npowfs; ipowfs++){
	const int ncomp=parms->maos.ncomp[ipowfs];
	const int nhalf=ncomp>>1;
	const int embfac=parms->maos.embfac[ipowfs];
	const int pixpsa=parms->skyc.pixpsa[ipowfs];
	const double pixtheta=parms->skyc.pixtheta[ipowfs];
	const double blur=parms->skyc.pixblur[ipowfs]*pixtheta;
	const double e0=exp(-2*M_PI*M_PI*blur*blur);
	const double dxsa=parms->maos.dxsa[ipowfs];
	const double pixoffx=parms->skyc.pixoffx[ipowfs];
	const double pixoffy=parms->skyc.pixoffy[ipowfs];
	/*Origin of the detector pixel grid.*/
	const double ccd_ox=-(pixpsa/2-0.5+pixoffx)*pixtheta;
	const double ccd_oy=-(pixpsa/2-0.5+pixoffy)*pixtheta;
	loc_t *loc_ccd=mksqloc(pixpsa, pixpsa, pixtheta, pixtheta, ccd_ox, ccd_oy);
	powfs[ipowfs].dtf=mycalloc(parms->maos.nwvl,DTF_S);
	for(int iwvl=0; iwvl<parms->maos.nwvl; iwvl++){
	    const double wvl=parms->maos.wvl[iwvl];
	    const double dtheta=wvl/(dxsa*embfac);
	    const double pdtheta=pixtheta*pixtheta/(dtheta*dtheta);
	    const double du=1./(dtheta*ncomp);
	    const double du2=du*du;
	    const double dupth=du*pixtheta;
	    cmat *nominal=cnew(ncomp,ncomp);
	    /*The pixel orientation angle is fixed to zero here.*/
	    const double theta=0;
	    const double ct=cos(theta);
	    const double st=sin(theta);
	    for(int iy=0; iy<ncomp; iy++){
		const int jy=iy-nhalf;
		for(int ix=0; ix<ncomp; ix++){
		    const int jx=ix-nhalf;
		    const double ir=ct*jx+st*jy;
		    const double ia=-st*jx+ct*jy;
		    IND(nominal,ix,iy)=sinc(ir*dupth)*sinc(ia*dupth)
			*pow(e0,ir*ir*du2)*pow(e0,ia*ia*du2)
			*pdtheta;
		}
	    }
	    if(parms->skyc.fnpsf1[ipowfs]){
		warning("powfs %d has additional otf to be multiplied\n", ipowfs);
		dcell *psfcell=dcellread("%s", parms->skyc.fnpsf1[ipowfs]);
		/*Either one PSF shared by all wavelengths or one per wavelength.*/
		dmat *psf1=NULL;
		if(psfcell->nx == 1){
		    psf1=dref(psfcell->p[0]);
		}else if(psfcell->nx==parms->maos.nwvl){
		    psf1=dref(psfcell->p[iwvl]);
		}else{
		    error("skyc.fnpsf1 has wrong dimension\n");
		}
		dcellfree(psfcell);
		if(psf1->ny!=2){
		    error("skyc.fnpsf1 has wrong dimension\n");
		}
		/*First column is the abscissa, second column is the value.*/
		dmat *psf1x=dnew_ref(psf1->nx, 1, psf1->p);
		dmat *psf1y=dnew_ref(psf1->nx, 1, psf1->p+psf1->nx);
		/*Radius of every sample of the ncomp x ncomp grid.*/
		dmat *radius=dnew(ncomp*ncomp, 1);
		for(int iy=0; iy<ncomp; iy++){
		    const int jy=iy-nhalf;
		    double *row=radius->p+iy*ncomp;
		    for(int ix=0; ix<ncomp; ix++){
			const int jx=ix-nhalf;
			row[ix]=sqrt(jx*jx+jy*jy)*dtheta;
		    }
		}
		info("powfs %d, iwvl=%d, dtheta=%g\n", ipowfs, iwvl, dtheta*206265000);
		writebin(radius, "powfs%d_psf2x_%d", ipowfs,iwvl);
		dmat *psf2=dinterp1(psf1x, psf1y, radius, 0);
		normalize_sum(psf2->p, psf2->nx*psf2->ny, 1);
		/*View the interpolated vector as an ncomp x ncomp image.*/
		psf2->nx=ncomp;
		psf2->ny=ncomp;
		writebin(psf2, "powfs%d_psf2_%d", ipowfs,iwvl);
		cmat *otf2=cnew(ncomp, ncomp);
		ccpd(&otf2, psf2);/*peak in center*/
		cfftshift(otf2);/*peak in corner*/
		cfft2(otf2, -1);
		cfftshift(otf2);/*peak in center*/
		writebin(otf2, "powfs%d_otf2_%d", ipowfs, iwvl);
		writebin(nominal, "powfs%d_dtf%d_nominal_0",ipowfs,iwvl);
		const int ntot=ncomp*ncomp;
		for(int k=0; k<ntot; k++){
		    nominal->p[k]*=otf2->p[k];
		}
		writebin(nominal, "powfs%d_dtf%d_nominal_1",ipowfs,iwvl);
		dfree(psf1x);
		dfree(psf1y);
		dfree(radius);
		dfree(psf1);
		cfree(otf2);
		dfree(psf2);
	    }
	    cfftshift(nominal);/*peak in corner*/
	    cfft2(nominal,-1);
	    cfftshift(nominal);/*peak in center*/
	    cfft2i(nominal,1);
	    warning_once("double check nominal for off centered skyc.fnpsf1\n");
	    /*This nominal will multiply to OTF with peak in corner. But after
	      inverse fft, peak will be in center*/
	    ccp(&powfs[ipowfs].dtf[iwvl].nominal, nominal);
	    cfree(nominal);

	    loc_t *loc_psf=mksqloc(ncomp, ncomp, dtheta, dtheta, -nhalf*dtheta, -nhalf*dtheta);
	    powfs[ipowfs].dtf[iwvl].si=mkh(loc_psf,loc_ccd,0,0,1);
	    locfree(loc_psf);
	    if(parms->skyc.dbg){
		writebin(powfs[ipowfs].dtf[iwvl].nominal,
			 "%s/powfs%d_dtf%d_nominal",dirsetup,ipowfs,iwvl);
		writebin(powfs[ipowfs].dtf[iwvl].si,
			 "%s/powfs%d_dtf%d_si",dirsetup,ipowfs,iwvl);
	    }
	    powfs[ipowfs].dtf[iwvl].U=cnew(ncomp,1);
	    dcomplex *U=powfs[ipowfs].dtf[iwvl].U->p;

	    for(int ix=0; ix<ncomp; ix++){
		/*Frequency index with the zero frequency at element 0.*/
		int jx=ix<nhalf?ix:(ix-ncomp);
		U[ix]=COMPLEX(0, -2.*M_PI*jx*du);
	    }
	}/*iwvl */
	locfree(loc_ccd);
    }/*ipowfs */
}
int lrosback2(sf_complex **img, sf_complex ***wavfld, float **sill, sf_complex **rcd, bool adj, bool verb, bool wantwf, sf_complex **lt, sf_complex **rt, int m2, geopar geop, int pad1, sf_complex ***wavfld2) /*< low-rank one-step backward propagation + imaging >*/ { int it,iz,im,ik,ix,i,j; /* index variables */ int nxb,nzb,dx,dz,gpz,gpx,gpl,snpint,dt,wfit; int nt,nz,nx, nk, nzx, nz2, nx2, nzx2; sf_complex c; sf_complex *cwave, *cwavem, *currm; sf_complex **wave, *curr; sf_complex **ccr; nx = geop->nx; nz = geop->nz; nxb = geop->nxb; nzb = geop->nzb; dx = geop->dx; dz = geop->dz; gpz = geop->gpz; gpx = geop->gpx; gpl = geop->gpl; snpint = geop->snpint; nt = geop->nt; dt = geop->dt; ccr = sf_complexalloc2(nz, nx); nk = cfft2_init(pad1,nzb,nxb,&nz2,&nx2); nzx = nzb*nxb; nzx2 = nz2*nx2; curr = sf_complexalloc(nzx2); cwave = sf_complexalloc(nk); cwavem = sf_complexalloc(nk); wave = sf_complexalloc2(nzx2,m2); if (!adj) { currm = sf_complexalloc(nzx2); icfft2_allocate(cwave); } else { cwavem = sf_complexalloc(nk); icfft2_allocate(cwavem); } #ifdef _OPENMP #pragma omp parallel for private(iz) #endif for (iz=0; iz < nzx2; iz++) { curr[iz] = sf_cmplx(0.,0.); } #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for (ix = 0; ix < nx; ix++) { for (iz = 0; iz < nz; iz++) { ccr[ix][iz] = sf_cmplx(0.,0.); } } if (adj) { /* migration */ /* step backward in time */ /*Main loop*/ wfit = (int)(nt-1)/snpint; for (it = nt-1; it>=0; it--) { if (verb) sf_warning("Backward receiver it=%d/%d;", it, nt-1); #ifdef _OPENMP #pragma omp parallel for private(ix,j) #endif for (ix=0; ix<gpl; ix++) { j = (gpz+geop->top)+(ix+gpx+geop->lft)*nz2; /* padded grid */ curr[j]+=rcd[ix][it]; /* data injection */ } /*matrix multiplication*/ cfft2(curr,cwave); for (im = 0; im < m2; im++) { #ifdef _OPENMP #pragma omp parallel for private(ik) #endif for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rt[ik][im]; #else cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); #endif } 
icfft2(wave[im],cwavem); } #ifdef _OPENMP #pragma omp parallel for private(ix,iz,i,j,im,c) shared(curr,lt,wave) #endif for (ix = 0; ix < nxb; ix++) { for (iz=0; iz < nzb; iz++) { i = iz+ix*nzb; /* original grid */ j = iz+ix*nz2; /* padded grid */ c = sf_cmplx(0.,0.); // initialize for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave[im][j]; #else c += sf_cmul(lt[im][i], wave[im][j]); #endif } curr[j] = c; } } if ( wantwf && it%snpint == 0 ) { #ifdef _OPENMP #pragma omp parallel for private(ix,iz,j) #endif for ( ix = 0; ix < nx; ix++) { for ( iz = 0; iz<nz; iz++ ) { j = (iz+geop->top)+(ix+geop->lft)*nz2; /* padded grid */ wavfld2[wfit][ix][iz] = curr[j]; } } } /*cross-correlation imaging condition*/ if (it%snpint == 0 ) { #ifdef _OPENMP #pragma omp parallel for private(ix,iz,j) #endif for (ix=0; ix<nx; ix++) { for (iz=0; iz<nz; iz++) { j = (iz+geop->top)+(ix+geop->lft)*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H ccr[ix][iz] += conjf(wavfld[wfit][ix][iz])*curr[j]; #else ccr[ix][iz] += sf_cmul(conjf(wavfld[wfit][ix][iz]),curr[j]); #endif } } wfit--; } } /*Main loop*/ if (verb) sf_warning("."); #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for (ix=0; ix<nx; ix++) { for (iz=0; iz<nz; iz++) { #ifdef SF_HAS_COMPLEX_H img[ix][iz] = ccr[ix][iz]/(sill[ix][iz]+SF_EPS); #else img[ix][iz] = sf_crmul(ccr[ix][iz],1./(sill[ix][iz]+SF_EPS)); #endif } } } else { /* modeling */ /* adjoint of source illumination */ #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for (ix=0; ix<nx; ix++) { for (iz=0; iz<nz; iz++) { #ifdef SF_HAS_COMPLEX_H ccr[ix][iz] = img[ix][iz]/(sill[ix][iz]+SF_EPS); #else ccr[ix][iz] = sf_crmul(img[ix][iz],1./(sill[ix][iz]+SF_EPS)); #endif } } /* step forward in time */ /*Main loop*/ wfit=0; for (it=0; it<nt; it++) { if (verb) sf_warning("Forward receiver it=%d/%d;", it, nt-1); if ( wantwf && it%snpint == 0 ) { #ifdef _OPENMP #pragma omp parallel for private(ix,iz,j) #endif for ( ix = 0; ix < nx; ix++) { for ( 
iz = 0; iz<nz; iz++ ) { j = (iz+geop->top)+(ix+geop->lft)*nz2; /* padded grid */ wavfld2[wfit][ix][iz] = curr[j]; } } } /*adjoint of cross-correlation imaging condition*/ if (it%snpint == 0 ) { #ifdef _OPENMP #pragma omp parallel for private(ix,iz,j) #endif for (ix=0; ix<nx; ix++) { for (iz=0; iz<nz; iz++) { j = (iz+geop->top)+(ix+geop->lft)*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H curr[j] += (wavfld[wfit][ix][iz])*ccr[ix][iz];//adjoint of ccr[ix][iz] += conjf(wavfld[wfit][ix][iz])*curr[j]; ??? #else curr[j] += sf_cmul((wavfld[wfit][ix][iz]),ccr[ix][iz]); #endif } } wfit++; } /*matrix multiplication*/ for (im = 0; im < m2; im++) { #ifdef _OPENMP #pragma omp parallel for private(ix,iz,i,j) shared(currm,lt,curr) #endif for (ix = 0; ix < nxb; ix++) { for (iz=0; iz < nzb; iz++) { i = iz+ix*nzb; /* original grid */ j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H currm[j] = conjf(lt[im][i])*curr[j]; #else currm[j] = sf_cmul(conjf(lt[im][i]), curr[j]); #endif } } cfft2(currm,wave[im]); } #ifdef _OPENMP #pragma omp parallel for private(ik,im,c) shared(wave,rt,cwave) #endif for (ik = 0; ik < nk; ik++) { c = sf_cmplx(0.,0.); for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += wave[im][ik]*conjf(rt[ik][im]); #else c += sf_cmul(wave[im][ik],conjf(rt[ik][im])); //complex multiplies complex #endif } cwave[ik] = c; } icfft2(curr,cwave); #ifdef _OPENMP #pragma omp parallel for private(ix,j) #endif for (ix=0; ix<gpl; ix++) { j = (gpz+geop->top)+(ix+gpx+geop->lft)*nz2; /* padded grid */ rcd[ix][it]=curr[j]; } } /*Main loop*/ } cfft2_finalize(); return 0; }
int main(int argc, char* argv[]) { bool verb; int it,iz,im,ik,ix,i,j; /* index variables */ int nt,nz,nx, m2, nk, nzx, nz2, nx2, nzx2, n2, pad1; sf_complex c; float *rr; /* I/O arrays*/ sf_complex *ww, *cwave, *cwavem; sf_complex **wave, *curr; float *rcurr; sf_file Fw,Fr,Fo; /* I/O files */ sf_axis at,az,ax; /* cube axes */ sf_complex **lt, **rt; sf_file left, right; sf_init(argc,argv); if(!sf_getbool("verb",&verb)) verb=false; /* verbosity */ /* setup I/O files */ Fw = sf_input ("in" ); Fo = sf_output("out"); Fr = sf_input ("ref"); if (SF_COMPLEX != sf_gettype(Fw)) sf_error("Need complex input"); if (SF_FLOAT != sf_gettype(Fr)) sf_error("Need float ref"); sf_settype(Fo,SF_FLOAT); /* Read/Write axes */ at = sf_iaxa(Fw,1); nt = sf_n(at); az = sf_iaxa(Fr,1); nz = sf_n(az); ax = sf_iaxa(Fr,2); nx = sf_n(ax); sf_oaxa(Fo,az,1); sf_oaxa(Fo,ax,2); sf_oaxa(Fo,at,3); if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */ nk = cfft2_init(pad1,nz,nx,&nz2,&nx2); nzx = nz*nx; nzx2 = nz2*nx2; /* propagator matrices */ left = sf_input("left"); right = sf_input("right"); if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx); if (!sf_histint(left,"n2",&m2)) sf_error("Need n2= in left"); if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2); if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk); // if (!sf_histint(Fw,"n1",&nxx)) sf_error("No n1= in input"); lt = sf_complexalloc2(nzx,m2); rt = sf_complexalloc2(m2,nk); sf_complexread(lt[0],nzx*m2,left); sf_complexread(rt[0],m2*nk,right); // sf_fileclose(left); // sf_fileclose(right); /* read wavelet & reflectivity */ ww=sf_complexalloc(nt); sf_complexread(ww,nt ,Fw); rr=sf_floatalloc(nzx); sf_floatread(rr,nzx,Fr); curr = sf_complexalloc(nzx2); rcurr = sf_floatalloc(nzx2); cwave = sf_complexalloc(nk); cwavem = sf_complexalloc(nk); wave = sf_complexalloc2(nzx2,m2); for (iz=0; iz < nzx2; iz++) { curr[iz] = sf_cmplx(0.,0.); rcurr[iz]= 0.; 
} /* MAIN LOOP */ for (it=0; it<nt; it++) { if(verb) sf_warning("it=%d;",it); /* matrix multiplication */ cfft2(curr,cwave); for (im = 0; im < m2; im++) { for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rt[ik][im]; #else cwavem[ik] = sf_cmul(cwave[ik],rt[ik][im]); //complex multiplies complex #endif // sf_warning("realcwave=%g, imagcwave=%g", crealf(cwavem[ik]),cimagf(cwavem[ik])); } icfft2(wave[im],cwavem); } for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz+ix*nz; /* original grid */ j = iz+ix*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H c = ww[it] * rr[i]; // source term #else c = sf_crmul(ww[it], rr[i]); // source term #endif for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave[im][j]; #else c = sf_cadd(c,sf_cmul(lt[im][i], wave[im][j])); #endif } curr[j] = c; rcurr[j] = crealf(c); // rcurr[j] = cimagf(c); } /* write wavefield to output */ sf_floatwrite(rcurr+ix*nz2,nz,Fo); } } if(verb) sf_warning("."); exit (0); }
/*
   Example of Apple Altivec coded binary radix FFT
   using intrinsics from Petersen and Arbenz "Intro.
   to Parallel Computing," Section 3.6

   This is an expanded version of a generic work-space
   FFT: steps are in-line. cfft2(n,x,y,w,sign) takes complex
   n-array "x" (Fortran real,aimag,real,aimag,... order)
   and writes its DFT in "y". Both input "x" and the
   original contents of "y" are destroyed. Initialization
   for array "w" (size n/2 complex of twiddle factors
   (exp(twopi*i*k/n), for k=0..n/2-1)) is computed once
   by cffti(n,w).

                     WPP, SAM. Math. ETHZ, 1 June, 2002

   For n = 2, 4, ..., 2^20 the driver checks that a forward transform
   followed by a backward transform reproduces n times the input, then
   repeats nits forward/backward pairs on zero-filled data.

   Returns 0 on success, 1 if the work arrays cannot be allocated.
*/
int main(void)
{
   int first,i,icase,it,ln2,n;
   int nits=1000000;
   static float seed = 331.0;
   float error,fnm1,sign,z0,z1,ggl();
   float *x,*y,*z,*w;

/* allocate storage for x,y,z,w on 4-word bndr. */
   x = malloc(8*N);
   y = malloc(8*N);
   z = malloc(8*N);
   w = malloc(4*N);
   if(!x || !y || !z || !w) {
      fprintf(stderr," unable to allocate FFT work arrays\n");
      free(x);
      free(y);
      free(z);
      free(w);
      return 1;
   }

   n = 2;
   for(ln2=1; ln2<21; ln2++) {
      first = 1;
      for(icase=0; icase<2; icase++) {
         if(first) {
            for(i=0; i<2*n; i+=2) {
               z0 = ggl(&seed);     /* real part of array */
               z1 = ggl(&seed);     /* imaginary part of array */
               x[i] = z0;
               z[i] = z0;           /* copy of initial real data */
               x[i+1] = z1;
               z[i+1] = z1;         /* copy of initial imag. data */
            }
         } else {
            for(i=0; i<2*n; i+=2) {
               z0 = 0;              /* real part of array */
               z1 = 0;              /* imaginary part of array */
               x[i] = z0;
               z[i] = z0;           /* copy of initial real data */
               x[i+1] = z1;
               z[i+1] = z1;         /* copy of initial imag. data */
            }
         }
/* initialize sine/cosine tables */
         cffti(n,w);
/* transform forward, back */
         if(first) {
            sign = 1.0;
            cfft2(n,x,y,w,sign);
            sign = -1.0;
            cfft2(n,y,x,w,sign);
/* results should be same as initial multiplied by n */
            fnm1 = 1.0/((float) n);
            error = 0.0;
            for(i=0; i<2*n; i+=2) {
               error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
                        (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
            }
            error = sqrt(fnm1*error);
            printf(" for n=%d, fwd/bck error=%e\n",n,error);
            first = 0;
         } else {
            for(it=0; it<nits; it++) {
               sign = +1.0;
               cfft2(n,x,y,w,sign);
               sign = -1.0;
               cfft2(n,y,x,w,sign);
            }
         }
      }
/* fewer repetitions as n grows */
      if((ln2%4)==0) nits /= 10;
      n *= 2;
   }

   free(x);
   free(y);
   free(z);
   free(w);
   return 0;
}
static void test_stfun(){ rand_t rstat; int seed=4; double r0=0.2; double dx=1./16; long N=32; long nx=N; long ny=N; long nframe=500; seed_rand(&rstat, seed); if(L0<9000){ dmat *rr=dlinspace(0, N*dx, N); dmat *covvk=turbcov(rr, sqrt(2)*N*dx, r0, L0); writebin(covvk, "cov_vk"); dfree(rr); dfree(covvk); } /* return; */ { map_t *atm=mapnew(nx+1, ny+1, dx, dx,NULL); stfun_t *data=stfun_init(nx, ny, NULL); zfarr *save=zfarr_init(nframe, 1, "fractal_atm.bin"); for(long i=0; i<nframe; i++){ for(long j=0; j<(nx+1)*(ny+1); j++){ atm->p[j]=randn(&rstat); } fractal_do((dmat*)atm, dx, r0,L0,ninit); stfun_push(data, (dmat*)atm); zfarr_dmat(save, i, (dmat*)atm); if(i%100==0) info("%ld of %ld\n", i, nframe); } zfarr_close(save); dmat *st=stfun_finalize(data); writebin(st, "stfun_fractal.bin"); ddraw("fractal", st, NULL,NULL, "Atmosphere","x","y","stfun"); } /*exit(0); */ { stfun_t *data=stfun_init(nx, ny, NULL); dmat *spect=turbpsd(nx, ny, dx, r0, 100, 0, 0.5); cmat *atm=cnew(nx, ny); //cfft2plan(atm, -1); dmat *atmr=dnew(atm->nx, atm->ny); dmat *atmi=dnew(atm->nx, atm->ny); spect->p[0]=0; for(long ii=0; ii<nframe; ii+=2){ for(long i=0; i<atm->nx*atm->ny; i++){ atm->p[i]=COMPLEX(randn(&rstat), randn(&rstat))*spect->p[i]; } cfft2(atm, -1); for(long i=0; i<atm->nx*atm->ny; i++){ atmr->p[i]=creal(atm->p[i]); atmi->p[i]=cimag(atm->p[i]); } stfun_push(data, atmr); stfun_push(data, atmi); if(ii%100==0) info("%ld of %ld\n", ii, nframe); } dmat *st=stfun_finalize(data); writebin(st, "stfun_fft.bin"); ddraw("fft", st, NULL,NULL, "Atmosphere","x","y","stfun"); } }
Datum fft_main(PG_FUNCTION_ARGS) { int i,n; double sgn; double *w; double wtime; double *x,*y,*z; int32 arg = PG_GETARG_INT32(0); timestamp(); ereport(INFO,(errmsg(" Number of processors available = %d\n", omp_get_num_procs()))); ereport(INFO,(errmsg(" Number of threads = %d\n", omp_get_max_threads()))); //Prepare for tests. ereport(INFO,(errmsg(" N Time\n"))); n = 4; w = (double *) malloc( n * sizeof(double)); x = (double *) malloc(2 * n * sizeof(double)); y = (double *) malloc(2 * n * sizeof(double)); z = (double *) malloc(2 * n * sizeof(double)); //初始化数据 x[0]=1.0; x[1]=0.0; x[2]=2.0; x[3]=0.0; x[4]=4.0; x[5]=0.0; x[6]=3.0; x[7]=0.0; ereport(INFO,(errmsg("x="))); for(i=0; i<2*n; i++){ ereport(INFO,(errmsg("%f,",x[i]))); } //Initialize the sine and cosine tables. cffti(n, w); wtime = omp_get_wtime(); //Transform forward sgn = + 1.0; //fft计算 cfft2( n, x, y, w, sgn ); //输出结果 ereport(INFO,(errmsg("y="))); for(i=0; i<2*n; i++){ ereport(INFO,(errmsg("%f,",y[i]))); } //元素个数 ereport(INFO,(errmsg(" %12d", n))); //运行时间 wtime = omp_get_wtime() - wtime; ereport(INFO,(errmsg(" %12e\n", wtime))); free(w); free(x); free(y); //Terminate. ereport(INFO,(errmsg(" Normal end of execution.\n"))); timestamp(); PG_RETURN_INT32(arg); }
int main(int argc, char* argv[]) { bool mig; int it, nt, ix, nx, iz, nz, nx2, nz2, nzx, nzx2, pad1; int im, i, j, m2, it1, it2, its, ik, n2, nk; float dt, dx, dz,x0; sf_complex *curr, **img, *dat, **lft, **rht, **wave, *cwave, *cwavem, c; sf_file data, image, left, right; sf_init(argc,argv); if (!sf_getbool("mig",&mig)) mig=false; /* if n, modeling; if y, migration */ if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */ if (mig) { /* migration */ data = sf_input("in"); image = sf_output("out"); sf_settype(image,SF_COMPLEX); if (!sf_histint(data,"n1",&nx)) sf_error("No n1= in input"); if (!sf_histfloat(data,"d1",&dx)) sf_error("No d1= in input"); if (!sf_histfloat(data,"o1",&x0)) x0=0.; if (!sf_histint(data,"n2",&nt)) sf_error("No n2= in input"); if (!sf_histfloat(data,"d2",&dt)) sf_error("No d2= in input"); if (!sf_getint("nz",&nz)) sf_error("Need nz="); /* depth samples (if migration) */ if (!sf_getfloat("dz",&dz)) sf_error("Need dz="); /* depth sampling (if migration) */ sf_putint(image,"n1",nz); sf_putfloat(image,"d1",dz); sf_putfloat(image,"o1",0.); sf_putstring(image,"label1","Depth"); sf_putint(image,"n2",nx); sf_putfloat(image,"d2",dx); sf_putfloat(image,"o2",x0); sf_putstring(image,"label2","Distance"); } else { /* modeling */ image = sf_input("in"); data = sf_output("out"); sf_settype(data,SF_COMPLEX); if (!sf_histint(image,"n1",&nz)) sf_error("No n1= in input"); if (!sf_histfloat(image,"d1",&dz)) sf_error("No d1= in input"); if (!sf_histint(image,"n2",&nx)) sf_error("No n2= in input"); if (!sf_histfloat(image,"d2",&dx)) sf_error("No d2= in input"); if (!sf_histfloat(image,"o2",&x0)) x0=0.; if (!sf_getint("nt",&nt)) sf_error("Need nt="); /* time samples (if modeling) */ if (!sf_getfloat("dt",&dt)) sf_error("Need dt="); /* time sampling (if modeling) */ sf_putint(data,"n1",nx); sf_putfloat(data,"d1",dx); sf_putfloat(data,"o1",x0); sf_putstring(data,"label1","Distance"); sf_putint(data,"n2",nt); sf_putfloat(data,"d2",dt); 
sf_putfloat(data,"o2",0.); sf_putstring(data,"label2","Time"); sf_putstring(data,"unit2","s"); } nk = cfft2_init(pad1,nx,nz,&nx2,&nz2); nzx = nz*nx; nzx2 = nz2*nx2; img = sf_complexalloc2(nz,nx); dat = sf_complexalloc(nx); /* propagator matrices */ left = sf_input("left"); right = sf_input("right"); if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx); if (!sf_histint(left,"n2",&m2)) sf_error("No n2= in left"); if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2); if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk); lft = sf_complexalloc2(nzx,m2); rht = sf_complexalloc2(m2,nk); sf_complexread(lft[0],nzx*m2,left); sf_complexread(rht[0],m2*nk,right); curr = sf_complexalloc(nzx2); cwave = sf_complexalloc(nk); cwavem = sf_complexalloc(nk); wave = sf_complexalloc2(nzx2,m2); for (iz=0; iz < nzx2; iz++) { curr[iz] = sf_cmplx(0.,0.); } if (mig) { /* migration */ /* step backward in time */ it1 = nt-1; it2 = -1; its = -1; } else { /* modeling */ sf_complexread(img[0],nzx,image); /* transpose */ for (ix=0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { curr[ix+iz*nx2]=img[ix][iz]; } } /* step forward in time */ it1 = 0; it2 = nt; its = +1; } /* time stepping */ for (it=it1; it != it2; it += its) { sf_warning("it=%d;",it); if (mig) { /* migration <- read data */ sf_complexread(dat,nx,data); } else { for (ix=0; ix < nx; ix++) { dat[ix] = sf_cmplx(0.,0.); } } for (ix=0; ix < nx; ix++) { if (mig) { curr[ix] += dat[ix]; } else { dat[ix] = curr[ix]; } } /* matrix multiplication */ cfft2(curr,cwave); for (im = 0; im < m2; im++) { for (ik = 0; ik < nk; ik++) { #ifdef SF_HAS_COMPLEX_H cwavem[ik] = cwave[ik]*rht[ik][im]; #else cwavem[ik] = sf_cmul(cwave[ik],rht[ik][im]); #endif } icfft2(wave[im],cwavem); } #ifdef _OPENMP #pragma omp parallel for private(ix,iz,i,j,im,c) shared(curr,lft,wave) #endif for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = ix+iz*nx; /* original grid */ j = ix+iz*nx2; /* 
padded grid */ c = sf_cmplx(0.,0.); /* initialize */ for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lft[im][i]*wave[im][j]; #else c += sf_cmul(lft[im][i], wave[im][j]); #endif } curr[j] = c; } } if (!mig) { /* modeling -> write out data */ sf_complexwrite(dat,nx,data); } } sf_warning("."); if (mig) { /* transpose */ for (ix=0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { img[ix][iz] = curr[ix+iz*nx2]; } } sf_complexwrite(img[0],nzx,image); } exit(0); }