int main(int argc, char* argv[]) { bool verb; int it,iz,im,ikz,ikx,iky,ix,iy,i,j,snap; /* index variables */ int nt,nz,nx,ny, m2, nk, nzx, nz2, nx2, ny2, nzx2, n2, pad1; float dt; sf_complex c; float *rr; /* I/O arrays*/ sf_complex *cwave, *cwavem, *ww; sf_complex **wave, *curr; float *rcurr, *rcurr_all; sf_file Fw,Fr,Fo; /* I/O files */ sf_axis at,az,ax,ay; /* cube axes */ sf_complex **lt, **rt; sf_file left, right, snaps; /*MPI related*/ int cpuid,numprocs; int provided; int n_local, o_local; int ozx2; float *sendbuf, *recvbuf; int *rcounts, *displs; MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED,&provided); threads_ok = provided >= MPI_THREAD_FUNNELED; sf_init(argc,argv); MPI_Comm_rank(MPI_COMM_WORLD, &cpuid); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); if(!sf_getbool("verb",&verb)) verb=true; /* verbosity */ /* setup I/O files */ Fw = sf_input ("--input" ); Fo = sf_output("--output"); Fr = sf_input ("ref"); /* Read/Write axes */ at = sf_iaxa(Fw,1); nt = sf_n(at); dt = sf_d(at); az = sf_iaxa(Fr,1); nz = sf_n(az); ax = sf_iaxa(Fr,2); nx = sf_n(ax); ay = sf_iaxa(Fr,3); ny = sf_n(ay); if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */ if (!sf_getint("snap",&snap)) snap=0; /* interval for snapshots */ if (cpuid==0) { sf_oaxa(Fo,az,1); sf_oaxa(Fo,ax,2); sf_oaxa(Fo,ay,3); sf_settype(Fo,SF_FLOAT); if (snap > 0) { snaps = sf_output("snaps"); /* (optional) snapshot file */ sf_oaxa(snaps,az,1); sf_oaxa(snaps,ax,2); sf_oaxa(snaps,ay,3); sf_oaxa(snaps,at,4); sf_settype(snaps,SF_FLOAT); sf_putint(snaps,"n4",nt/snap); sf_putfloat(snaps,"d4",dt*snap); sf_putfloat(snaps,"o4",0.); } else { snaps = NULL; } } //nk = cfft3_init(pad1,nz,nx,ny,&nz2,&nx2,&ny2); //n_local = ny2; //o_local = 0; nk = mcfft3_init(pad1,nz,nx,ny,&nz2,&nx2,&ny2,&n_local,&o_local); sf_warning("Cpuid=%d,n2=%d,n1=%d,n0=%d,local_n0=%d,local_0_start=%d",cpuid,nz2,nx2,ny2,n_local,o_local); nzx = nz*nx*ny; //nzx2 = nz2*nx2*ny2; nzx2 = n_local*nz2*nx2; ozx2 = o_local*nz2*nx2; /* propagator matrices */ left = sf_input("left"); right = sf_input("right"); if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx); if (!sf_histint(left,"n2",&m2)) sf_error("Need n2=%d in left",m2); if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2); if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk); lt = sf_complexalloc2(nzx,m2); rt = sf_complexalloc2(m2,nk); sf_complexread(lt[0],nzx*m2,left); sf_complexread(rt[0],m2*nk,right); /* read wavelet & reflectivity */ ww=sf_complexalloc(nt); sf_complexread(ww,nt ,Fw); rr=sf_floatalloc(nzx); sf_floatread(rr,nzx,Fr); curr = sf_complexalloc(nzx2); rcurr= sf_floatalloc(nzx2); cwave = sf_complexalloc(nzx2); cwavem = sf_complexalloc(nzx2); wave = sf_complexalloc2(nzx2,m2); //icfft3_allocate(cwavem); for (iz=0; iz < nzx2; iz++) { curr[iz]=sf_cmplx(0.,0.); rcurr[iz]=0.; } sendbuf = rcurr; if (cpuid==0) { rcurr_all = sf_floatalloc(nz2*nx2*ny2); recvbuf = rcurr_all; rcounts = sf_intalloc(numprocs); displs = sf_intalloc(numprocs); } else { rcurr_all = NULL; recvbuf = NULL; rcounts = NULL; displs = NULL; } MPI_Gather(&nzx2, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Gather(&ozx2, 1, MPI_INT, displs, 1, MPI_INT, 0, MPI_COMM_WORLD); /* MAIN LOOP */ for (it=0; it<nt; it++) { if(verb) sf_warning("it=%d;",it); /* matrix multiplication */ mcfft3(curr,cwave); for (im = 0; im < m2; im++) { for (iky = 0; iky < n_local; iky++) { for (ikx = 0; ikx < nx2; ikx++) { for (ikz = 0; ikz < nz2; ikz++) { i = ikz + ikx*nz2 + (o_local+iky)*nx2*nz2; j = ikz + ikx*nz2 + iky*nx2*nz2; #ifdef SF_HAS_COMPLEX_H cwavem[j] = cwave[j]*rt[i][im]; #else cwavem[j] = sf_cmul(cwave[j],rt[i][im]); #endif } } } imcfft3(wave[im],cwavem); } for (iy = 0; iy < n_local && (iy+o_local)<ny; iy++) { for (ix = 0; ix < nx; ix++) { for (iz=0; iz < nz; iz++) { i = iz + ix*nz + (o_local+iy)*nx*nz; /* original grid */ j = iz + ix*nz2+ iy*nx2*nz2; /* padded grid */ #ifdef SF_HAS_COMPLEX_H c = ww[it] * rr[i]; #else c = sf_crmul(ww[it],rr[i]); #endif for (im = 0; im < m2; im++) { #ifdef SF_HAS_COMPLEX_H c += lt[im][i]*wave[im][j]; #else c += sf_cmul(lt[im][i],wave[im][j]); #endif } curr[j] = c; rcurr[j]= crealf(c); } } } /* output movie */ if (NULL != snaps && 0 == it%snap) { MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD); if (cpuid==0) { for (iy = 0; iy < ny; iy++) for (ix = 0; ix < nx; ix++) sf_floatwrite(rcurr_all+nz2*(ix+nx2*iy),nz,snaps); } } } if(verb) sf_warning("."); /* write wavefield to output */ MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD); if (cpuid==0) { for (iy = 0; iy < ny; iy++) for (ix = 0; ix < nx; ix++) sf_floatwrite(rcurr_all+nz2*(ix+nx2*iy),nz,Fo); } mcfft3_finalize(); MPI_Finalize(); exit (0); }
int main(int argc, char* argv[]) { bool mig, sub; int it, nt, ix, nx, iz, nz, nx2, nz2, nzx, nzx2, ih, nh, nh2; int im, i, j, m2, it1, it2, its, ikz, ikx, ikh, n2, nk, snap; float dt, dx, dz, c, old, dh; float *curr, *prev, **img, **dat, **lft, **rht, **wave; sf_complex *cwave, *cwavem; sf_file data, image, left, right, snaps; /*MPI related*/ int cpuid,numprocs; int provided; int n_local, o_local, nz_local; int ozx2; float *sendbuf, *recvbuf, *wave_all; int *rcounts, *displs; /*wall time*/ double startTime, elapsedTime; double clockZero = 0.0; MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED,&provided); threads_ok = provided >= MPI_THREAD_FUNNELED; sf_init(argc,argv); MPI_Comm_rank(MPI_COMM_WORLD, &cpuid); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); if (!sf_getbool("mig",&mig)) mig=false; /* if n, modeling; if y, migration */ if (!sf_getint("snap",&snap)) snap=0; /* interval for snapshots */ snaps = (snap > 0)? sf_output("snaps"): NULL; /* (optional) snapshot file */ if (mig) { /* migration */ data = sf_input("input"); image = sf_output("output"); if (!sf_histint(data,"n1",&nh)) sf_error("No n1="); if (!sf_histfloat(data,"d1",&dh)) sf_error("No d1="); if (!sf_histint(data,"n2",&nx)) sf_error("No n2="); if (!sf_histfloat(data,"d2",&dx)) sf_error("No d2="); if (!sf_histint(data,"n3",&nt)) sf_error("No n3="); if (!sf_histfloat(data,"d3",&dt)) sf_error("No d3="); if (!sf_getint("nz",&nz)) sf_error("Need nz="); /* time samples (if migration) */ if (!sf_getfloat("dz",&dz)) sf_error("Need dz="); /* time sampling (if migration) */ if (cpuid==0) { sf_putint(image,"o1",0.); sf_putint(image,"n1",nz); sf_putfloat(image,"d1",dz); sf_putstring(image,"label1","Depth"); sf_putint(image,"o2",0.); sf_putint(image,"n2",nx); sf_putfloat(image,"d2",dx); sf_putstring(image,"label2","Midpoint"); sf_putint(image,"n3",1); /* stack for now */ } } else { /* modeling */ image = sf_input("input"); data = sf_output("output"); if (!sf_histint(image,"n1",&nz)) sf_error("No n1="); if (!sf_histfloat(image,"d1",&dz)) sf_error("No d1="); if (!sf_histint(image,"n2",&nx)) sf_error("No n2="); if (!sf_histfloat(image,"d2",&dx)) sf_error("No d2="); if (!sf_getint("nt",&nt)) sf_error("Need nt="); /* time samples (if modeling) */ if (!sf_getfloat("dt",&dt)) sf_error("Need dt="); /* time sampling (if modeling) */ if (!sf_getint("nh",&nh)) sf_error("Need nh="); /* offset samples (if modeling) */ if (!sf_getfloat("dh",&dh)) sf_error("Need dh="); /* offset sampling (if modeling) */ if (cpuid==0) { sf_putint(data,"n1",nh); sf_putfloat(data,"d1",dh); sf_putstring(data,"label1","Half-Offset"); sf_putint(data,"o2",0.); sf_putint(data,"n2",nx); sf_putfloat(data,"d2",dx); sf_putstring(data,"label2","Midpoint"); sf_putint(data,"n3",nt); sf_putfloat(data,"d3",dt); sf_putstring(data,"label3","Time"); sf_putstring(data,"unit3","s"); } } if (cpuid==0) { if (NULL != snaps) { sf_putint(snaps,"n1",nh); sf_putfloat(snaps,"d1",dh); sf_putstring(snaps,"label1","Half-Offset"); sf_putint(snaps,"n2",nx); sf_putfloat(snaps,"d2",dx); sf_putstring(snaps,"label2","Midpoint"); sf_putint(snaps,"n3",nz); sf_putfloat(snaps,"d3",dz); sf_putstring(snaps,"label3","Depth"); sf_putint(snaps,"n4",nt/snap); sf_putfloat(snaps,"d4",dt*snap); if (mig) { sf_putfloat(snaps,"o4",(nt-1)*dt); } else { sf_putfloat(snaps,"o4",0.); } sf_putstring(snaps,"label4","Time"); } } /* Mark the starting time. */ startTime = walltime( &clockZero ); nk = mcfft3_init(1,nh,nx,nz,&nh2,&nx2,&nz2,&n_local,&o_local); nz_local = (n_local < nz-o_local)? n_local:nz-o_local; sf_warning("Cpuid=%d,n2=%d,n1=%d,n0=%d,local_n0=%d,local_0_start=%d,nz_local=%d",cpuid,nh2,nx2,nz2,n_local,o_local,nz_local); if (cpuid==0) if (o_local!=0) sf_error("Cpuid and o_local inconsistant!"); nzx = nz*nx*nh; //nzx2 = nz2*nx2*nh2; nzx2 = n_local*nx2*nh2; ozx2 = o_local*nx2*nh2; img = sf_floatalloc2(nz,nx); dat = sf_floatalloc2(nh,nx); /* propagator matrices */ left = sf_input("left"); right = sf_input("right"); if (!sf_histbool(left,"sub",&sub) && !sf_getbool("sub",&sub)) sub=true; /* if -1 is included in the matrix */ if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx); if (!sf_histint(left,"n2",&m2)) sf_error("No n2= in left"); if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2); if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk); lft = sf_floatalloc2(nzx,m2); rht = sf_floatalloc2(m2,nk); sf_floatread(lft[0],nzx*m2,left); sf_floatread(rht[0],m2*nk,right); curr = sf_floatalloc(nzx2); prev = sf_floatalloc(nzx2); cwave = sf_complexalloc(nk); cwavem = sf_complexalloc(nk); wave = sf_floatalloc2(nzx2,m2); #ifdef _OPENMP #pragma omp parallel for default(shared) private(iz) #endif for (iz=0; iz < nzx2; iz++) { curr[iz]=0.; prev[iz]=0.; } sendbuf = prev; if (cpuid==0) { wave_all = sf_floatalloc(nh2*nx2*nz2); recvbuf = wave_all; rcounts = sf_intalloc(numprocs); displs = sf_intalloc(numprocs); } else { wave_all = NULL; recvbuf = NULL; rcounts = NULL; displs = NULL; } MPI_Gather(&nzx2, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Gather(&ozx2, 1, MPI_INT, displs, 1, MPI_INT, 0, MPI_COMM_WORLD); if (mig) { /* migration */ /* step backward in time */ it1 = nt-1; it2 = -1; its = -1; } else { /* modeling */ sf_floatread(img[0],nz*nx,image); /* transpose and initialize at zero offset */ #ifdef _OPENMP #pragma omp parallel for default(shared) private(iz,ix) #endif for (iz=0; iz < nz_local; iz++) { for (ix=0; ix < nx; ix++) { curr[nh2*(ix+iz*nx2)]=img[ix][iz+o_local]; } } /* step forward in time */ it1 = 0; it2 = nt; its = +1; } /* time stepping */ for (it=it1; it != it2; it += its) { sf_warning("it=%d;",it); if (mig) { /* migration <- read data */ sf_floatread(dat[0],nx*nh,data); } else { #ifdef _OPENMP #pragma omp parallel for default(shared) private(ix,ih) #endif for (ix=0; ix < nx; ix++) { for (ih=0; ih < nh; ih++) { dat[ix][ih] = 0.; } } } if (NULL != snaps && 0 == it%snap) { MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD); if (cpuid==0) { for (iz = 0; iz < nz; iz++) for (ix = 0; ix < nx; ix++) sf_floatwrite(wave_all+nh2*(ix+nx2*iz),nh,snaps); } } /* at z=0 */ if (cpuid==0) { #ifdef _OPENMP #pragma omp parallel for default(shared) private(ix,ih) #endif for (ix=0; ix < nx; ix++) { for (ih=0; ih < nh; ih++) { if (mig) { curr[ix*nh2+ih] += dat[ix][ih]; } else { dat[ix][ih] = curr[ix*nh2+ih]; } } } } /* matrix multiplication */ mcfft3(curr,cwave); for (im = 0; im < m2; im++) { //for (ik = 0; ik < nk; ik++) { #ifdef _OPENMP #pragma omp parallel for default(shared) private(ikz,ikx,ikh,i,j) #endif for (ikz = 0; ikz < n_local; ikz++) { for (ikx = 0; ikx < nx2; ikx++) { for (ikh = 0; ikh < nh2; ikh++) { i = ikh + ikx*nh2 + (o_local+ikz)*nx2*nh2; j = ikh + ikx*nh2 + ikz*nx2*nh2; #ifdef SF_HAS_COMPLEX_H cwavem[j] = cwave[j]*rht[i][im]; #else cwavem[j] = sf_crmul(cwave[j],rht[i][im]); #endif } } } imcfft3(wave[im],cwavem); } #ifdef _OPENMP #pragma omp parallel for default(shared) private(ix,iz,ih,i,j,im,old,c) #endif for (iz=0; iz < nz_local; iz++) { for (ix = 0; ix < nx; ix++) { for (ih=0; ih < nh; ih++) { i = ih + ix*nh + (o_local+iz)*nx*nh; /* original grid */ j = ih + ix*nh2+ iz*nx2*nh2; /* padded grid */ old = curr[j]; c = sub? 2*old: 0.0f; c -= prev[j]; prev[j] = old; for (im = 0; im < m2; im++) { c += lft[im][i]*wave[im][j]; } curr[j] = c; } } } if (!mig) { /* modeling -> write out data */ if (cpuid==0) sf_floatwrite(dat[0],nx*nh,data); } } sf_warning("."); if (mig) { sendbuf = curr; MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD); if (cpuid==0) { /* transpose */ #ifdef _OPENMP #pragma omp parallel for default(shared) private(iz,ix) #endif for (iz=0; iz < nz; iz++) { for (ix=0; ix < nx; ix++) { img[ix][iz] = wave_all[nh2*(ix+iz*nx2)]; } } sf_floatwrite(img[0],nz*nx,image); } } mcfft3_finalize(); /* Work's done. Get the elapsed wall time. */ elapsedTime = walltime( &startTime ); /* Print the wall time and terminate. */ if (cpuid==0) printf("\nwall time = %.5fs\n", elapsedTime); MPI_Finalize(); exit(0); }