Exemplo n.º 1
0
int main(int argc, char* argv[])
{
  bool verb;        
  int it,iz,im,ikz,ikx,iky,ix,iy,i,j,snap;     /* index variables */
  int nt,nz,nx,ny, m2, nk, nzx, nz2, nx2, ny2, nzx2, n2, pad1;
  float dt;
  sf_complex c;

  float  *rr;      /* I/O arrays*/
  sf_complex *cwave, *cwavem, *ww;
  sf_complex **wave, *curr;
  float *rcurr, *rcurr_all;

  sf_file Fw,Fr,Fo;    /* I/O files */
  sf_axis at,az,ax,ay;    /* cube axes */

  sf_complex **lt, **rt;
  sf_file left, right, snaps;

  /*MPI related*/
  int cpuid,numprocs;
  int provided;
  int n_local, o_local;
  int ozx2;
  float *sendbuf, *recvbuf;
  int *rcounts, *displs;

  MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED,&provided);
  threads_ok = provided >= MPI_THREAD_FUNNELED;

  sf_init(argc,argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &cpuid);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

  if(!sf_getbool("verb",&verb)) verb=true; /* verbosity */

  /* setup I/O files */
  Fw = sf_input ("--input" );
  Fo = sf_output("--output");
  Fr = sf_input ("ref");

  /* Read/Write axes */
  at = sf_iaxa(Fw,1); nt = sf_n(at); dt = sf_d(at); 
  az = sf_iaxa(Fr,1); nz = sf_n(az); 
  ax = sf_iaxa(Fr,2); nx = sf_n(ax); 
  ay = sf_iaxa(Fr,3); ny = sf_n(ay); 

  if (!sf_getint("pad1",&pad1)) pad1=1; /* padding factor on the first axis */

  if (!sf_getint("snap",&snap)) snap=0;
  /* interval for snapshots */
    
  if (cpuid==0) {

    sf_oaxa(Fo,az,1); 
    sf_oaxa(Fo,ax,2);
    sf_oaxa(Fo,ay,3);
    
    sf_settype(Fo,SF_FLOAT);

    if (snap > 0) {
      snaps = sf_output("snaps");
      /* (optional) snapshot file */
	
      sf_oaxa(snaps,az,1); 
      sf_oaxa(snaps,ax,2);
      sf_oaxa(snaps,ay,3);
      sf_oaxa(snaps,at,4);
      sf_settype(snaps,SF_FLOAT);
      sf_putint(snaps,"n4",nt/snap);
      sf_putfloat(snaps,"d4",dt*snap);
      sf_putfloat(snaps,"o4",0.);
    } else {
      snaps = NULL;
    }

  }

  //nk = cfft3_init(pad1,nz,nx,ny,&nz2,&nx2,&ny2);
  //n_local = ny2;
  //o_local = 0;
  nk = mcfft3_init(pad1,nz,nx,ny,&nz2,&nx2,&ny2,&n_local,&o_local);
  sf_warning("Cpuid=%d,n2=%d,n1=%d,n0=%d,local_n0=%d,local_0_start=%d",cpuid,nz2,nx2,ny2,n_local,o_local);

  nzx = nz*nx*ny;
  //nzx2 = nz2*nx2*ny2;
  nzx2 = n_local*nz2*nx2;
  ozx2 = o_local*nz2*nx2;

  /* propagator matrices */
  left = sf_input("left");
  right = sf_input("right");

  if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
  if (!sf_histint(left,"n2",&m2))  sf_error("Need n2=%d in left",m2);
    
  if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
  if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);
 
  lt = sf_complexalloc2(nzx,m2);
  rt = sf_complexalloc2(m2,nk);

  sf_complexread(lt[0],nzx*m2,left);
  sf_complexread(rt[0],m2*nk,right);

  /* read wavelet & reflectivity */
  ww=sf_complexalloc(nt);  sf_complexread(ww,nt ,Fw);
  rr=sf_floatalloc(nzx); sf_floatread(rr,nzx,Fr);

  curr = sf_complexalloc(nzx2);
  rcurr= sf_floatalloc(nzx2);

  cwave  = sf_complexalloc(nzx2);
  cwavem = sf_complexalloc(nzx2);
  wave = sf_complexalloc2(nzx2,m2);

  //icfft3_allocate(cwavem);

  for (iz=0; iz < nzx2; iz++) {
    curr[iz]=sf_cmplx(0.,0.);
    rcurr[iz]=0.;
  }

  sendbuf = rcurr;
  if (cpuid==0) {
    rcurr_all = sf_floatalloc(nz2*nx2*ny2);
    recvbuf = rcurr_all;
    rcounts = sf_intalloc(numprocs);
    displs  = sf_intalloc(numprocs);
  } else {
    rcurr_all = NULL;
    recvbuf = NULL;
    rcounts = NULL;
    displs = NULL;
  }

  MPI_Gather(&nzx2, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Gather(&ozx2, 1, MPI_INT, displs, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* MAIN LOOP */
  for (it=0; it<nt; it++) {
    if(verb) sf_warning("it=%d;",it);

    /* matrix multiplication */
    mcfft3(curr,cwave);

    for (im = 0; im < m2; im++) {
      for (iky = 0; iky < n_local; iky++) {
        for (ikx = 0; ikx < nx2; ikx++) {
          for (ikz = 0; ikz < nz2; ikz++) {
            i = ikz + ikx*nz2 + (o_local+iky)*nx2*nz2;
            j = ikz + ikx*nz2 + iky*nx2*nz2;
#ifdef SF_HAS_COMPLEX_H
            cwavem[j] = cwave[j]*rt[i][im];
#else
            cwavem[j] = sf_cmul(cwave[j],rt[i][im]);
#endif
          }
        }
      }
      imcfft3(wave[im],cwavem);
    }

    for (iy = 0; iy < n_local && (iy+o_local)<ny; iy++) {
      for (ix = 0; ix < nx; ix++) {
        for (iz=0; iz < nz; iz++) {
          i = iz + ix*nz + (o_local+iy)*nx*nz;  /* original grid */
          j = iz + ix*nz2+ iy*nx2*nz2; /* padded grid */
#ifdef SF_HAS_COMPLEX_H		
          c = ww[it] * rr[i];
#else
          c = sf_crmul(ww[it],rr[i]);
#endif

          for (im = 0; im < m2; im++) {
#ifdef SF_HAS_COMPLEX_H
            c += lt[im][i]*wave[im][j];
#else
            c += sf_cmul(lt[im][i],wave[im][j]);
#endif
          }
		    
          curr[j] = c;
          rcurr[j]= crealf(c);
        }
      }
    }

    /* output movie */
    if (NULL != snaps && 0 == it%snap) {
      MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);

      if (cpuid==0) {
        for (iy = 0; iy < ny; iy++)
          for (ix = 0; ix < nx; ix++)
            sf_floatwrite(rcurr_all+nz2*(ix+nx2*iy),nz,snaps);
      }
    }

  }
  if(verb) sf_warning(".");    
	    	
  /* write wavefield to output */
  MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);
  if (cpuid==0) {
    for (iy = 0; iy < ny; iy++)
      for (ix = 0; ix < nx; ix++)
        sf_floatwrite(rcurr_all+nz2*(ix+nx2*iy),nz,Fo);
  }
    
  mcfft3_finalize();

  MPI_Finalize();
  exit (0);
}
Exemplo n.º 2
0
int main(int argc, char* argv[])
{
    bool mig, sub;
    int it, nt, ix, nx, iz, nz, nx2, nz2, nzx, nzx2, ih, nh, nh2;
    int im, i, j, m2, it1, it2, its, ikz, ikx, ikh, n2, nk, snap;
    float dt, dx, dz, c, old, dh;
    float *curr, *prev, **img, **dat, **lft, **rht, **wave;
    sf_complex *cwave, *cwavem;
    sf_file data, image, left, right, snaps;

    /*MPI related*/
    int cpuid,numprocs;
    int provided;
    int n_local, o_local, nz_local;
    int ozx2;
    float *sendbuf, *recvbuf, *wave_all;
    int *rcounts, *displs;

    /*wall time*/
    double startTime, elapsedTime;
    double clockZero = 0.0;

    MPI_Init_thread(&argc,&argv,MPI_THREAD_FUNNELED,&provided);
    threads_ok = provided >= MPI_THREAD_FUNNELED;

    sf_init(argc,argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &cpuid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    if (!sf_getbool("mig",&mig)) mig=false;
    /* if n, modeling; if y, migration */

    if (!sf_getint("snap",&snap)) snap=0;
    /* interval for snapshots */

    snaps = (snap > 0)? sf_output("snaps"): NULL;
    /* (optional) snapshot file */

    if (mig) { /* migration */
	data = sf_input("input");
	image = sf_output("output");

	if (!sf_histint(data,"n1",&nh)) sf_error("No n1=");
	if (!sf_histfloat(data,"d1",&dh)) sf_error("No d1=");

	if (!sf_histint(data,"n2",&nx)) sf_error("No n2=");
	if (!sf_histfloat(data,"d2",&dx)) sf_error("No d2=");

	if (!sf_histint(data,"n3",&nt)) sf_error("No n3=");
	if (!sf_histfloat(data,"d3",&dt)) sf_error("No d3=");

	if (!sf_getint("nz",&nz)) sf_error("Need nz=");
	/* time samples (if migration) */
	if (!sf_getfloat("dz",&dz)) sf_error("Need dz=");
	/* time sampling (if migration) */
        
        if (cpuid==0) {
	sf_putint(image,"o1",0.);
	sf_putint(image,"n1",nz);
	sf_putfloat(image,"d1",dz);
	sf_putstring(image,"label1","Depth");
	sf_putint(image,"o2",0.);
	sf_putint(image,"n2",nx);
	sf_putfloat(image,"d2",dx);
	sf_putstring(image,"label2","Midpoint");
	sf_putint(image,"n3",1); /* stack for now */
        }
    } else { /* modeling */
	image = sf_input("input");
	data = sf_output("output");

	if (!sf_histint(image,"n1",&nz)) sf_error("No n1=");
	if (!sf_histfloat(image,"d1",&dz)) sf_error("No d1=");

	if (!sf_histint(image,"n2",&nx)) sf_error("No n2=");
	if (!sf_histfloat(image,"d2",&dx)) sf_error("No d2=");

	if (!sf_getint("nt",&nt)) sf_error("Need nt=");
	/* time samples (if modeling) */
	if (!sf_getfloat("dt",&dt)) sf_error("Need dt=");
	/* time sampling (if modeling) */

	if (!sf_getint("nh",&nh)) sf_error("Need nh=");
        /* offset samples (if modeling) */
	if (!sf_getfloat("dh",&dh)) sf_error("Need dh=");
	/* offset sampling (if modeling) */

        if (cpuid==0) {
	sf_putint(data,"n1",nh);
	sf_putfloat(data,"d1",dh);
	sf_putstring(data,"label1","Half-Offset");
	sf_putint(data,"o2",0.);
	sf_putint(data,"n2",nx);
	sf_putfloat(data,"d2",dx);
	sf_putstring(data,"label2","Midpoint");
	sf_putint(data,"n3",nt);
	sf_putfloat(data,"d3",dt);
	sf_putstring(data,"label3","Time");
	sf_putstring(data,"unit3","s");
        }
    }

    if (cpuid==0) {
    if (NULL != snaps) {
      sf_putint(snaps,"n1",nh);
      sf_putfloat(snaps,"d1",dh);
      sf_putstring(snaps,"label1","Half-Offset");

      sf_putint(snaps,"n2",nx);
      sf_putfloat(snaps,"d2",dx);
      sf_putstring(snaps,"label2","Midpoint");

      sf_putint(snaps,"n3",nz);
      sf_putfloat(snaps,"d3",dz);
      sf_putstring(snaps,"label3","Depth");

      sf_putint(snaps,"n4",nt/snap);
      sf_putfloat(snaps,"d4",dt*snap);
      if (mig) {
        sf_putfloat(snaps,"o4",(nt-1)*dt);
      } else {
        sf_putfloat(snaps,"o4",0.);
      }
      sf_putstring(snaps,"label4","Time");
    }
    }

    /* Mark the starting time. */
    startTime = walltime( &clockZero );

    nk = mcfft3_init(1,nh,nx,nz,&nh2,&nx2,&nz2,&n_local,&o_local);
    nz_local = (n_local < nz-o_local)? n_local:nz-o_local;
    sf_warning("Cpuid=%d,n2=%d,n1=%d,n0=%d,local_n0=%d,local_0_start=%d,nz_local=%d",cpuid,nh2,nx2,nz2,n_local,o_local,nz_local);
    if (cpuid==0)
      if (o_local!=0) sf_error("Cpuid and o_local inconsistant!");

    nzx = nz*nx*nh;
    //nzx2 = nz2*nx2*nh2;
    nzx2 = n_local*nx2*nh2;
    ozx2 = o_local*nx2*nh2;

    img = sf_floatalloc2(nz,nx);
    dat = sf_floatalloc2(nh,nx);

    /* propagator matrices */
    left = sf_input("left");
    right = sf_input("right");

    if (!sf_histbool(left,"sub",&sub) && !sf_getbool("sub",&sub)) sub=true;
    /* if -1 is included in the matrix */

    if (!sf_histint(left,"n1",&n2) || n2 != nzx) sf_error("Need n1=%d in left",nzx);
    if (!sf_histint(left,"n2",&m2))  sf_error("No n2= in left");
    
    if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2);
    if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk);
 
    lft = sf_floatalloc2(nzx,m2);
    rht = sf_floatalloc2(m2,nk);

    sf_floatread(lft[0],nzx*m2,left);
    sf_floatread(rht[0],m2*nk,right);

    curr = sf_floatalloc(nzx2);
    prev = sf_floatalloc(nzx2);

    cwave  = sf_complexalloc(nk);
    cwavem = sf_complexalloc(nk);
    wave = sf_floatalloc2(nzx2,m2);

#ifdef _OPENMP
#pragma omp parallel for default(shared) private(iz)
#endif
    for (iz=0; iz < nzx2; iz++) {
	curr[iz]=0.;
	prev[iz]=0.;
    }

    sendbuf = prev;
    if (cpuid==0) {
      wave_all = sf_floatalloc(nh2*nx2*nz2);
      recvbuf = wave_all;
      rcounts = sf_intalloc(numprocs);
      displs  = sf_intalloc(numprocs);
    } else {
      wave_all = NULL;
      recvbuf = NULL;
      rcounts = NULL;
      displs = NULL;
    }

    MPI_Gather(&nzx2, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Gather(&ozx2, 1, MPI_INT, displs, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (mig) { /* migration */
	/* step backward in time */
	it1 = nt-1;
	it2 = -1;
	its = -1;	
    } else { /* modeling */
	sf_floatread(img[0],nz*nx,image);

	/* transpose and initialize at zero offset */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(iz,ix)
#endif
	for (iz=0; iz < nz_local; iz++) {
	    for (ix=0; ix < nx; ix++) {
		curr[nh2*(ix+iz*nx2)]=img[ix][iz+o_local];
	    }
	}
	
	/* step forward in time */
	it1 = 0;
	it2 = nt;
	its = +1;
    }

    /* time stepping */
    for (it=it1; it != it2; it += its) {
	sf_warning("it=%d;",it);

	if (mig) { /* migration <- read data */
	    sf_floatread(dat[0],nx*nh,data);
	} else {
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ix,ih)
#endif
	    for (ix=0; ix < nx; ix++) {
		for (ih=0; ih < nh; ih++) {
		    dat[ix][ih] = 0.;
		}
	    }
	}
	
	if (NULL != snaps && 0 == it%snap) {
          MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);
          if (cpuid==0) {
            for (iz = 0; iz < nz; iz++)
              for (ix = 0; ix < nx; ix++)
                sf_floatwrite(wave_all+nh2*(ix+nx2*iz),nh,snaps);
          }
        }

	/* at z=0 */
        if (cpuid==0) {
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ix,ih)
#endif
	for (ix=0; ix < nx; ix++) {
	    for (ih=0; ih < nh; ih++) {
		if (mig) {
		    curr[ix*nh2+ih] += dat[ix][ih];
		} else {
		    dat[ix][ih] = curr[ix*nh2+ih];
		}
	    }
	}
        }

	/* matrix multiplication */
	mcfft3(curr,cwave);

	for (im = 0; im < m2; im++) {
          //for (ik = 0; ik < nk; ik++) {
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ikz,ikx,ikh,i,j)
#endif
          for (ikz = 0; ikz < n_local; ikz++) {
            for (ikx = 0; ikx < nx2; ikx++) {
              for (ikh = 0; ikh < nh2; ikh++) {
                i = ikh + ikx*nh2 + (o_local+ikz)*nx2*nh2;
                j = ikh + ikx*nh2 + ikz*nx2*nh2;
#ifdef SF_HAS_COMPLEX_H
		cwavem[j] = cwave[j]*rht[i][im];
#else
		cwavem[j] = sf_crmul(cwave[j],rht[i][im]);
#endif
              }
            }
          }
          imcfft3(wave[im],cwavem);
	}

#ifdef _OPENMP
#pragma omp parallel for default(shared) private(ix,iz,ih,i,j,im,old,c)
#endif
        for (iz=0; iz < nz_local; iz++) {
	    for (ix = 0; ix < nx; ix++) {
		for (ih=0; ih < nh; ih++) {	
                    i = ih + ix*nh + (o_local+iz)*nx*nh;  /* original grid */
                    j = ih + ix*nh2+ iz*nx2*nh2; /* padded grid */
		
		    old = curr[j];

		    c = sub? 2*old: 0.0f;

		    c -= prev[j];

		    prev[j] = old;

		    for (im = 0; im < m2; im++) {
			c += lft[im][i]*wave[im][j];
		    }
		    
		    curr[j] = c;
		}
	    }
	}
	
	if (!mig) { /* modeling -> write out data */
          if (cpuid==0)
	    sf_floatwrite(dat[0],nx*nh,data);
	}
    }
    sf_warning(".");

    if (mig) {
      sendbuf = curr;
      MPI_Gatherv(sendbuf, nzx2, MPI_FLOAT, recvbuf, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);

      if (cpuid==0) {
        /* transpose */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(iz,ix)
#endif
        for (iz=0; iz < nz; iz++) {
          for (ix=0; ix < nx; ix++) {
            img[ix][iz] = wave_all[nh2*(ix+iz*nx2)];
          }
        }
	sf_floatwrite(img[0],nz*nx,image);
      }

    }

    mcfft3_finalize();

    /* Work's done. Get the elapsed wall time. */
    elapsedTime = walltime( &startTime );
    /* Print the wall time and terminate. */
    if (cpuid==0)
      printf("\nwall time = %.5fs\n", elapsedTime);

    MPI_Finalize();

    exit(0);
}