Пример #1
0
int main(void)
{

    size_t i, j, k, l;

	for(k = 0; k < DIM_Z; ++k)
		for(i = 0; i < DIM_X; ++i)
	   		for(j = 0; j < DIM_Y; ++j)
			{
				u[DY3(i, j, k)] = 0.002 * pi + (j + i + k) % 2;
				con[DY3(i, j, k)] = 0.01 * (pi / 2.0) - (j + i + k) % 2; 

				aim[DY3(i, j, k)] = 0.11 * (pi / 3.0) - (j + i * 3 + k) % 2;
				aip[DY3(i, j, k)] = 0.13 * (pi / 3.1) + (2 * j + i + k) % 2;

				ajm[DY3(i, j, k)] = 0.21 * (pi / 1.05) - (j + i) % 2;
				ajp[DY3(i, j, k)] = 0.05 * (pi / 0.1) + (j + i) % 2;

				akm[DY3(i, j, k)] = 0.21 + (j + i + k) % 2; 
				akp[DY3(i, j, k)] = 0.7 * (pi / 1.05) - (3 * j + i + k) % 2;
		 	}


    unsigned long long int startTime1, endTime1, startTime2, endTime2;

    ////////////////////START TIMER//////////////////
    /* clock_gettime(0, &tm);  */
    /* startTime1 = getRealNanosecondsCount(tm); */
    time_t beg = time(NULL);
    /////////////////////////////////////////////////

    for (l = 0; l < 90; ++l)
	for(k = 0; k < DIM_Z; ++k)
		for(i = 0; i < DIM_X; ++i)
	   		for(j = 0; j < DIM_Y; ++j)
			{
                temp1[DY3(i, j, k)]  = con[DY3(i, j, k)]  +
									  0.5  * ajm[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.15 * ajp[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.5  * akm[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.15 * akp[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.5  * ajm[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.15 * ajp[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.5  * akm[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.15 * akp[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.5  * ajm[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.15 * ajp[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.5  * akm[DY3(i, j, k)] * u[DY3(i, j, k)] +
									  0.15 * akp[DY3(i, j, k)] * u[DY3(i, j, k)];
            }

    ///////////////////////FINISH TIMER///////////////
    printf("%d\n", time(NULL) - beg);
    /* clock_gettime(0, &tm);  */
    /* endTime1 = getRealNanosecondsCount(tm); */
    //////////////////////////////////////////////////


    /* printf("Elapsed time1: %llu nsec\n", endTime1 - startTime1); */


    ////////////////////START TIMER//////////////////
    /* clock_gettime(0, &tm);  */
    /* startTime2  = getRealNanosecondsCount(tm); */
    beg = time(NULL);
    /////////////////////////////////////////////////

    for (l = 0; l < 90; ++l)
   	for(i = 0; i < SIZE; ++i)
    {
        temp2[i]  = con[i]  + 0.5  * ajm[i] * u[i] +
                              0.15 * ajp[i] * u[i] +
                              0.5  * akm[i] * u[i] +
                              0.15 * akp[i] * u[i] +
                              0.5  * ajm[i] * u[i] +
                              0.15 * ajp[i] * u[i] +
                              0.5  * akm[i] * u[i] +
                              0.15 * akp[i] * u[i] +
                              0.5  * ajm[i] * u[i] +
                              0.15 * ajp[i] * u[i] +
                              0.5  * akm[i] * u[i] +
                              0.15 * akp[i] * u[i];
    }

    ///////////////////////FINISH TIMER///////////////
    printf("%d\n", time(NULL) - beg);
    /* clock_gettime(0, &tm);  */
    /* endTime2 = getRealNanosecondsCount(tm); */
    //////////////////////////////////////////////////

    /* printf("Elapsed time2: %llu nsec\n", endTime2 - startTime2); */


    double checkSum1 = 0.0;
	double checkSum2 = 0.0;

   	for(i = 0; i < SIZE; ++i)
    {
        checkSum1 += temp1[i];
		checkSum2 += temp2[i];
    } 

    printf("Check_sum1: %31.30E\nCheck_sum2: %31.30E\n", checkSum1, checkSum2);

    return 0;
}
Пример #2
0
int main(int argc, char* argv[])
{
    bool verb,fsrf,snap,expl,dabc,abcone,is2D,cfl; 
    bool ignore_interpolation = false; /* ignore interpolation for receivers - makes code faster, but only works when receivers are on grid points */
    int  jsnap,ntsnap,jdata;
    float fmax, safety;
    enum SourceType srctype;
    /* I/O files */
    sf_file Fwav=NULL; /* wavelet   */
    sf_file Fsou=NULL; /* sources   */
    sf_file Frec=NULL; /* receivers */
    sf_file Fvel=NULL; /* velocity  */
    sf_file Fden=NULL; /* density   */
    sf_file Fdat=NULL; /* data      */
    sf_file Fwfl=NULL; /* wavefield */
/*set all y variables to be either zero or null to avoid compiler warnings
about being uninitialized */
    /* cube axes */
    sf_axis at,az,ax,ay=NULL; 
    sf_axis as,ar;

    int     nt,nz,nx,ny=0,ns,nr,nb;
    int     it,iz,ix,iy=0;
    float   dt,dz,dx,dy=0,idz,idx,idy=0;

    
    /* I/O arrays */
    float  *ww=NULL;           /* wavelet   */
    float  *dd=NULL;           /* data      */

    
    /* FD operator size */
    float co,cax,cbx,cay,cby,caz,cbz;

    /* wavefield cut params */
    sf_axis   acz=NULL,acx=NULL,acy=NULL;
    int       nqz,nqx,nqy;
    float     oqz,oqx,oqy;
    float     dqz,dqx,dqy;

    /*------------------------------------------------------------*/
    /* init RSF */
    sf_init(argc,argv);

    /*------------------------------------------------------------*/
    /* OMP parameters */

    if( !sf_getbool("ignint",&ignore_interpolation)) ignore_interpolation = false;
    if(! sf_getbool("verb",&verb)) verb=false; /* verbosity flag */
    if(! sf_getbool("snap",&snap)) snap=false; /* wavefield snapshots flag */
    if(! sf_getbool("free",&fsrf)) fsrf=false; /* free surface flag */
    if(! sf_getbool("expl",&expl)) expl=false; /* "exploding reflector" */
    if(! sf_getbool("dabc",&dabc)) dabc=false; /* absorbing BC */
    if(! sf_getbool("cfl",&cfl)) cfl=false; /* Use CFL check */ 
    if(! sf_getbool("abcone",&abcone)) abcone=false; /* Use Zero-incident boundary condition*/ 
   
    int ttype = 0;
    if(! sf_getint("srctype",&ttype)) ttype = 0; /* source type, see comments */
    if(ttype < 0 || ttype > 1) sf_error("Invalid source type specified");
           srctype = ttype;
    if (cfl) {
        if(! sf_getfloat("fmax",&fmax)) { /* max frequency for cfl check */
            sf_error("CFL: Must specify fmax for CFL check");
        }
        if(! sf_getfloat("safety",&safety) || safety < 0.0) safety= 0.8; /*safety factor for cfl check*/
    }
    /*------------------------------------------------------------*/

    /*------------------------------------------------------------*/
    /* I/O files */
    Fwav = sf_input ("in" ); /* wavelet   */
    Fvel = sf_input ("vel"); /* velocity  */
    Fsou = sf_input ("sou"); /* sources   */
    Frec = sf_input ("rec"); /* receivers */
    Fwfl = sf_output("wfl"); /* wavefield */
    Fdat = sf_output("out"); /* data      */
    Fden = sf_input ("den"); /* density   */

	/* Determine dimensionality, if 2D then axis 3 has n size of 1 */
	sf_axis test = sf_iaxa(Fvel,3);
	if(sf_n(test) == 1) is2D = true;
	else is2D = false;

    /*------------------------------------------------------------*/
    /* axes */
    at = sf_iaxa(Fwav,2); sf_setlabel(at,"t"); if(verb) sf_raxa(at); /* time */
    az = sf_iaxa(Fvel,1); sf_setlabel(az,"z"); if(verb) sf_raxa(az); /* depth */
    ax = sf_iaxa(Fvel,2); sf_setlabel(ax,"x"); if(verb) sf_raxa(ax); /* space */

    as = sf_iaxa(Fsou,2); sf_setlabel(as,"s"); if(verb) sf_raxa(as); /* sources */
    ar = sf_iaxa(Frec,2); sf_setlabel(ar,"r"); if(verb) sf_raxa(ar); /* receivers */

    nt = sf_n(at); dt = sf_d(at);
    nz = sf_n(az); dz = sf_d(az);
    nx = sf_n(ax); dx = sf_d(ax);

    ns = sf_n(as);
    nr = sf_n(ar);

    if(!is2D){ /*If 3D*/
		ay=sf_iaxa(Fvel,3); sf_setlabel(ay,"y"); if(verb) sf_raxa(ay); /*space*/
		ny=sf_n(ay); dy=sf_d(ay);
	}
    /*------------------------------------------------------------*/

    /*------------------------------------------------------------*/
    /* other execution parameters */
    if(! sf_getint("jdata",&jdata)) jdata=1;
    if(snap) {  /* save wavefield every *jsnap* time steps */
    	if(! sf_getint("jsnap",&jsnap)) jsnap=nt;        
    }
    /*------------------------------------------------------------*/
if(is2D){
/* Begin 2d code */
    /* FDM structure */
    fdm2d    fdm=NULL;
    abcone2d abc=NULL;
    sponge   spo=NULL;
    pt2d   *ss=NULL;           /* sources   */
    pt2d   *rr=NULL;           /* receivers */
   
    float **tt=NULL;
    float **ro=NULL;           /* density */
    float **roz=NULL;          /* normalized 1st derivative of density on axis 1 */
    float **rox=NULL;          /* normalized 1st derivative of density on axis 2 */
    float **vp=NULL;           /* velocity */
    float **vt=NULL;           /* temporary vp*vp * dt*dt */

    float **um,**uo,**up,**ua,**ut; /* wavefield: um = U @ t-1; uo = U @ t; up = U @ t+1 */

    /* linear interpolation weights/indices */
    lint2d cs,cr;
    float     **uc=NULL;

    /*------------------------------------------------------------*/
    /* expand domain for FD operators and ABC */
    if( !sf_getint("nb",&nb) || nb<NOP) nb=NOP;

    fdm=fdutil_init(verb,fsrf,az,ax,nb,1);

    /*------------------------------------------------------------*/

    /*------------------------------------------------------------*/
    /* setup output data header */
    sf_oaxa(Fdat,ar,1);

    sf_setn(at,nt/jdata);
    sf_setd(at,dt*jdata);
    sf_oaxa(Fdat,at,2);

    /* setup output wavefield header */
    if(snap) {
	if(!sf_getint  ("nqz",&nqz)) nqz=sf_n(az);
	if(!sf_getint  ("nqx",&nqx)) nqx=sf_n(ax);

	if(!sf_getfloat("oqz",&oqz)) oqz=sf_o(az);
	if(!sf_getfloat("oqx",&oqx)) oqx=sf_o(ax);

    sf_setn(az,fdm->nzpad); sf_seto(az,fdm->ozpad); if(verb) sf_raxa(az);
    sf_setn(ax,fdm->nxpad); sf_seto(ax,fdm->oxpad); if(verb) sf_raxa(ax);
	dqz=sf_d(az);
	dqx=sf_d(ax);

	acz = sf_maxa(nqz,oqz,dqz); sf_raxa(acz);
	acx = sf_maxa(nqx,oqx,dqx); sf_raxa(acx);
	/* check if the imaging window fits in the wavefield domain */

	uc=sf_floatalloc2(sf_n(acz),sf_n(acx));

	ntsnap=0;
	for(it=0; it<nt; it++) {
	    if(it%jsnap==0) ntsnap++;
	}
	sf_setn(at,  ntsnap);
	sf_setd(at,dt*jsnap);
	if(verb) sf_raxa(at);

	sf_oaxa(Fwfl,acz,1);
	sf_oaxa(Fwfl,acx,2);
	sf_oaxa(Fwfl,at, 3);
    }

    if(expl) {
    	ww = sf_floatalloc( 1);
    } else {
    	ww = sf_floatalloc(ns);
    }
    dd = sf_floatalloc(nr);

    /*------------------------------------------------------------*/
    /* setup source/receiver coordinates */
    ss = (pt2d*) sf_alloc(ns,sizeof(*ss)); 
    rr = (pt2d*) sf_alloc(nr,sizeof(*rr)); 

    pt2dread1(Fsou,ss,ns,2); /* read (x,z) coordinates */
    pt2dread1(Frec,rr,nr,2); /* read (x,z) coordinates */

    cs = lint2d_make(ns,ss,fdm);
    cr = lint2d_make(nr,rr,fdm);

    /*------------------------------------------------------------*/
    /* setup FD coefficients */
    idz = 1/dz;
    idx = 1/dx;

    co = C0 * (idx*idx+idz*idz);
    cax= CA *  idx*idx;
    cbx= CB *  idx*idx;
    caz= CA *  idz*idz;
    cbz= CB *  idz*idz;

    /*------------------------------------------------------------*/ 
    tt = sf_floatalloc2(nz,nx); 

    ro  =sf_floatalloc2(fdm->nzpad,fdm->nxpad);
    roz =sf_floatalloc2(fdm->nzpad,fdm->nxpad);
    rox =sf_floatalloc2(fdm->nzpad,fdm->nxpad);
    vp  =sf_floatalloc2(fdm->nzpad,fdm->nxpad); 
    vt  =sf_floatalloc2(fdm->nzpad,fdm->nxpad); 

    /* input density */
    sf_floatread(tt[0],nz*nx,Fden);     expand(tt,ro ,fdm);
    /* normalized density derivatives */
    for    (ix=NOP; ix<fdm->nxpad-NOP; ix++) {
	for(iz=NOP; iz<fdm->nzpad-NOP; iz++) {
	    roz[ix][iz] = DZ(ro,ix,iz,idz) / ro[ix][iz];
	    rox[ix][iz] = DX(ro,ix,iz,idx) / ro[ix][iz];
	}
    }   
    free(*ro); free(ro);

    /* input velocity */
    sf_floatread(tt[0],nz*nx,Fvel );    expand(tt,vp,fdm);
    float vpmax = 0.0; float vpmin = 10000000000000000;
    /* precompute vp^2 * dt^2 */
    for    (ix=0; ix<fdm->nxpad; ix++) {
        for(iz=0; iz<fdm->nzpad; iz++) {
            vt[ix][iz] = vp[ix][iz] * vp[ix][iz] * dt*dt;
            if (vp[ix][iz] < vpmin) vpmin = vp[ix][iz];
            else if (vp[ix][iz] > vpmax) vpmax = vp[ix][iz];
        }
    }
    if (cfl) cfl_acoustic(vpmin,vpmax,dx,-1.0f,dz,dt,fmax,safety,NUM_INTERVALS);
    if(fsrf) { /* free surface */
        for    (ix=0; ix<fdm->nxpad; ix++) {
            for(iz=0; iz<fdm->nb; iz++) {
                vt[ix][iz]=0;
            }
        }
    }

    free(*tt); free(tt);    
    /*------------------------------------------------------------*/

    /*------------------------------------------------------------*/
    /* allocate wavefield arrays */
    um=sf_floatalloc2(fdm->nzpad,fdm->nxpad);
    uo=sf_floatalloc2(fdm->nzpad,fdm->nxpad);
    up=sf_floatalloc2(fdm->nzpad,fdm->nxpad);
    ua=sf_floatalloc2(fdm->nzpad,fdm->nxpad);

    for    (ix=0; ix<fdm->nxpad; ix++) {
	for(iz=0; iz<fdm->nzpad; iz++) {
	    um[ix][iz]=0;
	    uo[ix][iz]=0;
	    up[ix][iz]=0;
	    ua[ix][iz]=0;
	}
    }

    /*------------------------------------------------------------*/
	if (abcone) abc = abcone2d_make(NOP,dt,vp,fsrf,fdm);
    if(dabc) {
	/* one-way abc setup */
	/* sponge abc setup */
	spo = sponge_make(fdm->nb);
    }

    /*------------------------------------------------------------*/
    /* 
     *  MAIN LOOP
     */
    /*------------------------------------------------------------*/
    if(verb) fprintf(stderr,"\n");
    for (it=0; it<nt; it++) {
	if(verb) fprintf(stderr,"%d/%d \r",it,nt);

#pragma omp parallel for				\
    schedule(dynamic) \
    private(ix,iz)					\
    shared(fdm,ua,uo,co,caz,cbz)
	for    (ix=NOP; ix<fdm->nxpad-NOP; ix++) {
	    for(iz=NOP; iz<fdm->nzpad-NOP; iz++) {
		
		/* 4th order Laplacian operator */
		ua[ix][iz] = 
		    co * uo[ix  ][iz  ] + 
		    caz*(uo[ix  ][iz-1] + uo[ix  ][iz+1]) +
		    cbz*(uo[ix  ][iz-2] + uo[ix  ][iz+2]) ; 
		/* density term */
        /*ua[ix][iz] -= (
		    DZ(uo,ix,iz,idz) * roz[ix][iz] +
		    DX(uo,ix,iz,idx) * rox[ix][iz] );
        */
	    }
	}   

#pragma omp parallel for				\
    schedule(dynamic)			\
    private(ix,iz)					\
    shared(fdm,ua,uo,co,cax,cbx)
	for    (ix=NOP; ix<fdm->nxpad-NOP; ix++) {
	    for(iz=NOP; iz<fdm->nzpad-NOP; iz++) {
        ua[ix][iz] =  ua[ix][iz] + 
		    cax*(uo[ix-1][iz  ] + uo[ix+1][iz  ]) +
		    cbx*(uo[ix-2][iz  ] + uo[ix+2][iz  ]);
            }
    }

	/* inject acceleration source */
    if (srctype == ACCELERATION){
            if(expl) {
                sf_floatread(ww, 1,Fwav);
                lint2d_inject1(ua,ww[0],cs);
            } else {
                  sf_floatread(ww,ns,Fwav);
                  lint2d_inject(ua,ww,cs);
            }
    }

	/* step forward in time */
#pragma omp parallel for	    \
    schedule(dynamic) \
    private(ix,iz)		    \
    shared(fdm,ua,uo,um,up,vt)
	for    (ix=0; ix<fdm->nxpad; ix++) {
	    for(iz=0; iz<fdm->nzpad; iz++) {
		up[ix][iz] = 2*uo[ix][iz] 
		    -          um[ix][iz] 
		    +          ua[ix][iz] * vt[ix][iz];
	    }
	}

    if(srctype == DISPLACEMENT){
            if(expl) {
                sf_floatread(ww, 1,Fwav);
                lint2d_inject1(up,ww[0],cs);
            } else {
                  sf_floatread(ww,ns,Fwav);
                  lint2d_inject(up,ww,cs);
            }
    }

    
	/* circulate wavefield arrays */
	ut=um;
	um=uo;
	uo=up;
	up=ut;
    
    if (abcone) abcone2d_apply(uo,um,NOP,abc,fdm);
	if(dabc) {
	    /* one-way abc apply */
	    sponge2d_apply(um,spo,fdm);
	    sponge2d_apply(uo,spo,fdm);
	    sponge2d_apply(up,spo,fdm);
	}

	/* extract data */
    if(ignore_interpolation){
        cut2d_extract(uo,dd,cr);
    } else {
	    lint2d_extract(uo,dd,cr);
    }

	if(snap && it%jsnap==0) {
	    cut2d(uo,uc,fdm,acz,acx);
	    sf_floatwrite(uc[0],sf_n(acz)*sf_n(acx),Fwfl);
	}
	if(        it%jdata==0) 
	    sf_floatwrite(dd,nr,Fdat);
    }
    if(verb) fprintf(stderr,"\n");    

    /*------------------------------------------------------------*/
    /* deallocate arrays */
    free(*um); free(um);
    free(*up); free(up);
    free(*uo); free(uo);
    free(*ua); free(ua);
    if(snap) { free(*uc); free(uc); }

    free(*rox); free(rox);
    free(*roz); free(roz);
    free(*vp);  free(vp);
    free(*vt);  free(vt);

    free(ww);
    free(ss);
    free(rr);
    free(dd);


    exit (0);
} else {
    /* FDM structure */
    fdm3d    fdm=NULL;
    abcone3d abc=NULL;
    sponge   spo=NULL;

    /* I/O arrays */
    pt3d   *ss=NULL;           /* sources   */
    pt3d   *rr=NULL;           /* receivers */
	/* Non-universal arrays */
    float***tt=NULL;
    float***ro=NULL;           /* density */
    float***roz=NULL;          /* normalized 1st derivative of density on axis 1 */
    float***rox=NULL;          /* normalized 1st derivative of density on axis 2 */
    float***roy=NULL;          /* normalized 1st derivative of density on axis 3 */
    float***vp=NULL;           /* velocity */
    float***vt=NULL;           /* temporary vp*vp * dt*dt */

    float***um,***uo,***up,***ua,***ut; /* wavefield: um = U @ t-1; uo = U @ t; up = U @ t+1 */

    /* linear interpolation weights/indices */
    lint3d cs,cr;

	/* Wavefield cut params that are not universal */
    float     ***uc=NULL;


    /*------------------------------------------------------------*/
    /* expand domain for FD operators and ABC */
    if( !sf_getint("nb",&nb) || nb<NOP) nb=NOP;

    fdm=fdutil3d_init(verb,fsrf,az,ax,ay,nb,1);

    sf_setn(az,fdm->nzpad); sf_seto(az,fdm->ozpad); if(verb) sf_raxa(az);
    sf_setn(ax,fdm->nxpad); sf_seto(ax,fdm->oxpad); if(verb) sf_raxa(ax);
    sf_setn(ay,fdm->nypad); sf_seto(ay,fdm->oypad); if(verb) sf_raxa(ay);
    /*------------------------------------------------------------*/

    /*------------------------------------------------------------*/
    /* setup output data header */
    sf_oaxa(Fdat,ar,1);

    sf_setn(at,nt/jdata);
    sf_setd(at,dt*jdata);
    sf_oaxa(Fdat,at,2);

    /* setup output wavefield header */
    if(snap) {
	if(!sf_getint  ("nqz",&nqz)) nqz=sf_n(az);
	if(!sf_getint  ("nqx",&nqx)) nqx=sf_n(ax);
	if(!sf_getint  ("nqy",&nqy)) nqy=sf_n(ay);

	if(!sf_getfloat("oqz",&oqz)) oqz=sf_o(az);
	if(!sf_getfloat("oqx",&oqx)) oqx=sf_o(ax);
	if(!sf_getfloat("oqy",&oqy)) oqy=sf_o(ay);

	dqz=sf_d(az);
	dqx=sf_d(ax);
	dqy=sf_d(ay);

	acz = sf_maxa(nqz,oqz,dqz); sf_raxa(acz);
	acx = sf_maxa(nqx,oqx,dqx); sf_raxa(acx);
	acy = sf_maxa(nqy,oqy,dqy); sf_raxa(acy);
	/* check if the imaging window fits in the wavefield domain */

	uc=sf_floatalloc3(sf_n(acz),sf_n(acx),sf_n(acy));

	ntsnap=0;
	for(it=0; it<nt; it++) {
	    if(it%jsnap==0) ntsnap++;
	}
	sf_setn(at,  ntsnap);
	sf_setd(at,dt*jsnap);
	if(verb) sf_raxa(at);

	sf_oaxa(Fwfl,acz,1);
	sf_oaxa(Fwfl,acx,2);
	sf_oaxa(Fwfl,acy,3);
	sf_oaxa(Fwfl,at, 4);
    }

    if(expl) {
	ww = sf_floatalloc( 1);
    } else {
	ww = sf_floatalloc(ns);
    }
    dd = sf_floatalloc(nr);

    /*------------------------------------------------------------*/
    /* setup source/receiver coordinates */
    ss = (pt3d*) sf_alloc(ns,sizeof(*ss)); 
    rr = (pt3d*) sf_alloc(nr,sizeof(*rr)); 

    pt3dread1(Fsou,ss,ns,3); /* read (x,y,z) coordinates */
    pt3dread1(Frec,rr,nr,3); /* read (x,y,z) coordinates */

    cs = lint3d_make(ns,ss,fdm);
    cr = lint3d_make(nr,rr,fdm);

    /*------------------------------------------------------------*/
    /* setup FD coefficients */
    idz = 1/dz;
    idx = 1/dx;
    idy = 1/dy;

    co = C0 * (idx*idx+idy*idy+idz*idz);
    cax= CA *  idx*idx;
    cbx= CB *  idx*idx;
    cay= CA *  idy*idy;
    cby= CB *  idy*idy;
    caz= CA *  idz*idz;
    cbz= CB *  idz*idz;

    /*------------------------------------------------------------*/ 
    tt = sf_floatalloc3(nz,nx,ny); 

    ro  =sf_floatalloc3(fdm->nzpad,fdm->nxpad,fdm->nypad);
    roz =sf_floatalloc3(fdm->nzpad,fdm->nxpad,fdm->nypad);
    rox =sf_floatalloc3(fdm->nzpad,fdm->nxpad,fdm->nypad);
    roy =sf_floatalloc3(fdm->nzpad,fdm->nxpad,fdm->nypad);
    vp  =sf_floatalloc3(fdm->nzpad,fdm->nxpad,fdm->nypad); 
    vt  =sf_floatalloc3(fdm->nzpad,fdm->nxpad,fdm->nypad); 

    /* input density */
    sf_floatread(tt[0][0],nz*nx*ny,Fden);     expand3d(tt,ro ,fdm);

    /* normalized density derivatives */
    for        (iy=NOP; iy<fdm->nypad-NOP; iy++) {
	for    (ix=NOP; ix<fdm->nxpad-NOP; ix++) {
	    for(iz=NOP; iz<fdm->nzpad-NOP; iz++) {
		roz[iy][ix][iz] = DZ3(ro,ix,iy,iz,idz) / ro[iy][ix][iz];
		rox[iy][ix][iz] = DX3(ro,ix,iy,iz,idx) / ro[iy][ix][iz];
		roy[iy][ix][iz] = DY3(ro,ix,iy,iz,idy) / ro[iy][ix][iz];
	    }
	}   
    }
    free(**ro);  free(*ro); free(ro);  

    /* input velocity */
    sf_floatread(tt[0][0],nz*nx*ny,Fvel );    expand3d(tt,vp,fdm);
    /* precompute vp^2 * dt^2 */
    float vpmin = 1000000000000; float vpmax = 0.0;
    for        (iy=0; iy<fdm->nypad; iy++) {
        for    (ix=0; ix<fdm->nxpad; ix++) {
            for(iz=0; iz<fdm->nzpad; iz++) {
                float vpt = vp[iy][ix][iz];
                vt[iy][ix][iz] = vp[iy][ix][iz] * vp[iy][ix][iz] * dt*dt;
                if (vpt > vpmax) vpmax = vpt;
                else if (vpt < vpmin) vpmin = vpt;
            }
        }
    }

    if (cfl) cfl_acoustic(vpmin,vpmax,dx,dy,dz,dt,fmax,safety,NUM_INTERVALS);

    if(fsrf) { /* free surface */
	for        (iy=0; iy<fdm->nypad; iy++) {
	    for    (ix=0; ix<fdm->nxpad; ix++) {
		for(iz=0; iz<fdm->nb; iz++) {
		    vt[iy][ix][iz]=0;
		}
	    }
	}
    }

    free(**tt);  free(*tt); free(tt);    
    /*------------------------------------------------------------*/

    /*------------------------------------------------------------*/
    /* allocate wavefield arrays */
    um=sf_floatalloc3(fdm->nzpad,fdm->nxpad,fdm->nypad);
    uo=sf_floatalloc3(fdm->nzpad,fdm->nxpad,fdm->nypad);
    up=sf_floatalloc3(fdm->nzpad,fdm->nxpad,fdm->nypad);
    ua=sf_floatalloc3(fdm->nzpad,fdm->nxpad,fdm->nypad);

    for        (iy=0; iy<fdm->nypad; iy++) {
	for    (ix=0; ix<fdm->nxpad; ix++) {
	    for(iz=0; iz<fdm->nzpad; iz++) {
		um[iy][ix][iz]=0;
		uo[iy][ix][iz]=0;
		up[iy][ix][iz]=0;
		ua[iy][ix][iz]=0;
	    }
	}
    }

    /*------------------------------------------------------------*/
	if (abcone) abc = abcone3d_make(NOP,dt,vp,fsrf,fdm);
    if(dabc) {
	/* one-way abc setup */
	/* sponge abc setup */
	spo = sponge_make(fdm->nb);
    }

    /*------------------------------------------------------------*/
    /* 
     *  MAIN LOOP
     */
    /*------------------------------------------------------------*/
    if(verb) fprintf(stderr,"\n");
    for (it=0; it<nt; it++) {
	if(verb) fprintf(stderr,"%d/%d \r",it,nt);

#pragma omp parallel for					\
    schedule(dynamic) \
    private(ix,iy,iz)						\
    shared(fdm,ua,uo,co,cax,cay,caz,cbx,cby,cbz,idx,idy,idz)
	for        (iy=NOP; iy<fdm->nypad-NOP; iy++) {
	    for    (ix=NOP; ix<fdm->nxpad-NOP; ix++) {
		for(iz=NOP; iz<fdm->nzpad-NOP; iz++) {
		    
		    /* 4th order Laplacian operator */
		    ua[iy][ix][iz] = 
			co * uo[iy  ][ix  ][iz  ] + 
			caz*(uo[iy  ][ix  ][iz-1] + uo[iy  ][ix  ][iz+1]) +
			cbz*(uo[iy  ][ix  ][iz-2] + uo[iy  ][ix  ][iz+2]);
		    
		    /* density term */
		    /*ua[iy][ix][iz] -= (
			DZ3(uo,ix,iy,iz,idz) * roz[iy][ix][iz] +
			DX3(uo,ix,iy,iz,idx) * rox[iy][ix][iz] +
			DY3(uo,ix,iy,iz,idy) * roy[iy][ix][iz] );
            */
		 }
	    }   
	}

#pragma omp parallel for					\
    schedule(dynamic) \
    private(ix,iy,iz)						\
    shared(fdm,ua,uo,co,cax,cay,caz,cbx,cby,cbz,idx,idy,idz)
	for        (iy=NOP; iy<fdm->nypad-NOP; iy++) {
	    for    (ix=NOP; ix<fdm->nxpad-NOP; ix++) {
		    for(iz=NOP; iz<fdm->nzpad-NOP; iz++) {
		    ua[iy][ix][iz] = ua[iy][ix][iz] + 
			cax*(uo[iy  ][ix-1][iz  ] + uo[iy  ][ix+1][iz  ]) +
			cbx*(uo[iy  ][ix-2][iz  ] + uo[iy  ][ix+2][iz  ]) ;
            }
        }
    }

#pragma omp parallel for					\
    schedule(dynamic) \
    private(ix,iy,iz)						\
    shared(fdm,ua,uo,co,cax,cay,caz,cbx,cby,cbz,idx,idy,idz)
	for        (iy=NOP; iy<fdm->nypad-NOP; iy++) {
	    for    (ix=NOP; ix<fdm->nxpad-NOP; ix++) {
		    for(iz=NOP; iz<fdm->nzpad-NOP; iz++) {
		    ua[iy][ix][iz] = ua[iy][ix][iz] + 
			cay*(uo[iy-1][ix  ][iz  ] + uo[iy+1][ix  ][iz  ]) +
			cby*(uo[iy-2][ix  ][iz  ] + uo[iy+2][ix  ][iz  ]);
            }
        }
    }

	/* inject acceleration source */
    if (srctype == ACCELERATION){
        if(expl) {
            sf_floatread(ww, 1,Fwav);
            lint3d_inject1(ua,ww[0],cs);
        } else {
            sf_floatread(ww,ns,Fwav);	
            lint3d_inject(ua,ww,cs);
        }
   }

	/* step forward in time */
#pragma omp parallel for	    \
    schedule(static) \
    private(ix,iy,iz)		    \
    shared(fdm,ua,uo,um,up,vt)
	for        (iy=0; iy<fdm->nypad; iy++) {
	    for    (ix=0; ix<fdm->nxpad; ix++) {
		for(iz=0; iz<fdm->nzpad; iz++) {
		    up[iy][ix][iz] = 2*uo[iy][ix][iz] 
			-              um[iy][ix][iz] 
			+              ua[iy][ix][iz] * vt[iy][ix][iz];
		}
	    }
	}

    if (srctype == DISPLACEMENT) {
        if(expl) {
            sf_floatread(ww, 1,Fwav);
            lint3d_inject1(up,ww[0],cs);
        } else {
            sf_floatread(ww,ns,Fwav);	
            lint3d_inject(up,ww,cs);
        }
    }
	/* circulate wavefield arrays */
	ut=um;
	um=uo;
	uo=up;
	up=ut;

    if(abcone) abcone3d_apply(uo,um,NOP,abc,fdm);
	if(dabc) {
	    /* one-way abc apply */
	    sponge3d_apply(um,spo,fdm);
	    sponge3d_apply(uo,spo,fdm);
	    sponge3d_apply(up,spo,fdm);
	}

	/* extract data */
    if (ignore_interpolation) {
	    cut3d_extract(uo,dd,cr);
    } else {
	    lint3d_extract(uo,dd,cr);
    }

	if(snap && it%jsnap==0) {
	    cut3d(uo,uc,fdm,acz,acx,acy);
	    sf_floatwrite(uc[0][0],sf_n(acz)*sf_n(acx)*sf_n(acy),Fwfl);
	}
	if(it%jdata==0) 
	    sf_floatwrite(dd,nr,Fdat);
    }
    if(verb) fprintf(stderr,"\n");    

    /*------------------------------------------------------------*/
    /* deallocate arrays */
    free(**um); free(*um); free(um);
    free(**up); free(*up); free(up);
    free(**uo); free(*uo); free(uo);
    free(**ua); free(*ua); free(ua);
    if (snap) { free(**uc); free(*uc); free(uc); }

    free(**rox); free(*rox); free(rox);
    free(**roy); free(*roy); free(roy);
    free(**roz); free(*roz); free(roz);

    free(**vp); free(*vp); free(vp);
    free(**vt); free(*vt); free(vt);

    free(ss);
    free(rr);
    free(dd);
    free(ww);

    exit (0);
    }
}