Ejemplo n.º 1
0
void pml1_txx(float *txxn1, float *vxn1, float *c11, 
	     float (*ldx)(float *, int),
             bool freesurface )
/*<stress decay in pml>*/
{
    /*
     * 1-D stress update inside the two PML strips.
     *
     * txxn1       : stress at the new time level; written in both strips.
     * vxn1        : velocity handed to the derivative operator.
     * c11         : coefficient multiplying the derivative term.
     * ldx         : derivative operator, called as ldx(vxn1, i-1).
     * freesurface : when true, the derivative term of the leading strip
     *               is multiplied by 0.0 instead of c11.
     *
     * Reads the file-scope values marg, pmlout, nx, dt, pmldx and txxn0x,
     * and writes the file-scope array txxn1x.
     */
    int i;

    /* Leading strip: [marg, marg+pmlout) */
    for (i = marg; i < marg + pmlout; i++) {
        if (freesurface) {
            /* derivative contribution switched off by the 0.0 factor */
            txxn1x[i] = ((1 - dt*pmldx[i]/2) * txxn0x[i]
                         - dt*0.0*ldx(vxn1, i-1))
                        / (1 + dt*pmldx[i]/2);
        } else {
            txxn1x[i] = ((1 - dt*pmldx[i]/2) * txxn0x[i]
                         - dt*c11[i]*ldx(vxn1, i-1))
                        / (1 + dt*pmldx[i]/2);
        }
        txxn1[i] = txxn1x[i];
    }

    /* Trailing strip: [nx+pmlout+marg, nx+2*pmlout+marg) */
    for (i = nx + pmlout + marg; i < nx + 2*pmlout + marg; i++) {
        txxn1x[i] = ((1 - dt*pmldx[i]/2) * txxn0x[i]
                     - dt*c11[i]*ldx(vxn1, i-1))
                    / (1 + dt*pmldx[i]/2);
        txxn1[i] = txxn1x[i];
    }
}
Ejemplo n.º 2
0
Archivo: pml.c Proyecto: 1014511134/src
void pml_txx(float **txxn1, float **vxn1, float **vzn1, float **c11, 
	     float (*ldx)(float **, int, int), 
	     float (*ldz)(float **, int, int),
             bool freesurface )
/*<stress decay in pml>*/
{
    /*
     * 2-D stress update inside the four PML strips (top, left, right,
     * bottom). The stress is kept as two split parts, txxn1x and txxn1z,
     * each advanced with its own profile (pmldx / pmldz); txxn1 receives
     * their sum.
     *
     * txxn1       : stress at the new time level; written in every strip.
     * vxn1, vzn1  : velocity components handed to ldx / ldz.
     * c11         : coefficient multiplying the derivative terms.
     * ldx, ldz    : derivative operators, called as ldx(vxn1, i-1, k)
     *               and ldz(vzn1, i, k-1).
     * freesurface : when true, the derivative terms of the top strip are
     *               multiplied by 0.0 instead of c11.
     *
     * Reads the file-scope values marg, pmlout, nx, nz, dt, pmldx, pmldz,
     * txxn0x and txxn0z; writes the file-scope arrays txxn1x and txxn1z.
     */
    int i, k;

    /* Top strip: every column, rows [marg, marg+pmlout) */
    for (i = marg; i < nx + 2*pmlout + marg; i++) {
        for (k = marg; k < marg + pmlout; k++) {
            if (freesurface) {
                /* derivative contributions switched off by the 0.0 factor */
                txxn1x[i][k] = ((1 - dt*pmldx[i]/2) * txxn0x[i][k]
                                - dt*0.0*ldx(vxn1, i-1, k))
                               / (1 + dt*pmldx[i]/2);
                txxn1z[i][k] = ((1 - dt*pmldz[k]/2) * txxn0z[i][k]
                                - dt*0.0*ldz(vzn1, i, k-1))
                               / (1 + dt*pmldz[k]/2);
            } else {
                txxn1x[i][k] = ((1 - dt*pmldx[i]/2) * txxn0x[i][k]
                                - dt*c11[i][k]*ldx(vxn1, i-1, k))
                               / (1 + dt*pmldx[i]/2);
                txxn1z[i][k] = ((1 - dt*pmldz[k]/2) * txxn0z[i][k]
                                - dt*c11[i][k]*ldz(vzn1, i, k-1))
                               / (1 + dt*pmldz[k]/2);
            }
            txxn1[i][k] = txxn1x[i][k] + txxn1z[i][k];
        }
    }

    /* Left strip: columns [marg, marg+pmlout), rows between top and bottom */
    for (i = marg; i < marg + pmlout; i++) {
        for (k = marg + pmlout; k < nz + pmlout + marg; k++) {
            txxn1x[i][k] = ((1 - dt*pmldx[i]/2) * txxn0x[i][k]
                            - dt*c11[i][k]*ldx(vxn1, i-1, k))
                           / (1 + dt*pmldx[i]/2);
            txxn1z[i][k] = ((1 - dt*pmldz[k]/2) * txxn0z[i][k]
                            - dt*c11[i][k]*ldz(vzn1, i, k-1))
                           / (1 + dt*pmldz[k]/2);
            txxn1[i][k] = txxn1x[i][k] + txxn1z[i][k];
        }
    }

    /* Right strip: columns [nx+pmlout+marg, nx+2*pmlout+marg) */
    for (i = nx + pmlout + marg; i < nx + 2*pmlout + marg; i++) {
        for (k = marg + pmlout; k < nz + pmlout + marg; k++) {
            txxn1x[i][k] = ((1 - dt*pmldx[i]/2) * txxn0x[i][k]
                            - dt*c11[i][k]*ldx(vxn1, i-1, k))
                           / (1 + dt*pmldx[i]/2);
            txxn1z[i][k] = ((1 - dt*pmldz[k]/2) * txxn0z[i][k]
                            - dt*c11[i][k]*ldz(vzn1, i, k-1))
                           / (1 + dt*pmldz[k]/2);
            txxn1[i][k] = txxn1x[i][k] + txxn1z[i][k];
        }
    }

    /* Bottom strip: every column, rows [marg+pmlout+nz, nz+2*pmlout+marg) */
    for (i = marg; i < nx + 2*pmlout + marg; i++) {
        for (k = marg + pmlout + nz; k < nz + 2*pmlout + marg; k++) {
            txxn1x[i][k] = ((1 - dt*pmldx[i]/2) * txxn0x[i][k]
                            - dt*c11[i][k]*ldx(vxn1, i-1, k))
                           / (1 + dt*pmldx[i]/2);
            txxn1z[i][k] = ((1 - dt*pmldz[k]/2) * txxn0z[i][k]
                            - dt*c11[i][k]*ldz(vzn1, i, k-1))
                           / (1 + dt*pmldz[k]/2);
            txxn1[i][k] = txxn1x[i][k] + txxn1z[i][k];
        }
    }
}
Ejemplo n.º 3
0
// Emits the code that passes a Java long local as a JNI argument.
// The value is read from the locals area addressed by Llocals into the
// temporary register Rtmp (O0) and then stored into the JNI argument
// described by jni_arg.
void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
  Argument  jni_arg(jni_offset(), false);
  Register  Rtmp = O0;

#ifdef ASSERT
  if (TaggedStackInterpreter) {
    // check at least one tag is okay
    // Debug builds only: load the tag stored for local slot offset()+1 and
    // compare it with G0; execution falls through to stop() when they differ.
    Label ok;
    __ ld_ptr(Llocals, Interpreter::local_tag_offset_in_bytes(offset() + 1), Rtmp);
    __ cmp(Rtmp, G0);
    __ brx(Assembler::equal, false, Assembler::pt, ok);
    __ delayed()->nop();
    __ stop("Native object has bad tag value");
    __ bind(ok);
  }
#endif // ASSERT

#ifdef _LP64
  // _LP64: a single ldx from local slot offset()+1, stored with
  // store_long_argument.
  __ ldx(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp);
  __ store_long_argument(Rtmp, jni_arg);
#else
  // Otherwise: two ld loads. Slot offset()+1 is stored to jni_arg and
  // slot offset()+0 to the argument returned by jni_arg.successor().
  __ ld(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp);
  __ store_argument(Rtmp, jni_arg);
  __ ld(Llocals, Interpreter::local_offset_in_bytes(offset() + 0), Rtmp);
  Argument successor(jni_arg.successor());
  __ store_argument(Rtmp, successor);
#endif
}
// Pointer-sized load from address `a` (adjusted by `offset`) into `d`.
// Forwards to ldx when _LP64 is defined and to ld otherwise.
inline void MacroAssembler::ld_ptr(const Address& a, Register d, int offset) {
#ifndef _LP64
  ld(a, d, offset);
#else
  ldx(a, d, offset);
#endif
}
// Pointer-sized load from [s1 + s2] into `d`.
// Forwards to ldx when _LP64 is defined and to ld otherwise.
inline void MacroAssembler::ld_ptr( Register s1, RegisterOrConstant s2, Register d ) {
#ifndef _LP64
  ld(s1, s2, d);
#else
  ldx(s1, s2, d);
#endif
}
Ejemplo n.º 6
0
void pml1_vxz(float *vxn1, float *vxn0, 
	     float *txxn0, float *denx, 
	     float (*ldx)(float *, int), 
	     bool freesurface)
/*<velocity vx,vz  decay in pml>*/
{
    /*
     * 1-D velocity update inside the two PML strips.
     *
     * vxn1        : velocity at the new time level (output).
     * vxn0        : velocity at the previous time level.
     * txxn0       : stress handed to the derivative operator.
     * denx        : divisor of dt in the derivative term.
     * ldx         : derivative operator, called as ldx(txxn0, i).
     * freesurface : when true, the leading strip is left untouched.
     *
     * Reads the file-scope values marg, pmlout, nx, dt and pmldx.
     */
    int i;

    /* Leading strip: [marg, marg+pmlout), skipped for a free surface */
    if (!freesurface) {
        for (i = marg; i < marg + pmlout; i++) {
            vxn1[i] = ((1 - dt*pmldx[i]/2) * vxn0[i]
                       - dt/denx[i]*ldx(txxn0, i))
                      / (1 + dt*pmldx[i]/2);
        }
    }

    /* Trailing strip: [nx+pmlout+marg, nx+2*pmlout+marg) */
    for (i = nx + pmlout + marg; i < nx + 2*pmlout + marg; i++) {
        vxn1[i] = ((1 - dt*pmldx[i]/2) * vxn0[i]
                   - dt/denx[i]*ldx(txxn0, i))
                  / (1 + dt*pmldx[i]/2);
    }
}
Ejemplo n.º 7
0
Archivo: pml.c Proyecto: 1014511134/src
void pml_vxz(float **vxn1, float **vzn1, float **vxn0, float **vzn0, 
	     float **txxn0, float **denx, float **denz,
	     float (*ldx)(float **, int, int), 
	     float (*ldz)(float **, int, int),
             bool freesurface)
/*<velocity vx,vz  decay in pml>*/
{
    /*
     * 2-D velocity update inside the four PML strips (top, left, right,
     * bottom). vx is advanced with the pmldx profile and the ldx
     * derivative, vz with pmldz and ldz.
     *
     * vxn1, vzn1  : velocities at the new time level (output).
     * vxn0, vzn0  : velocities at the previous time level.
     * txxn0       : stress handed to ldx / ldz.
     * denx, denz  : divisors of dt in the derivative terms.
     * freesurface : when true, the top strip is left untouched.
     *
     * Reads the file-scope values marg, pmlout, nx, nz, dt, pmldx, pmldz.
     */
    int i, k;

    /* Top strip: every column, rows [marg, marg+pmlout) */
    if (!freesurface) {
        for (i = marg; i < nx + 2*pmlout + marg; i++) {
            for (k = marg; k < marg + pmlout; k++) {
                vxn1[i][k] = ((1 - dt*pmldx[i]/2) * vxn0[i][k]
                              - dt/denx[i][k]*ldx(txxn0, i, k))
                             / (1 + dt*pmldx[i]/2);
                vzn1[i][k] = ((1 - dt*pmldz[k]/2) * vzn0[i][k]
                              - dt/denz[i][k]*ldz(txxn0, i, k))
                             / (1 + dt*pmldz[k]/2);
            }
        }
    }

    /* Left strip: columns [marg, marg+pmlout) */
    for (i = marg; i < marg + pmlout; i++) {
        for (k = marg + pmlout; k < nz + pmlout + marg; k++) {
            vxn1[i][k] = ((1 - dt*pmldx[i]/2) * vxn0[i][k]
                          - dt/denx[i][k]*ldx(txxn0, i, k))
                         / (1 + dt*pmldx[i]/2);
            vzn1[i][k] = ((1 - dt*pmldz[k]/2) * vzn0[i][k]
                          - dt/denz[i][k]*ldz(txxn0, i, k))
                         / (1 + dt*pmldz[k]/2);
        }
    }

    /* Right strip: columns [nx+pmlout+marg, nx+2*pmlout+marg) */
    for (i = nx + pmlout + marg; i < nx + 2*pmlout + marg; i++) {
        for (k = marg + pmlout; k < nz + pmlout + marg; k++) {
            vxn1[i][k] = ((1 - dt*pmldx[i]/2) * vxn0[i][k]
                          - dt/denx[i][k]*ldx(txxn0, i, k))
                         / (1 + dt*pmldx[i]/2);
            vzn1[i][k] = ((1 - dt*pmldz[k]/2) * vzn0[i][k]
                          - dt/denz[i][k]*ldz(txxn0, i, k))
                         / (1 + dt*pmldz[k]/2);
        }
    }

    /* Bottom strip: every column, rows [marg+pmlout+nz, nz+2*pmlout+marg) */
    for (i = marg; i < nx + 2*pmlout + marg; i++) {
        for (k = marg + pmlout + nz; k < nz + 2*pmlout + marg; k++) {
            vxn1[i][k] = ((1 - dt*pmldx[i]/2) * vxn0[i][k]
                          - dt/denx[i][k]*ldx(txxn0, i, k))
                         / (1 + dt*pmldx[i]/2);
            vzn1[i][k] = ((1 - dt*pmldz[k]/2) * vzn0[i][k]
                          - dt/denz[i][k]*ldz(txxn0, i, k))
                         / (1 + dt*pmldz[k]/2);
        }
    }
}
Ejemplo n.º 8
0
/*
 * Unit test: LDX with a negative immediate operand.
 *
 * Executes ldx with the immediate value -99, then checks that the X
 * register holds -99 and that the status register matches "10100000"
 * (bit order NVUBDIZC, as printed in the failure message).
 *
 * Returns 0 on success; on failure the mu_* macros are expected to return
 * their message (NOTE(review): macro definitions are not visible here).
 */
static char * LDX2() {
    //load neg val into x reg
    CPU *c = getCPU();
    int8_t operand = -99;
    OP_CODE_INFO *o = getOP_CODE_INFO(operand,0,modeImmediate);
    ldx(c,o);
    int8_t xVal = getRegByte(c,IND_X);
    /* The checked register is X, so the message must say so. */
    mu_assert("LDX2 err, X reg != -99", xVal == -99);
    /* Status failures are reported under this test's own name. */
    mu_run_test_with_args(testRegStatus,c,"10100000",
            "          NVUBDIZC    NVUBDIZC\nLDX2 err, %s != %s");
    freeOP_CODE_INFO(o);
    free(c);
    return 0;
}
// Emits the code that passes a Java long local as a JNI argument, using
// O0 (Rtmp) as the temporary between each load and store.
void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
  Argument  jni_arg(jni_offset(), false);
  Register  Rtmp = O0;

#ifdef _LP64
  // _LP64: a single ldx from local slot offset()+1, stored with
  // store_long_argument.
  __ ldx(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp);
  __ store_long_argument(Rtmp, jni_arg);
#else
  // Otherwise: two ld loads. Slot offset()+1 is stored to jni_arg and
  // slot offset()+0 to the argument returned by jni_arg.successor().
  __ ld(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp);
  __ store_argument(Rtmp, jni_arg);
  __ ld(Llocals, Interpreter::local_offset_in_bytes(offset() + 0), Rtmp);
  Argument successor(jni_arg.successor());
  __ store_argument(Rtmp, successor);
#endif
}
// Emits the code that passes a Java long local as a JNI argument.
// Locals are addressed at negative word offsets from Llocals; O0 (Rtmp)
// is the temporary between each load and store.
void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
  Argument  jni_arg(jni_offset(), false);
  Register  Rtmp = O0;
  // The former Rtmp1 (G3_scratch) local was never used and has been dropped.

#ifdef _LP64
  // _LP64: a single ldx from local slot offset()+1, stored with
  // store_long_argument.
  __ ldx(Llocals, -(offset() + 1) * wordSize, Rtmp);
  __ store_long_argument(Rtmp, jni_arg);
#else
  // Otherwise: two ld loads. Slot offset()+1 is stored to jni_arg and
  // slot offset()+0 to the argument returned by jni_arg.successor().
  __ ld(Llocals, -(offset() + 1) * wordSize, Rtmp);
  __ store_argument(Rtmp, jni_arg);
  __ ld(Llocals, -(offset() + 0) * wordSize, Rtmp);
  __ store_argument(Rtmp, jni_arg.successor());
#endif
}
Ejemplo n.º 11
0
/*
 * Backward propagation of the recorded data followed by imaging.
 *
 * mig1     : output, [ntau][nx][nz]; zeroed here, then filled with the
 *            time-shifted correlation of fwf and the backward wavefield.
 * mig2     : output, [nx][nz]; zero-lag correlation divided by
 *            (sum of fwf squared + SF_EPS).
 * fwf      : input forward wavefield snapshots, indexed [snapshot][ix][iz].
 * localrec : receiver data injected at every time step, indexed [ig][it].
 * verb     : print progress with sf_warning.
 * wantwf   : when true, write the backward snapshots to Ftmpbwf.
 * Ftmpbwf  : output file for the backward snapshots.
 *
 * Grid sizes, time stepping parameters, model arrays and the loop indices
 * (it, ix, iz, itau) come from file scope, as does the swap pointer transp.
 * Always returns 0.
 */
int sglfdback2(float ***mig1, float **mig2, float ***fwf, float **localrec, bool verb, bool wantwf, sf_file Ftmpbwf)
{
	float **txxn1, **txxn0, **vxn1, **vxn0, **vzn1, **vzn0;
	float **sill, **ccr, ***bwf;
	int wfit, htau;
	float tau;

	sill=sf_floatalloc2(nz, nx);
	ccr=sf_floatalloc2(nz, nx);
	bwf=sf_floatalloc3(nz, nx, wfnt);
	zero2(sill, nz, nx);
	zero2(ccr, nz, nx);
	zero3(mig1, nz, nx, ntau);

	txxn1=sf_floatalloc2(nzb, nxb);
	txxn0=sf_floatalloc2(nzb, nxb);
	vxn1=sf_floatalloc2(nzb, nxb);
	vxn0=sf_floatalloc2(nzb, nxb);
	vzn1=sf_floatalloc2(nzb, nxb);
	vzn0=sf_floatalloc2(nzb, nxb);

	zero2(txxn1, nzb, nxb);
	zero2(txxn0, nzb, nxb);
	zero2(vxn1, nzb, nxb);
	zero2(vxn0, nzb, nxb);
	zero2(vzn1, nzb, nxb);
	zero2(vzn0, nzb, nxb);

	/* split PML stress components live at file scope */
	zero2(txxn1x, nzb, nxb);
	zero2(txxn1z, nzb, nxb);
	zero2(txxn0x, nzb, nxb);
	zero2(txxn0z, nzb, nxb);
	
	/* snapshots are filled from the last one backwards */
	wfit=wfnt-1;
	for(it=nt-1; it>=0; it--){
		if(verb) sf_warning("Backward it=%d/%d;", it+1, nt);

		/* stress update in the interior */
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
		for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){
			for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){
				txxn0[ix][iz]=txxn1[ix][iz]+dt*bc11[ix][iz]*(ldx(vxn1, ix-1, iz) +ldz(vzn1, ix, iz-1));
			}
		}

		pml_txxb(txxn0, vxn1, vzn1);

		/* add the recorded data at the receiver positions */
#ifdef _OPENMP
#pragma omp parallel for private(ix)
#endif
		for(ix=0; ix<ng; ix++){
			txxn0[ix*ginv+pmlsize+nfd][pmlsize+nfd+gp]+=localrec[ix][it];
		}

		/* velocity update in the interior */
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
		for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){
			for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){
				vxn0[ix][iz]=vxn1[ix][iz]+dt/bdenx[ix][iz]*ldx(txxn0, ix, iz);
				vzn0[ix][iz]=vzn1[ix][iz]+dt/bdenz[ix][iz]*ldz(txxn0, ix, iz);
			}
		}

		pml_vxzb(vxn1, vzn1, vxn0, vzn0, txxn0);

		/* exchange time levels by swapping the array pointers */
		transp=txxn1; txxn1=txxn0; txxn0=transp;
		transp=vxn1; vxn1=vxn0; vxn0=transp;
		transp=vzn1; vzn1=vzn0; vzn0=transp;

		/* store a snapshot and accumulate the zero-lag sums */
		if(it%wfinv==0){
			for(ix=0; ix<nx; ix++)
				for(iz=0; iz<nz; iz++){
					bwf[wfit][ix][iz]=txxn0[ix+pmlsize+nfd][iz+pmlsize+nfd];
					ccr[ix][iz]+=fwf[wfit][ix][iz]*bwf[wfit][ix][iz];
					sill[ix][iz]+=fwf[wfit][ix][iz]*fwf[wfit][ix][iz];
				}
			wfit--;
		}
	} //end of it
	if(verb) sf_warning(".");

	/* time-shift correlation: fwf at +htau against bwf at -htau */
	for(itau=0; itau<ntau; itau++){
		tau=itau*dtau+tau0;
		htau=tau/wfdt;
		for(it=abs(htau); it<wfnt-abs(htau); it++){
			for(ix=0; ix<nx; ix++){
				for(iz=0; iz<nz; iz++){
					mig1[itau][ix][iz]+=fwf[it+htau][ix][iz]*bwf[it-htau][ix][iz];
				}
			}
		}//end of it
	} // end of itau

	for(ix=0; ix<nx; ix++){
		for(iz=0; iz<nz; iz++){
			mig2[ix][iz]=ccr[ix][iz]/(sill[ix][iz]+SF_EPS);
		}
	}

	if(wantwf) sf_floatwrite(bwf[0][0], wfnt*nx*nz, Ftmpbwf);

	/* Release every array allocated above; previously all of them leaked
	   on each call. The sf_floatalloc2/3 arrays are freed with the usual
	   data-block-then-pointer-table sequence. */
	free(*sill); free(sill);
	free(*ccr); free(ccr);
	free(**bwf); free(*bwf); free(bwf);
	free(*txxn1); free(txxn1);
	free(*txxn0); free(txxn0);
	free(*vxn1); free(vxn1);
	free(*vxn0); free(vxn0);
	free(*vzn1); free(vzn1);
	free(*vzn0); free(vzn0);

	return 0;
}
Ejemplo n.º 12
0
int sglfdback2(float **img1, float **img2, float ***wavfld, float **rcd, 
               bool verb, bool wantwf, float **den, float **c11, 
               geopar geop, srcpar srcp, pmlpar pmlp, sf_file Ftmpbwf)  
/*< staggered grid lowrank FD backward propagation + imaging >*/
{
    /*
     * img1    : output, [nx][nz]; zero-lag cross-correlation of wavfld with
     *           the backward wavefield (used directly as the accumulator).
     * img2    : output, [nx][nz]; img1 divided by (sum of wavfld^2 + SF_EPS).
     * wavfld  : forward snapshots, indexed [snapshot][ix][iz].
     * rcd     : receiver data written into the stress field, [ig][it].
     * verb    : print progress with sf_warning.
     * wantwf  : when true, write each backward snapshot to Ftmpbwf.
     * den,c11 : model arrays on the padded (nxb x nzb) grid.
     * geop, srcp, pmlp : geometry, source and PML parameter records.
     * Always returns 0.
     */

    /*caculate arrays*/
    float **txxn1, **txxn0, **vxn1, **vzn1, **vxn0, **vzn0;
    
    float **denx, **denz;
    float **sill, **ccr;
    /*grid index*/
    int nx, nz, nt, ix, iz, it, gn, ginter;
    int nxb, nzb, snpint;
    int gp;
    float dt;
    int pmlout, marg;
    bool freesurface;

    /* tmp variable */
    int wfit;
    
    nx = geop->nx;
    nz = geop->nz;
    nxb = geop->nxb;
    nzb = geop->nzb;
    
    gp  = geop->gp;
    gn  = geop->gn;
    ginter= geop->ginter;
    snpint = geop->snpint;
    
    nt = srcp->nt;
    dt = srcp->dt;

    pmlout = pmlp->pmlout;
    freesurface = pmlp->freesurface;
    marg   = getmarg();

    denx = sf_floatalloc2(nzb, nxb);
    denz = sf_floatalloc2(nzb, nxb);

    sill = sf_floatalloc2(nz, nx);
    ccr  = img1;

    /* start from a plain copy so the last column/row keep den itself */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif    
    for (ix = 0; ix < nxb; ix++) {
        for ( iz= 0; iz < nzb; iz++) {
            denx[ix][iz] = den[ix][iz];
            denz[ix][iz] = den[ix][iz];
        }
    }
    /*den[ix+1/2][iz]*/

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for ( ix = 0; ix < nxb-1; ix++) {
        for (iz = 0; iz < nzb; iz++) {
            denx[ix][iz] = (den[ix+1][iz] + den[ix][iz])*0.5;
        }
    }
    /*den[ix][iz+1/2]*/

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for ( ix = 0; ix < nxb; ix++) {
        for (iz = 0; iz < nzb-1; iz++) {
            denz[ix][iz] = (den[ix][iz+1] + den[ix][iz])*0.5;
        }
    }
    
    txxn1 = sf_floatalloc2(nzb, nxb);
    txxn0 = sf_floatalloc2(nzb, nxb);
    vxn1  = sf_floatalloc2(nzb, nxb);
    vzn1  = sf_floatalloc2(nzb, nxb);
    vxn0  = sf_floatalloc2(nzb, nxb);
    vzn0  = sf_floatalloc2(nzb, nxb);

    init_pml(nz, nx, dt, marg, pmlp);

    /* zero all six wavefield arrays in one pass */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif    
    for (ix = 0; ix < nxb; ix++) {
        for (iz = 0; iz < nzb; iz++) {
            txxn1[ix][iz] = 0.0;
            txxn0[ix][iz] = 0.0;
            vxn1[ix][iz] = 0.0;
            vxn0[ix][iz] = 0.0;
            vzn1[ix][iz] = 0.0;
            vzn0[ix][iz] = 0.0;
        }
    }

    /* zero the two image accumulators */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nx; ix++) {
        for (iz = 0; iz < nz; iz++) {
            sill[ix][iz] = 0.0;
            ccr[ix][iz] = 0.0;
        }
    }
        
    /*Main loop*/
    wfit = (int)(nt-1)/snpint;
    
    for (it = nt-1; it>=0; it--) {
        if  (verb) sf_warning("Backward it=%d/%d;", it, nt-1);

        /*Stress*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
        for (ix = marg+pmlout; ix < nx+marg+pmlout; ix++) {
            for ( iz = marg+pmlout; iz < nz+marg+pmlout; iz++) { 
                txxn0[ix][iz] = txxn1[ix][iz] + dt*c11[ix][iz]*(ldx(vxn1, ix-1, iz) + ldz(vzn1, ix, iz-1));
            }
        }
        /*Stress PML */
        pml_txxb(txxn0, vxn1, vzn1, c11, ldx, ldz, freesurface);

        /* overwrite the stress at the receiver positions with the data */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif	
        for (ix=0; ix<gn; ix++)  {
            txxn0[ix*ginter+pmlout+marg][pmlout+marg+gp] = rcd[ix][it];
        }
        
        /*velocity*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
        for (ix = marg+pmlout; ix < nx+pmlout+marg; ix++ ) {
            for (iz = marg+pmlout; iz < nz+pmlout+marg; iz++) {
                vxn0[ix][iz] = vxn1[ix][iz] + dt/denx[ix][iz]*ldx(txxn0, ix, iz);
                vzn0[ix][iz] = vzn1[ix][iz] + dt/denz[ix][iz]*ldz(txxn0, ix, iz);
            }
        }

        /*Velocity PML */
        pml_vxzb(vxn1, vzn1, vxn0, vzn0, txxn0, denx, denz, ldx, ldz, freesurface);

        /*n1 -> n0*/
        time_step_exch(txxn1, txxn0, it);
        time_step_exch(vxn1, vxn0, it);
        time_step_exch(vzn1, vzn0, it);
        pml_tstep_exchb(it);
        
        if ( wantwf && it%snpint == 0 ) {

            for ( ix = 0; ix < nx; ix++) 
                sf_floatwrite(txxn0[ix+pmlout+marg]+pmlout+marg, nz, Ftmpbwf);
        }

        if (it%snpint == 0 ) {
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
            for (ix=0; ix<nx; ix++) {
                for (iz=0; iz<nz; iz++) {
                    ccr[ix][iz] += wavfld[wfit][ix][iz]*txxn0[ix+pmlout+marg][iz+pmlout+marg];
                }
            }
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
            for (ix=0; ix<nx; ix++) {
                for (iz=0; iz<nz; iz++) {
                    sill[ix][iz] += wavfld[wfit][ix][iz]*wavfld[wfit][ix][iz];
                }
            }
            wfit--;
        }
    } /*Main loop*/
    if (verb) sf_warning(".");
    
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif    
    for (ix=0; ix<nx; ix++) {
        for (iz=0; iz<nz; iz++) {
            img2[ix][iz] = ccr[ix][iz]/(sill[ix][iz]+SF_EPS);//
        }
    } 

    /* Release the scratch arrays allocated in this call; previously they
       all leaked. ccr aliases the caller's img1 and is NOT freed. */
    free(*sill);  free(sill);
    free(*denx);  free(denx);
    free(*denz);  free(denz);
    free(*txxn1); free(txxn1);
    free(*txxn0); free(txxn0);
    free(*vxn1);  free(vxn1);
    free(*vzn1);  free(vzn1);
    free(*vxn0);  free(vxn0);
    free(*vzn0);  free(vzn0);
    
    return 0;

}
Ejemplo n.º 13
0
/*
 * Forward modeling: advances the velocity and stress fields for nt steps.
 *
 * fwf  : output snapshots, indexed [snapshot][ix][iz]; one is stored
 *        whenever it % wfinv == 0.
 * rcd  : output receiver data, indexed [ig][it].
 * verb : print progress with sf_warning.
 *
 * Grid sizes, time stepping parameters, model arrays and the loop indices
 * (it, ix, iz) come from file scope, as does the swap pointer transp.
 * Always returns 0.
 */
int sglfdfor2(float ***fwf, float **rcd, bool verb)
{
	float **txxn1, **txxn0, **vxn1, **vxn0, **vzn1, **vzn0;
	int wfit;

	txxn1=sf_floatalloc2(nzb, nxb);
	txxn0=sf_floatalloc2(nzb, nxb);
	vxn1=sf_floatalloc2(nzb, nxb);
	vxn0=sf_floatalloc2(nzb, nxb);
	vzn1=sf_floatalloc2(nzb, nxb);
	vzn0=sf_floatalloc2(nzb, nxb);

	zero2(txxn1, nzb, nxb);
	zero2(txxn0, nzb, nxb);
	zero2(vxn1, nzb, nxb);
	zero2(vxn0, nzb, nxb);
	zero2(vzn1, nzb, nxb);
	zero2(vzn0, nzb, nxb);

	/* split PML stress components live at file scope */
	zero2(txxn1x, nzb, nxb);
	zero2(txxn1z, nzb, nxb);
	zero2(txxn0x, nzb, nxb);
	zero2(txxn0z, nzb, nxb);

	wfit=0;
	for(it=0; it<nt; it++){
		if(verb) sf_warning("Forward it=%d/%d;", it+1, nt);

		/* velocity update in the interior */
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
		for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){
			for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){
				vxn1[ix][iz]=vxn0[ix][iz]-dt/fdenx[ix][iz]*ldx(txxn0, ix, iz);
				vzn1[ix][iz]=vzn0[ix][iz]-dt/fdenz[ix][iz]*ldz(txxn0, ix, iz);
			}
		}

		pml_vxz(vxn1, vzn1, vxn0, vzn0, txxn0);

		/* stress update in the interior */
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
		for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){
			for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){
				txxn1[ix][iz]=txxn0[ix][iz]-dt*fc11[ix][iz]*(ldx(vxn1, ix-1, iz) + ldz(vzn1, ix, iz-1));
			}
		}

		pml_txx(txxn1, vxn1, vzn1);

		/* the source is only applied before srctrunc */
		if((it*dt)<srctrunc){
			explsource(txxn1);
		}

		/* snapshot of the previous-level stress */
		if(it%wfinv==0){
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
			for(ix=0; ix<nx; ix++){
				for(iz=0; iz<nz; iz++){
					fwf[wfit][ix][iz]=txxn0[ix+nfd+pmlsize][iz+nfd+pmlsize];
				}
			}
			wfit++;
		}

		/* record at the receiver positions */
#ifdef _OPENMP
#pragma omp parallel for private(ix)
#endif
		for(ix=0; ix<ng; ix++){
			rcd[ix][it]=txxn0[ix*ginv+pmlsize+nfd][pmlsize+nfd+gp];
		}

		/* exchange time levels by swapping the array pointers */
		transp=txxn0; txxn0=txxn1; txxn1=transp;
		transp=vxn0;  vxn0=vxn1;   vxn1=transp;
		transp=vzn0;  vzn0=vzn1;   vzn1=transp;
	} // end of it
	if(verb) sf_warning(".");

	/* Release the six wavefield arrays; previously they leaked on every
	   call. Each sf_floatalloc2 array is freed data block first, then the
	   pointer table. */
	free(*txxn1); free(txxn1);
	free(*txxn0); free(txxn0);
	free(*vxn1); free(vxn1);
	free(*vxn0); free(vxn0);
	free(*vzn1); free(vzn1);
	free(*vzn0); free(vzn0);

	return 0;
}
Ejemplo n.º 14
0
int sglfdfor2(float ***wavfld, float **rcd, bool verb,
              float **den, float **c11, 
              geopar geop, srcpar srcp, pmlpar pmlp)
/*< staggered grid lowrank FD forward modeling >*/
{
    /*
     * wavfld  : output snapshots, [snapshot][ix][iz]; one is stored
     *           whenever it % snpint == 0.
     * rcd     : output receiver data, [ig][it]; zeroed before the run.
     * verb    : print progress with sf_warning.
     * den,c11 : model arrays on the padded (nxb x nzb) grid.
     * geop, srcp, pmlp : geometry, source and PML parameter records.
     * Returns the number of snapshots stored in wavfld.
     */

    /*caculate arrays*/
    float **txxn1, **txxn0, **vxn1, **vzn1, **vxn0, **vzn0;

    float **denx, **denz;
    /*grid index*/
    int nx, nz, nt, ix, iz, it;
    int nxb, nzb, snpint;
    int spx, spz, gp,  gn, ginter;
    float dt;
    int pmlout, marg;
    bool freesurface;
    
    /* tmp variable */
    int wfit;
    /* 1 is the correct thread count when built without OpenMP; previously
       nth was printed uninitialised in that configuration. */
    int nth = 1;

    nx = geop->nx;
    nz = geop->nz;
    nxb = geop->nxb;
    nzb = geop->nzb;

    spx = geop->spx;
    spz = geop->spz;
    gp  = geop->gp;
    gn  = geop->gn;
    ginter = geop->ginter;

    snpint = geop->snpint;

    nt = srcp->nt;
    dt = srcp->dt;

    pmlout = pmlp->pmlout;
    freesurface = pmlp->freesurface;
    marg   = getmarg();
    

    denx = sf_floatalloc2(nzb, nxb);
    denz = sf_floatalloc2(nzb, nxb);

#ifdef _OPENMP
#pragma omp parallel  
    {
        /* only the master thread stores the team size, so the threads no
           longer write nth concurrently */
#pragma omp master
        nth = omp_get_num_threads();
    }
#endif
    sf_warning(">>>> Using %d threads <<<<<", nth);

    /* start from a plain copy so the last column/row keep den itself */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nxb; ix++) {
        for ( iz= 0; iz < nzb; iz++) {
            denx[ix][iz] = den[ix][iz];
            denz[ix][iz] = den[ix][iz];
        }
    }
    /*den[ix+1/2][iz]*/

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for ( ix = 0; ix < nxb-1; ix++) {
        for (iz = 0; iz < nzb; iz++) {
            denx[ix][iz] = (den[ix+1][iz] + den[ix][iz])*0.5;
        }
    }
    /*den[ix][iz+1/2]*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for ( ix = 0; ix < nxb; ix++) {
        for (iz = 0; iz < nzb-1; iz++) {
            denz[ix][iz] = (den[ix][iz+1] + den[ix][iz])*0.5;
        }
    }

    txxn1 = sf_floatalloc2(nzb, nxb);
    txxn0 = sf_floatalloc2(nzb, nxb);
    vxn1  = sf_floatalloc2(nzb, nxb);
    vzn1  = sf_floatalloc2(nzb, nxb);
    vxn0  = sf_floatalloc2(nzb, nxb);
    vzn0  = sf_floatalloc2(nzb, nxb);

    init_pml(nz, nx, dt, marg, pmlp);

    /* zero all six wavefield arrays in one pass */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nxb; ix++) {
        for (iz = 0; iz < nzb; iz++) {
            txxn1[ix][iz] = 0.0;
            txxn0[ix][iz] = 0.0;
            vxn1[ix][iz] = 0.0;
            vxn0[ix][iz] = 0.0;
            vzn1[ix][iz] = 0.0;
            vzn0[ix][iz] = 0.0;
        }
    }

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif  
    for (it = 0; it < nt; it++) {
        for (ix = 0; ix < gn; ix++) {
            rcd[ix][it] = 0.0;
        }
    }  

    /*Main loop*/
    wfit = 0;
    for (it = 0; it < nt; it++) {
        if (verb) sf_warning("Forward it=%d/%d;", it, nt-1);
        
        /*velocity*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
        for (ix = marg+pmlout; ix < nx+pmlout+marg; ix++ ) {
            for (iz = marg+pmlout; iz < nz+pmlout+marg; iz++) {
                vxn1[ix][iz] = vxn0[ix][iz] - dt/denx[ix][iz]*ldx(txxn0, ix, iz);
                vzn1[ix][iz] = vzn0[ix][iz] - dt/denz[ix][iz]*ldz(txxn0, ix, iz);
            }
        }
        
        /*Velocity PML */
        pml_vxz(vxn1, vzn1, vxn0, vzn0, txxn0, denx, denz, ldx, ldz, freesurface);
        
        /*Stress*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
        for (ix = marg+pmlout; ix < nx+marg+pmlout; ix++) {
            for ( iz = marg+pmlout; iz < nz+marg+pmlout; iz++) { 
                txxn1[ix][iz] = txxn0[ix][iz] - dt*c11[ix][iz]*(ldx(vxn1, ix-1, iz) + ldz(vzn1, ix, iz-1));
            }
        }
         
        /*Stress PML */
        pml_txx(txxn1, vxn1, vzn1, c11, ldx, ldz, freesurface);
        
        /* the source is only applied up to srcp->trunc */
        if ((it*dt)<=srcp->trunc) {
            explsourcet(txxn1, srcp->wavelet, it, dt, spx+pmlout+marg, spz+pmlout+marg, nxb, nzb, srcp);
        }
        
        /* snapshot of the previous-level stress */
        if ( it%snpint == 0 ) {
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
            for ( ix = 0; ix < nx; ix++) 
                for ( iz = 0; iz<nz; iz++ )
                    wavfld[wfit][ix][iz] = txxn0[ix+pmlout+marg][iz+pmlout+marg];
            wfit++;
        }
        
        /* record at the receiver positions */
#ifdef _OPENMP
#pragma omp parallel for private(ix)
#endif	 
        for ( ix =0 ; ix < gn; ix++) {
            rcd[ix][it] = txxn0[ix*ginter+pmlout+marg][pmlout+marg+gp];
        }

        /*n1 -> n0*/
        time_step_exch(txxn0, txxn1, it);
        time_step_exch(vxn0, vxn1, it);
        time_step_exch(vzn0, vzn1, it);
        pml_tstep_exch(it);
        
    } /*Main loop*/
    if (verb) sf_warning(".");

    /* Release the scratch arrays allocated in this call; previously they
       all leaked. */
    free(*denx);  free(denx);
    free(*denz);  free(denz);
    free(*txxn1); free(txxn1);
    free(*txxn0); free(txxn0);
    free(*vxn1);  free(vxn1);
    free(*vzn1);  free(vzn1);
    free(*vxn0);  free(vxn0);
    free(*vzn0);  free(vzn0);

    return wfit;
    
}
// Generates the "jni_fast_GetLongField" stub into a fresh BufferBlob and
// returns the address of its first instruction.
//
// Shape of the emitted code, as visible below:
//   1. read the safepoint counter; if its low bit is set, go to the slow case
//   2. compute the field address and load the value speculatively
//   3. re-read the counter; if it changed, go to the slow case
//   4. otherwise return the loaded value
// The pc of every speculative load and the matching slow-case entry pc are
// recorded in speculative_load_pclist / slowcase_entry_pclist.
// NOTE(review): O1/O2 are presumably the object handle and the encoded
// field id of the JNI call -- confirm against the stub's calling convention.
address JNI_FastGetField::generate_fast_get_long_field() {
  const char *name = "jni_fast_GetLongField";
  ResourceMark rm;
  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize);
  CodeBuffer cbuf(blob);
  MacroAssembler* masm = new MacroAssembler(&cbuf);
  address fast_entry = __ pc();

  Label label1, label2;

  // G4 = safepoint counter; branch to label1 when bit 0 is set.
  AddressLiteral cnt_addrlit(SafepointSynchronize::safepoint_counter_addr());
  __ sethi (cnt_addrlit, G3);
  Address cnt_addr(G3, cnt_addrlit.low10());
  __ ld (cnt_addr, G4);
  __ andcc (G4, 1, G0);
  __ br (Assembler::notZero, false, Assembler::pn, label1);
  // Delay slot: O4 = O2 >> 2.
  __ delayed()->srl (O2, 2, O4);
  // O5 = (pointer loaded from [O1]) + O4, the address that is loaded from.
  __ ld_ptr (O1, 0, O5);
  __ add (O5, O4, O5);

#ifndef _LP64
  // Without _LP64 the value is read with two ld instructions (offset 0 into
  // G2, offset 4 into O3); each load pc gets its own list entry.
  assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small");
  speculative_load_pclist[count++] = __ pc();
  __ ld (O5, 0, G2);

  speculative_load_pclist[count] = __ pc();
  __ ld (O5, 4, O3);
#else
  // With _LP64 a single ldx reads the value into O3.
  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
  speculative_load_pclist[count] = __ pc();
  __ ldx (O5, 0, O3);
#endif

  // Re-read the counter into G1; a mismatch with G4 branches to label2.
  __ ld (cnt_addr, G1);
  __ cmp (G1, G4);
  __ br (Assembler::notEqual, false, Assembler::pn, label2);
  // Delay slot: G1 = O7, which label2 later moves back into O7.
  __ delayed()->mov (O7, G1);

  // Fast-path return; the result is moved into the output register(s).
#ifndef _LP64
  __ mov (G2, O0);
  __ retl ();
  __ delayed()->mov (O3, O1);
#else
  __ retl ();
  __ delayed()->mov (O3, O0);
#endif

  // Record the slow-case entry pc at the same indices used for the
  // speculative loads above (two entries without _LP64, one with).
#ifndef _LP64
  slowcase_entry_pclist[count-1] = __ pc();
  slowcase_entry_pclist[count++] = __ pc() ;
#else
  slowcase_entry_pclist[count++] = __ pc();
#endif

  // label1 is reached before G1 has been set, so copy O7 into it here.
  __ bind (label1);
  __ mov (O7, G1);

  // Slow case: call jni_GetLongField_addr() with O7 restored from G1 in the
  // delay slot.
  // NOTE(review): presumably this makes the slow-case function return
  // directly to the stub's caller -- confirm.
  address slow_case_addr = jni_GetLongField_addr();
  __ bind (label2);
  __ call (slow_case_addr, relocInfo::none);
  __ delayed()->mov (G1, O7);

  __ flush ();

  return fast_entry;
}
// ldx from an Address operand.
// Without an index register the displacement and `offset` are added and
// used as the second operand. With an index register, `offset` must be 0
// and the index register is used as the second operand.
inline void MacroAssembler::ldx( const Address& a, Register d, int offset) {
  if (!a.has_index()) {
    ldx( a.base(), a.disp() + offset, d);
    return;
  }
  assert(offset == 0, "");
  ldx( a.base(), a.index(), d);
}
Ejemplo n.º 17
0
/*
 * 1-D staggered-grid finite-difference driver.
 *
 * Reads the source wavelet ("in"), velocity ("vel"), density ("den"), the
 * FD coefficients ("G") and the FD stencil offsets ("sx"); writes wavefield
 * snapshots to "out" and a single-receiver record to "rec".
 * Optional inputs: "ic" when inject=n, and "presrc"/"velsrc"/"preinit"/
 * "velinit" when srcmms=y.
 *
 * lenx, G, sx and marg are file-scope variables set here.
 * The two PML calls in the time loop are commented out, so no PML update is
 * applied by this driver.
 */
int main(int argc, char* argv[]) 
{
    clock_t tstart,tend;
    double  duration;
   
    /*flag*/
    bool verb;
    
    /*I/O*/
    sf_file Fvel, Fden, Fsrc;
    sf_file Fwf/*wave field*/, Frec/*record*/, Fic/*Initial Condition*/; 
    sf_file FG, Fsx;
    /*I/O for MMS*/
    sf_file Fpsrc, Fvsrc, Fpint, Fvint;
    
    sf_axis at, ax;
    sf_axis icaxis;

    /*I/O arrays*/
    float *src; /*point source*/ 
    float *vel, *den, *c11, *record;
    float *ic;
    float *sxtmp;
    /*I/O for MMS*/
    float **psrc, **vsrc, *pint, *vint;
    
    /*Grid index variables*/
    int nx, nt, ix, it;
    int nxb;
    float dt, dx;

    /*caculate arrays*/
    float *txxn1, *txxn0, *vxn1, *vxn0;
    float *denx;

    /*source*/
    spara sp={0};
    bool  srcdecay, srcmms, inject;
    float srctrunc;
    /* Initialised because slx is only read from the command line when
       srcmms is false, yet it is used below for spx and the verbose print
       in either mode. */
    float slx = 0.0f;
    int   spx;

    /*PML*/
    int   pmlout, pmld0, decaybegin;
    int   decay;
    float gamma = GAMMA;
    int   mx; /*margin*/
    
    /*options*/
    bool  freesurface;
    int   gp;
    float gdep;
    int   snapinter;
      
    int it0;
    int icnx;
    
    tstart = clock();
    sf_init(argc, argv);
    if (!sf_getbool("verb", &verb)) verb=false; /*verbosity*/

    /*Set I/O file*/
    Fsrc = sf_input("in");  
    /*source wavelet*/
    Fvel    = sf_input("vel");
    /*velocity*/
    Fden    = sf_input("den");
    /*density*/
    Fwf     = sf_output("out");
    /*wavefield snap*/
    Frec    = sf_output("rec");
    /*record*/
    
    FG  = sf_input("G"); 
    Fsx = sf_input("sx");
    
    if (SF_FLOAT != sf_gettype(Fsrc)) sf_error("Need float input");
    if (SF_FLOAT != sf_gettype(Fvel)) sf_error("Need float input");
    if (SF_FLOAT != sf_gettype(Fden)) sf_error("Need float input");
    
    /*parameters of source*/
    if (!sf_getbool("srcmms", &srcmms)) srcmms = false;
    /*source type: if y, use point source */
    if (!srcmms && !sf_getfloat("slx", &slx)) sf_error("Need slx input");
    /*source location in x */
    if (!srcmms && slx<0.0) sf_error("slx need >=0.0");
    if (!sf_getbool("srcdecay",&srcdecay)) srcdecay=false;
    /*source decay y=use*/
    if (!sf_getfloat("srctrunc",&srctrunc)) srctrunc=1000;
    /*source trunc time (s)*/
    if (!sf_getbool("inject", &inject)) inject = true;
    /* inject=y use inject source; inject=n use initial condition*/
    if (srcmms && !inject) sf_error("Initial condition and MMS are conflicted");
     /*parameters of geometry*/
    if (!sf_getfloat("gdep", &gdep)) gdep = 0.0;
    /*depth of geophone */
    if (gdep <0.0) sf_error("gdep need to be >=0.0");
    /*source and receiver location*/

    if (!sf_getint("snapinter", &snapinter)) snapinter=1;
    /* snap interval */
    if (!sf_getint("pmlsize", &pmlout)) pmlout=PMLOUT;
    /* size of PML layer */
    if (!sf_getint("pmld0", &pmld0)) pmld0=PMLD0;
    /* PML parameter */
    if (!sf_getint("decay",&decay)) decay=DECAY_FLAG;
    /* Flag of decay boundary condtion: 1 = use ; 0 = not use */
    if (!sf_getint("decaybegin",&decaybegin)) decaybegin=DECAY_BEGIN;
    /* Begin time of using decay boundary condition */
    if (!sf_getbool("freesurface", &freesurface)) freesurface=false;
    /*free surface*/
    if (!sf_histint(FG,"n2", &lenx)) sf_error("No n2= in input");

    /* Read/Write axes */   
    at = sf_iaxa(Fsrc,1); nt = sf_n(at); dt = sf_d(at);
    ax = sf_iaxa(Fvel,1); nxb = sf_n(ax); dx = sf_d(ax);   

    /*read FD coefficients*/
    G = sf_floatalloc2(nxb, lenx);
    sf_floatread(G[0], nxb*lenx, FG);

    /*read FD schemes*/
    sxtmp = sf_floatalloc(lenx);
    sx = sf_intalloc(lenx);
    sf_floatread(sxtmp, lenx, Fsx);
    /* the margin is the largest absolute stencil offset */
    mx = 0;
    for (ix=0; ix<lenx; ix++) {
        sx[ix] = (int)sxtmp[ix];
        mx = abs(sx[ix])>mx? abs(sx[ix]):mx;
    }
    marg = mx;
    
    /* the velocity file covers the padded grid; strip PML and margin */
    nx = nxb - 2*pmlout - 2*marg;
        
    record = sf_floatalloc(nt);

    /*set wavefield axes*/     
    sf_setn(at, (int)(nt-1)/snapinter+1); /*set axis for snap file*/
    sf_setd(at,dt*snapinter);
    sf_setn(ax, nx);   
    sf_oaxa(Fwf,ax,1); 
    sf_oaxa(Fwf,at,2);

    /*set for record*/
    sf_setn(ax,1);
    
    /*read model*/
    vel  = sf_floatalloc(nxb);
    den  = sf_floatalloc(nxb);
    c11  = sf_floatalloc(nxb);
    denx = sf_floatalloc(nxb);
    
    sf_floatread(vel, nxb, Fvel);
    sf_floatread(den, nxb, Fden);
    for (ix = 0; ix < nxb; ix++) {
        c11[ix] = den[ix]*vel[ix]*vel[ix];
        denx[ix] = den[ix];
        if(c11[ix] <= 0.0) sf_warning("c11=%f: ix=%d ",c11[ix], ix);
    }
    /*den[ix+1/2]*/
    for ( ix = 0; ix < nxb-1; ix++) {
        denx[ix] = (den[ix+1] + den[ix])*0.5;
    }
    
    /*source and receiver location*/
    spx = (int)(slx/dx+0.5);
    gp  = (int)(gdep/dx+0.5);
    /*read source*/
    src = sf_floatalloc(nt);
    sf_floatread(src,nt,Fsrc);
       
    /*Initial Condition*/
    if (inject == false) {
        Fic = sf_input("ic");  
        /*initial condition*/
        if (SF_FLOAT != sf_gettype(Fic)) sf_error("Need float input of ic");
        icaxis = sf_iaxa(Fic, 1); 
        icnx = sf_n(icaxis);
        if (nx != icnx) sf_error("I.C. and velocity should be the same size.");
        ic = sf_floatalloc(nx);
        sf_floatread(ic, nx, Fic);	
    } else {
        ic = NULL;
    }

    /* Method of Manufactured Solution*/
    if (inject && srcmms) {
        Fpsrc = sf_input("presrc");
        Fvsrc = sf_input("velsrc");
        Fpint = sf_input("preinit");
        Fvint = sf_input("velinit");
                
        if (SF_FLOAT != sf_gettype(Fpsrc)) sf_error("Need float input of presrc");
        if (SF_FLOAT != sf_gettype(Fvsrc)) sf_error("Need float input of velsrc");
        if (SF_FLOAT != sf_gettype(Fpint)) sf_error("Need float input of preinit");
        if (SF_FLOAT != sf_gettype(Fvint)) sf_error("Need float input of velinit");
        
        psrc = sf_floatalloc2(nx, nt);
        vsrc = sf_floatalloc2(nx, nt);
        pint = sf_floatalloc(nx);
        vint = sf_floatalloc(nx);
        
        sf_floatread(psrc[0], nx*nt, Fpsrc);
        sf_floatread(vsrc[0], nx*nt, Fvsrc);
        sf_floatread(pint, nx, Fpint);
        sf_floatread(vint, nx, Fvint);
    } else {
        psrc = NULL;
        vsrc = NULL;
        pint = NULL;
        vint = NULL;
    }
            
    txxn1 = sf_floatalloc(nxb);
    txxn0 = sf_floatalloc(nxb);
    vxn1  = sf_floatalloc(nxb);
    vxn0  = sf_floatalloc(nxb);
 
    init_pml1(nx, dt, pmlout, marg, pmld0, decay, decaybegin, gamma);
    
    for (ix = 0; ix < nxb; ix++) {
        txxn1[ix] = 0.0;
    }
    for (ix = 0; ix < nxb; ix++) {
        txxn0[ix] = 0.0;
    }
    for (ix = 0; ix < nxb; ix++) {
        vxn1[ix] = 0.0;  
    }
    for (ix = 0; ix < nxb; ix++) {
        vxn0[ix] = 0.0;
    }
    for (it = 0; it < nt; it++) {
        record[it] = 0.0;
    }  
        
    sp.trunc=srctrunc;
    sp.srange=10;
    sp.alpha=0.5;
    sp.decay=srcdecay?1:0;
   	
    if (verb) {
        sf_warning("============================");
        sf_warning("nx=%d nt=%d", nx, nt);
        sf_warning("dx=%f dt=%f", dx, dt);
        sf_warning("lenx=%d marg=%d pmlout=%d", lenx, marg, pmlout);
        sf_warning("srctrunc=%f srcdecay=%d", sp.trunc, sp.decay);
        sf_warning("slx=%f, spx=%d, gdep=%f gp=%d",slx,spx,gdep,gp);
        for(ix=0; ix<lenx; ix++){
            sf_warning("[sxx]=[%d,] G=%f",sx[ix], G[ix][0]);
        }
        sf_warning("============================"); 
    } 

    /* MAIN LOOP */
    it0 = 0;
    if (inject == false) {
        /* the initial condition is time step 0, so the loop starts at 1 */
        it0 = 1;
        for(ix = 0; ix < nx; ix++) {
            txxn0[ix+marg+pmlout] = ic[ix];
        }
        sf_floatwrite(txxn0+pmlout+marg, nx, Fwf);
        record[0] = txxn0[pmlout+marg+gp];
    }
    
    /* MMS */
    if (inject && srcmms ) {
        it0 = 0;
        for (ix=0; ix <nx; ix++) {
            txxn0[ix+marg+pmlout] = pint[ix]; /*P(x,0)*/
            vxn0[ix+marg+pmlout]  = vint[ix]; /*U(x, -dt/2)*/
        }
    }
    
    for (it = it0; it < nt; it++) {
        if(verb) sf_warning("it=%d/%d;", it, nt-1);
        
        /*velocity*/
        for (ix = marg+pmlout; ix < nx+pmlout+marg; ix++ ) {
            vxn1[ix] = vxn0[ix] - dt/denx[ix]*ldx(txxn0, ix);
        }

        /* MMS */
        if (inject && srcmms) 
            for (ix = 0; ix < nx; ix++)
                vxn1[ix+marg+pmlout] += vsrc[it][ix]*dt;

        for (ix = 0; ix < nxb; ix++)
            vxn0[ix] = vxn1[ix];

        /*Velocity PML */
        /* pml1_vxz(vxn1, vxn0, txxn0, denx, ldx, freesurface); */

        /*Stress*/
        for (ix = marg+pmlout; ix < nx+marg+pmlout; ix++) {
            txxn1[ix] = txxn0[ix] - dt*c11[ix]*ldx(vxn1, ix-1);
        }
        
        /*Stress PML */
        /* pml1_txx(txxn1, vxn1, c11, ldx, freesurface); */

        if (inject) {
            if (!srcmms && (it*dt)<=sp.trunc) {
                txxn1[marg+pmlout+spx] += src[it]*dt;
            }
            if (srcmms) {
                for (ix = 0; ix < nx; ix++) 
                    txxn1[ix+marg+pmlout] +=psrc[it][ix]*dt;
            }
        }
            
        if ( it%snapinter==0 ) {
            sf_floatwrite(txxn0+pmlout+marg, nx, Fwf);
        }
        
        record[it] = txxn0[pmlout+marg+gp];
        for (ix=0; ix<nxb; ix++)
            txxn0[ix] = txxn1[ix];

    }/*End of LOOP TIME*/
    
    sf_warning(".");
    sf_floatwrite(record, nt, Frec);
 
    tend = clock();
    duration=(double)(tend-tstart)/CLOCKS_PER_SEC;
    sf_warning(">> The CPU time of sfsglfd1pml is: %f seconds << ", duration);
    exit(0);
}    
Ejemplo n.º 18
0
// Generate the interpreter entry for accessor methods: a fast path that
// performs the getfield directly when the constant pool cache entry is
// already resolved, and otherwise drops into the vanilla (slow path) entry.
//
// Returns NULL when UseFastAccessorMethods is off and was not set
// ergonomically; otherwise returns the address of the generated entry.
address InterpreterGenerator::generate_accessor_entry(void) {
  if (!UseFastAccessorMethods && (!FLAG_IS_ERGO(UseFastAccessorMethods))) {
    return NULL;
  }

  Label Lslow_path, Lacquire;

  // Register assignment. Note the deliberate aliasing: Rcodes shares a
  // register with Rconst_method and Rjvmti_mode shares one with Rscratch.
  const Register
         Rclass_or_obj = R3_ARG1,
         Rconst_method = R4_ARG2,
         Rcodes        = Rconst_method,
         Rcpool_cache  = R5_ARG3,
         Rscratch      = R11_scratch1,
         Rjvmti_mode   = Rscratch,
         Roffset       = R12_scratch2,
         Rflags        = R6_ARG4,
         Rbtable       = R7_ARG5;

  // Per-tos-state entry points of the typed load stubs. The table is static,
  // so the stubs below are emitted only by the first call that finds an
  // entry still empty; later calls reuse the recorded addresses.
  static address branch_table[number_of_states];

  address entry = __ pc();

  // Check for safepoint:
  // No safepoint check is emitted here.

  // Also check for JVMTI mode
  // Check for null obj, take slow path if so.
  __ ld(Rclass_or_obj, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
  __ lwz(Rjvmti_mode, thread_(interp_only_mode));
  __ cmpdi(CCR1, Rclass_or_obj, 0);
  __ cmpwi(CCR0, Rjvmti_mode, 0);
  // Fold both tests into CCR0.eq: CCR0.eq = CCR1.eq | !CCR0.eq.
  __ crorc(/*CCR0 eq*/2, /*CCR1 eq*/4+2, /*CCR0 eq*/2);
  __ beq(CCR0, Lslow_path); // this==null or jvmti_mode!=0

  // Do 2 things in parallel:
  // 1. Load the index out of the first instruction word, which looks like this:
  //    <0x2a><0xb4><index (2 byte, native endianness)>.
  // 2. Load constant pool cache base.
  __ ld(Rconst_method, in_bytes(Method::const_offset()), R19_method);
  __ ld(Rcpool_cache, in_bytes(ConstMethod::constants_offset()), Rconst_method);

  // Rcodes aliases Rconst_method, so this load overwrites the ConstMethod
  // pointer; it must come after the last use of Rconst_method as a base.
  __ lhz(Rcodes, in_bytes(ConstMethod::codes_offset()) + 2, Rconst_method); // Lower half of 32 bit field.
  __ ld(Rcpool_cache, ConstantPool::cache_offset_in_bytes(), Rcpool_cache);

  // Get the const pool entry by means of <index>.
  const int codes_shift = exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord);
  __ slwi(Rscratch, Rcodes, codes_shift); // (codes&0xFFFF)<<codes_shift
  __ add(Rcpool_cache, Rscratch, Rcpool_cache);

  // Check if cpool cache entry is resolved.
  // We are resolved if the indices offset contains the current bytecode.
  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  // Big Endian:
  // The "+ 7 - 2" byte offset into the 8-byte indices field is only valid
  // for a big-endian layout.
  __ lbz(Rscratch, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::indices_offset()) + 7 - 2, Rcpool_cache);
  __ cmpwi(CCR0, Rscratch, Bytecodes::_getfield);
  __ bne(CCR0, Lslow_path);
  __ isync(); // Order succeeding loads wrt. load of _indices field from cpool_cache.

  // Finally, start loading the value: Get cp cache entry into regs.
  __ ld(Rflags, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcpool_cache);
  __ ld(Roffset, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f2_offset()), Rcpool_cache);

  // Following code is from templateTable::getfield_or_static
  // Load pointer to branch table
  __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);

  // Get volatile flag
  __ rldicl(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // extract volatile bit
  // note: sync is needed before volatile load on PPC64

  // Check field type
  // After this, Rflags holds only the tos state, used as the table index.
  __ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);

#ifdef ASSERT
  // Debug builds only: reject out-of-range tos states and verify that the
  // word at R1_SP equals the word at R21_sender_SP ("backlink" assertion).
  Label LFlagInvalid;
  __ cmpldi(CCR0, Rflags, number_of_states);
  __ bge(CCR0, LFlagInvalid);

  __ ld(R9_ARG7, 0, R1_SP);
  __ ld(R10_ARG8, 0, R21_sender_SP);
  __ cmpd(CCR0, R9_ARG7, R10_ARG8);
  __ asm_assert_eq("backlink", 0x543);
#endif // ASSERT
  __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.

  // Load from branch table and dispatch (volatile case: one instruction ahead)
  __ sldi(Rflags, Rflags, LogBytesPerWord);
  // CCR6 keeps the volatile test alive across the dispatch; the typed stubs
  // below branch on it to Lacquire after performing their load.
  __ cmpwi(CCR6, Rscratch, 1); // volatile?
  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // volatile ? size of 1 instruction : 0
  }
  __ ldx(Rbtable, Rbtable, Rflags);

  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
    // For volatile fields, back up by one instruction so the dispatch lands
    // on the fence emitted immediately before each stub's entry point.
    __ subf(Rbtable, Rscratch, Rbtable); // point to volatile/non-volatile entry point
  }
  __ mtctr(Rbtable);
  __ bctr();

#ifdef ASSERT
  __ bind(LFlagInvalid);
  __ stop("got invalid flag", 0x6541);

  // The static table must be either completely empty (first generation) or
  // completely filled (any later generation), never partially initialized.
  bool all_uninitialized = true,
       all_initialized   = true;
  for (int i = 0; i<number_of_states; ++i) {
    all_uninitialized = all_uninitialized && (branch_table[i] == NULL);
    all_initialized   = all_initialized   && (branch_table[i] != NULL);
  }
  assert(all_uninitialized != all_initialized, "consistency"); // either or

  // vtos, dtos and ftos get no load stub; in debug builds their table slots
  // point at a stop ("unexpected type") instead of being left NULL.
  __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
  if (branch_table[vtos] == 0) branch_table[vtos] = __ pc(); // non-volatile_entry point
  if (branch_table[dtos] == 0) branch_table[dtos] = __ pc(); // non-volatile_entry point
  if (branch_table[ftos] == 0) branch_table[ftos] = __ pc(); // non-volatile_entry point
  __ stop("unexpected type", 0x6551);
#endif

  // Typed load stubs. Each one is laid out as:
  //   fence            <- entry used for volatile fields (one instruction early)
  //   load             <- entry recorded in branch_table
  //   beq CCR6 -> Lacquire (volatile) / blr (non-volatile)

  // itos: load word algebraic (lwax).
  if (branch_table[itos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[itos] = __ pc(); // non-volatile_entry point
    __ lwax(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  // ltos: load doubleword (ldx).
  if (branch_table[ltos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[ltos] = __ pc(); // non-volatile_entry point
    __ ldx(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  // btos: load byte and zero (lbzx), then sign-extend with extsb.
  if (branch_table[btos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[btos] = __ pc(); // non-volatile_entry point
    __ lbzx(R3_RET, Rclass_or_obj, Roffset);
    __ extsb(R3_RET, R3_RET);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  // ctos: load halfword and zero (lhzx), no sign extension.
  if (branch_table[ctos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[ctos] = __ pc(); // non-volatile_entry point
    __ lhzx(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  // stos: load halfword algebraic (lhax).
  if (branch_table[stos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[stos] = __ pc(); // non-volatile_entry point
    __ lhax(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  // atos: load a heap oop via load_heap_oop and verify it.
  if (branch_table[atos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[atos] = __ pc(); // non-volatile_entry point
    __ load_heap_oop(R3_RET, (RegisterOrConstant)Roffset, Rclass_or_obj);
    __ verify_oop(R3_RET);
    //__ dcbt(R3_RET); // prefetch
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  // Shared tail for volatile loads: twi_0 on the loaded value followed by
  // isync provides the acquire ordering before returning.
  __ align(32, 12);
  __ bind(Lacquire);
  __ twi_0(R3_RET);
  __ isync(); // acquire
  __ blr();

#ifdef ASSERT
  // By now every tos state must have an entry point recorded.
  for (int i = 0; i<number_of_states; ++i) {
    assert(branch_table[i], "accessor_entry initialization");
    //tty->print_cr("accessor_entry: branch_table[%d] = 0x%llx (opcode 0x%llx)", i, branch_table[i], *((unsigned int*)branch_table[i]));
  }
#endif

  // Slow path: hand over to the regular zerolocals method entry.
  __ bind(Lslow_path);
  __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), Rscratch);
  __ flush();

  return entry;
}
// Convenience overload: builds an Address from base register s1 and the
// register-or-constant s2, then forwards to the Address-based ldx with
// destination d.
inline void MacroAssembler::ldx(Register s1, RegisterOrConstant s2, Register d) {
  const Address src(s1, s2);
  ldx(src, d);
}
Ejemplo n.º 20
0
/**
 * @brief Fetch and emulate a single instruction.
 *
 * Reads one opcode byte via fetch_op() and dispatches to the matching
 * instruction handler. Handlers that take an operand receive either the
 * operand value (loaded through load_byte()/fetch_op()) or the effective
 * address, followed by the instruction's base cycle count.
 *
 * @return true if the opcode was recognised and dispatched, false if it is
 *         unknown or illegal (a diagnostic is emitted through D()).
 *
 * Current limitations:
 *
 * - Illegal instructions are not implemented
 * - Excess cycles due to page boundary crossing are not calculated
 * - Some known architectural bugs are not emulated
 */
bool Cpu::emulate()
{
  /* fetch instruction */
  uint8_t insn = fetch_op();
  bool retval = true;
  /* emulate instruction */
  switch(insn)
  {
  /* BRK */
  case 0x0: brk(); break;
  /* ORA (nn,X) */
  case 0x1: ora(load_byte(addr_indx()),6); break;
  /* ORA nn */
  case 0x5: ora(load_byte(addr_zero()),3); break;
  /* ASL nn */
  case 0x6: asl_mem(addr_zero(),5); break;
  /* PHP */
  case 0x8: php(); break;
  /* ORA #nn */
  case 0x9: ora(fetch_op(),2); break;
  /* ASL A */
  case 0xA: asl_a(); break;
  /* ORA nnnn */
  case 0xD: ora(load_byte(addr_abs()),4); break;
  /* ASL nnnn */
  case 0xE: asl_mem(addr_abs(),6); break;
  /* BPL nn */
  case 0x10: bpl(); break;
  /* ORA (nn),Y */
  case 0x11: ora(load_byte(addr_indy()),5); break;
  /* ORA nn,X */
  case 0x15: ora(load_byte(addr_zerox()),4); break;
  /* ASL nn,X */
  case 0x16: asl_mem(addr_zerox(),6); break;
  /* CLC */
  case 0x18: clc(); break;
  /* ORA nnnn,Y */
  case 0x19: ora(load_byte(addr_absy()),4); break;
  /* ORA nnnn,X */
  case 0x1D: ora(load_byte(addr_absx()),4); break;
  /* ASL nnnn,X */
  case 0x1E: asl_mem(addr_absx(),7); break;
  /* JSR */
  case 0x20: jsr(); break;
  /* AND (nn,X) */
  case 0x21: _and(load_byte(addr_indx()),6); break;
  /* BIT nn */
  case 0x24: bit(addr_zero(),3); break;
  /* AND nn */
  case 0x25: _and(load_byte(addr_zero()),3); break;
  /* ROL nn */
  case 0x26: rol_mem(addr_zero(),5); break;
  /* PLP */
  case 0x28: plp(); break;
  /* AND #nn */
  case 0x29: _and(fetch_op(),2); break;
  /* ROL A */
  case 0x2A: rol_a(); break;
  /* BIT nnnn */
  case 0x2C: bit(addr_abs(),4); break;
  /* AND nnnn */
  case 0x2D: _and(load_byte(addr_abs()),4); break;
  /* ROL nnnn */
  case 0x2E: rol_mem(addr_abs(),6); break;
  /* BMI nn */
  case 0x30: bmi(); break;
  /* AND (nn),Y */
  case 0x31: _and(load_byte(addr_indy()),5); break;
  /* AND nn,X */
  case 0x35: _and(load_byte(addr_zerox()),4); break;
  /* ROL nn,X */
  case 0x36: rol_mem(addr_zerox(),6); break;
  /* SEC */
  case 0x38: sec(); break;
  /* AND nnnn,Y */
  case 0x39: _and(load_byte(addr_absy()),4); break;
  /* AND nnnn,X */
  case 0x3D: _and(load_byte(addr_absx()),4); break;
  /* ROL nnnn,X */
  case 0x3E: rol_mem(addr_absx(),7); break;
  /* RTI */
  case 0x40: rti(); break;
  /* EOR (nn,X) */
  case 0x41: eor(load_byte(addr_indx()),6); break;
  /* EOR nn */
  case 0x45: eor(load_byte(addr_zero()),3); break;
  /* LSR nn */
  case 0x46: lsr_mem(addr_zero(),5); break;
  /* PHA */
  case 0x48: pha(); break;
  /* EOR #nn */
  case 0x49: eor(fetch_op(),2); break;
  /* LSR A */
  case 0x4A: lsr_a(); break;
  /* JMP nnnn */
  case 0x4C: jmp(); break;
  /* EOR nnnn */
  case 0x4D: eor(load_byte(addr_abs()),4); break;
  /* LSR nnnn */
  case 0x4E: lsr_mem(addr_abs(),6); break;
  /* BVC nn */
  case 0x50: bvc(); break;
  /* EOR (nn),Y */
  case 0x51: eor(load_byte(addr_indy()),5); break;
  /* EOR nn,X */
  case 0x55: eor(load_byte(addr_zerox()),4); break;
  /* LSR nn,X */
  case 0x56: lsr_mem(addr_zerox(),6); break;
  /* CLI */
  case 0x58: cli(); break;
  /* EOR nnnn,Y */
  case 0x59: eor(load_byte(addr_absy()),4); break;
  /* EOR nnnn,X */
  case 0x5D: eor(load_byte(addr_absx()),4); break;
  /* LSR nnnn,X */
  case 0x5E: lsr_mem(addr_absx(),7); break;
  /* RTS */
  case 0x60: rts(); break;
  /* ADC (nn,X) */
  case 0x61: adc(load_byte(addr_indx()),6); break;
  /* ADC nn */
  case 0x65: adc(load_byte(addr_zero()),3); break;
  /* ROR nn */
  case 0x66: ror_mem(addr_zero(),5); break;
  /* PLA */
  case 0x68: pla(); break;
  /* ADC #nn */
  case 0x69: adc(fetch_op(),2); break;
  /* ROR A */
  case 0x6A: ror_a(); break;
  /* JMP (nnnn) */
  case 0x6C: jmp_ind(); break;
  /* ADC nnnn */
  case 0x6D: adc(load_byte(addr_abs()),4); break;
  /* ROR nnnn */
  case 0x6E: ror_mem(addr_abs(),6); break;
  /* BVS nn */
  case 0x70: bvs(); break;
  /* ADC (nn),Y */
  case 0x71: adc(load_byte(addr_indy()),5); break;
  /* ADC nn,X */
  case 0x75: adc(load_byte(addr_zerox()),4); break;
  /* ROR nn,X */
  case 0x76: ror_mem(addr_zerox(),6); break;
  /* SEI */
  case 0x78: sei(); break;
  /* ADC nnnn,Y */
  case 0x79: adc(load_byte(addr_absy()),4); break;
  /* ADC nnnn,X */
  case 0x7D: adc(load_byte(addr_absx()),4); break;
  /* ROR nnnn,X */
  case 0x7E: ror_mem(addr_absx(),7); break;
  /* STA (nn,X) */
  case 0x81: sta(addr_indx(),6); break;
  /* STY nn */
  case 0x84: sty(addr_zero(),3); break;
  /* STA nn */
  case 0x85: sta(addr_zero(),3); break;
  /* STX nn */
  case 0x86: stx(addr_zero(),3); break;
  /* DEY */
  case 0x88: dey(); break;
  /* TXA */
  case 0x8A: txa(); break;
  /* STY nnnn */
  case 0x8C: sty(addr_abs(),4); break;
  /* STA nnnn */
  case 0x8D: sta(addr_abs(),4); break;
  /* STX nnnn */
  case 0x8E: stx(addr_abs(),4); break;
  /* BCC nn */
  case 0x90: bcc(); break;
  /* STA (nn),Y */
  case 0x91: sta(addr_indy(),6); break;
  /* STY nn,X */
  case 0x94: sty(addr_zerox(),4); break;
  /* STA nn,X */
  case 0x95: sta(addr_zerox(),4); break;
  /* STX nn,Y */
  case 0x96: stx(addr_zeroy(),4); break;
  /* TYA */
  case 0x98: tya(); break;
  /* STA nnnn,Y */
  case 0x99: sta(addr_absy(),5); break;
  /* TXS */
  case 0x9A: txs(); break;
  /* STA nnnn,X */
  case 0x9D: sta(addr_absx(),5); break;
  /* LDY #nn */
  case 0xA0: ldy(fetch_op(),2); break;
  /* LDA (nn,X) */
  case 0xA1: lda(load_byte(addr_indx()),6); break;
  /* LDX #nn */
  case 0xA2: ldx(fetch_op(),2); break;
  /* LDY nn */
  case 0xA4: ldy(load_byte(addr_zero()),3); break;
  /* LDA nn */
  case 0xA5: lda(load_byte(addr_zero()),3); break;
  /* LDX nn */
  case 0xA6: ldx(load_byte(addr_zero()),3); break;
  /* TAY */
  case 0xA8: tay(); break;
  /* LDA #nn */
  case 0xA9: lda(fetch_op(),2); break;
  /* TAX */
  case 0xAA: tax(); break;
  /* LDY nnnn */
  case 0xAC: ldy(load_byte(addr_abs()),4); break;
  /* LDA nnnn */
  case 0xAD: lda(load_byte(addr_abs()),4); break;
  /* LDX nnnn */
  case 0xAE: ldx(load_byte(addr_abs()),4); break;
  /* BCS nn */
  case 0xB0: bcs(); break;
  /* LDA (nn),Y */
  case 0xB1: lda(load_byte(addr_indy()),5); break;
  /* Zero-page indexed loads take 4 cycles (one more than plain zero-page),
     matching every other zero-page indexed read in this table. */
  /* LDY nn,X */
  case 0xB4: ldy(load_byte(addr_zerox()),4); break;
  /* LDA nn,X */
  case 0xB5: lda(load_byte(addr_zerox()),4); break;
  /* LDX nn,Y */
  case 0xB6: ldx(load_byte(addr_zeroy()),4); break;
  /* CLV */
  case 0xB8: clv(); break;
  /* LDA nnnn,Y */
  case 0xB9: lda(load_byte(addr_absy()),4); break;
  /* TSX */
  case 0xBA: tsx(); break;
  /* LDY nnnn,X */
  case 0xBC: ldy(load_byte(addr_absx()),4); break;
  /* LDA nnnn,X */
  case 0xBD: lda(load_byte(addr_absx()),4); break;
  /* LDX nnnn,Y */
  case 0xBE: ldx(load_byte(addr_absy()),4); break;
  /* CPY #nn */
  case 0xC0: cpy(fetch_op(),2); break;
  /* CMP (nn,X) */
  case 0xC1: cmp(load_byte(addr_indx()),6); break;
  /* CPY nn */
  case 0xC4: cpy(load_byte(addr_zero()),3); break;
  /* CMP nn */
  case 0xC5: cmp(load_byte(addr_zero()),3); break;
  /* DEC nn */
  case 0xC6: dec(addr_zero(),5); break;
  /* INY */
  case 0xC8: iny(); break;
  /* CMP #nn */
  case 0xC9: cmp(fetch_op(),2); break;
  /* DEX */
  case 0xCA: dex(); break;
  /* CPY nnnn */
  case 0xCC: cpy(load_byte(addr_abs()),4); break;
  /* CMP nnnn */
  case 0xCD: cmp(load_byte(addr_abs()),4); break;
  /* DEC nnnn */
  case 0xCE: dec(addr_abs(),6); break;
  /* BNE nn */
  case 0xD0: bne(); break;
  /* CMP (nn),Y */
  case 0xD1: cmp(load_byte(addr_indy()),5); break;
  /* CMP nn,X */
  case 0xD5: cmp(load_byte(addr_zerox()),4); break;
  /* DEC nn,X */
  case 0xD6: dec(addr_zerox(),6); break;
  /* CLD */
  case 0xD8: cld(); break;
  /* CMP nnnn,Y */
  case 0xD9: cmp(load_byte(addr_absy()),4); break;
  /* CMP nnnn,X */
  case 0xDD: cmp(load_byte(addr_absx()),4); break;
  /* DEC nnnn,X */
  case 0xDE: dec(addr_absx(),7); break;
  /* CPX #nn */
  case 0xE0: cpx(fetch_op(),2); break;
  /* SBC (nn,X) */
  case 0xE1: sbc(load_byte(addr_indx()),6); break;
  /* CPX nn */
  case 0xE4: cpx(load_byte(addr_zero()),3); break;
  /* SBC nn */
  case 0xE5: sbc(load_byte(addr_zero()),3); break;
  /* INC nn */
  case 0xE6: inc(addr_zero(),5); break;
  /* INX */
  case 0xE8: inx(); break;
  /* SBC #nn */
  case 0xE9: sbc(fetch_op(),2); break;
  /* NOP */
  case 0xEA: nop(); break;
  /* CPX nnnn */
  case 0xEC: cpx(load_byte(addr_abs()),4); break;
  /* SBC nnnn */
  case 0xED: sbc(load_byte(addr_abs()),4); break;
  /* INC nnnn */
  case 0xEE: inc(addr_abs(),6); break;
  /* BEQ nn */
  case 0xF0: beq(); break;
  /* SBC (nn),Y */
  case 0xF1: sbc(load_byte(addr_indy()),5); break;
  /* SBC nn,X */
  case 0xF5: sbc(load_byte(addr_zerox()),4); break;
  /* INC nn,X */
  case 0xF6: inc(addr_zerox(),6); break;
  /* SED */
  case 0xF8: sed(); break;
  /* SBC nnnn,Y */
  case 0xF9: sbc(load_byte(addr_absy()),4); break;
  /* SBC nnnn,X */
  case 0xFD: sbc(load_byte(addr_absx()),4); break;
  /* INC nnnn,X */
  case 0xFE: inc(addr_absx(),7); break;
  /* Unknown or illegal instruction */
  default:
    D("Unknown instruction: %X at %04x\n", insn,pc());
    retval = false;
    break;
  }
  return retval;
}