void pml1_txx(float *txxn1, float *vxn1, float *c11, float (*ldx)(float *, int), bool freesurface ) /*<stress decay in pml>*/ { int ix; /*Stress PML -- top*/ if (freesurface == false) { for (ix=marg; ix<marg+pmlout; ix++) { txxn1x[ix]=((1-dt*pmldx[ix]/2)*txxn0x[ix]-dt*c11[ix]*ldx(vxn1,ix-1))/(1+dt*pmldx[ix]/2); txxn1[ix] = txxn1x[ix]; } } else { for (ix=marg; ix<marg+pmlout; ix++) { txxn1x[ix]=((1-dt*pmldx[ix]/2)*txxn0x[ix]-dt*0.0*ldx(vxn1,ix-1))/(1+dt*pmldx[ix]/2); txxn1[ix]= txxn1x[ix]; } } /*Stress PML -- bottom*/ for (ix=nx+pmlout+marg; ix<nx+2*pmlout+marg; ix++) { txxn1x[ix]=((1-dt*pmldx[ix]/2)*txxn0x[ix]-dt*c11[ix]*ldx(vxn1,ix-1))/(1+dt*pmldx[ix]/2); txxn1[ix] = txxn1x[ix]; } }
void pml_txx(float **txxn1, float **vxn1, float **vzn1, float **c11, float (*ldx)(float **, int, int), float (*ldz)(float **, int, int), bool freesurface ) /*<stress decay in pml>*/ { int ix, iz; /*Stress PML -- top*/ if (freesurface == false) { for (ix=marg; ix<nx+2*pmlout+marg; ix++) { for (iz=marg; iz<marg+pmlout; iz++) { txxn1x[ix][iz]=((1-dt*pmldx[ix]/2)*txxn0x[ix][iz]-dt*c11[ix][iz]*ldx(vxn1,ix-1,iz))/(1+dt*pmldx[ix]/2); txxn1z[ix][iz]=((1-dt*pmldz[iz]/2)*txxn0z[ix][iz]-dt*c11[ix][iz]*ldz(vzn1,ix,iz-1))/(1+dt*pmldz[iz]/2); txxn1[ix][iz] = txxn1x[ix][iz]+txxn1z[ix][iz]; } } } else { for (ix=marg; ix<nx+2*pmlout+marg; ix++) { for (iz=marg; iz<marg+pmlout; iz++) { txxn1x[ix][iz]=((1-dt*pmldx[ix]/2)*txxn0x[ix][iz]-dt*0.0*ldx(vxn1,ix-1,iz))/(1+dt*pmldx[ix]/2); txxn1z[ix][iz]=((1-dt*pmldz[iz]/2)*txxn0z[ix][iz]-dt*0.0*ldz(vzn1,ix,iz-1))/(1+dt*pmldz[iz]/2); txxn1[ix][iz] = txxn1x[ix][iz]+txxn1z[ix][iz]; } } } /*Stress PML -- left*/ for (ix=marg; ix<marg+pmlout; ix++) { for (iz=marg+pmlout; iz<nz+pmlout+marg; iz++) { txxn1x[ix][iz]=((1-dt*pmldx[ix]/2)*txxn0x[ix][iz]-dt*c11[ix][iz]*ldx(vxn1,ix-1,iz))/(1+dt*pmldx[ix]/2); txxn1z[ix][iz]=((1-dt*pmldz[iz]/2)*txxn0z[ix][iz]-dt*c11[ix][iz]*ldz(vzn1,ix,iz-1))/(1+dt*pmldz[iz]/2); txxn1[ix][iz] = txxn1x[ix][iz]+txxn1z[ix][iz]; } } /*Stress PML -- right*/ for (ix=nx+pmlout+marg; ix<nx+2*pmlout+marg; ix++) { for (iz=marg+pmlout; iz<nz+pmlout+marg; iz++) { txxn1x[ix][iz]=((1-dt*pmldx[ix]/2)*txxn0x[ix][iz]-dt*c11[ix][iz]*ldx(vxn1,ix-1,iz))/(1+dt*pmldx[ix]/2); txxn1z[ix][iz]=((1-dt*pmldz[iz]/2)*txxn0z[ix][iz]-dt*c11[ix][iz]*ldz(vzn1,ix,iz-1))/(1+dt*pmldz[iz]/2); txxn1[ix][iz] = txxn1x[ix][iz]+txxn1z[ix][iz]; } } /*Stress PML -- bottom*/ for (ix=marg; ix<nx+2*pmlout+marg; ix++) { for (iz=marg+pmlout+nz; iz<nz+2*pmlout+marg; iz++) { txxn1x[ix][iz]=((1-dt*pmldx[ix]/2)*txxn0x[ix][iz]-dt*c11[ix][iz]*ldx(vxn1,ix-1,iz))/(1+dt*pmldx[ix]/2); 
txxn1z[ix][iz]=((1-dt*pmldz[iz]/2)*txxn0z[ix][iz]-dt*c11[ix][iz]*ldz(vzn1,ix,iz-1))/(1+dt*pmldz[iz]/2); txxn1[ix][iz] = txxn1x[ix][iz]+txxn1z[ix][iz]; } } }
void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { Argument jni_arg(jni_offset(), false); Register Rtmp = O0; #ifdef ASSERT if (TaggedStackInterpreter) { // check at least one tag is okay Label ok; __ ld_ptr(Llocals, Interpreter::local_tag_offset_in_bytes(offset() + 1), Rtmp); __ cmp(Rtmp, G0); __ brx(Assembler::equal, false, Assembler::pt, ok); __ delayed()->nop(); __ stop("Native object has bad tag value"); __ bind(ok); } #endif // ASSERT #ifdef _LP64 __ ldx(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp); __ store_long_argument(Rtmp, jni_arg); #else __ ld(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp); __ store_argument(Rtmp, jni_arg); __ ld(Llocals, Interpreter::local_offset_in_bytes(offset() + 0), Rtmp); Argument successor(jni_arg.successor()); __ store_argument(Rtmp, successor); #endif }
inline void MacroAssembler::ld_ptr(const Address& a, Register d, int offset) { #ifdef _LP64 ldx(a, d, offset); #else ld( a, d, offset); #endif }
// Pointer-sized load from s1 + s2 into d: forwards to ldx when _LP64 is
// defined and to ld otherwise, passing the arguments through unchanged.
inline void MacroAssembler::ld_ptr(Register s1, RegisterOrConstant s2, Register d) {
#ifndef _LP64
  ld(s1, s2, d);
#else
  ldx(s1, s2, d);
#endif
}
void pml1_vxz(float *vxn1, float *vxn0, float *txxn0, float *denx, float (*ldx)(float *, int), bool freesurface) /*<velocity vx,vz decay in pml>*/ { int ix; /*Velocity PML --top*/ if (freesurface == false) { for (ix=marg; ix<marg+pmlout; ix++) { vxn1[ix]=((1-dt*pmldx[ix]/2)*vxn0[ix]-dt/denx[ix]*ldx(txxn0,ix))/(1+dt*pmldx[ix]/2); } } /*Velocity PML --bottom*/ for (ix=nx+pmlout+marg; ix<nx+2*pmlout+marg; ix++) { vxn1[ix]=((1-dt*pmldx[ix]/2)*vxn0[ix]-dt/denx[ix]*ldx(txxn0,ix))/(1+dt*pmldx[ix]/2); } }
void pml_vxz(float **vxn1, float **vzn1, float **vxn0, float **vzn0, float **txxn0, float **denx, float **denz, float (*ldx)(float **, int, int), float (*ldz)(float **, int, int), bool freesurface) /*<velocity vx,vz decay in pml>*/ { int ix, iz; /*Velocity PML --top*/ if (freesurface == false) { for (ix=marg; ix<nx+2*pmlout+marg; ix++) { for (iz=marg; iz<marg+pmlout; iz++) { vxn1[ix][iz]=((1-dt*pmldx[ix]/2)*vxn0[ix][iz]-dt/denx[ix][iz]*ldx(txxn0,ix,iz))/(1+dt*pmldx[ix]/2); vzn1[ix][iz]=((1-dt*pmldz[iz]/2)*vzn0[ix][iz]-dt/denz[ix][iz]*ldz(txxn0,ix,iz))/(1+dt*pmldz[iz]/2); } } } /*Velocity PML --left*/ for (ix=marg; ix<marg+pmlout; ix++) { for (iz=marg+pmlout; iz<nz+pmlout+marg; iz++) { vxn1[ix][iz]=((1-dt*pmldx[ix]/2)*vxn0[ix][iz]-dt/denx[ix][iz]*ldx(txxn0,ix,iz))/(1+dt*pmldx[ix]/2); vzn1[ix][iz]=((1-dt*pmldz[iz]/2)*vzn0[ix][iz]-dt/denz[ix][iz]*ldz(txxn0,ix,iz))/(1+dt*pmldz[iz]/2); } } /*Velocity PML --right*/ for (ix=nx+pmlout+marg; ix<nx+2*pmlout+marg; ix++) { for (iz=marg+pmlout; iz<nz+pmlout+marg; iz++) { vxn1[ix][iz]=((1-dt*pmldx[ix]/2)*vxn0[ix][iz]-dt/denx[ix][iz]*ldx(txxn0,ix,iz))/(1+dt*pmldx[ix]/2); vzn1[ix][iz]=((1-dt*pmldz[iz]/2)*vzn0[ix][iz]-dt/denz[ix][iz]*ldz(txxn0,ix,iz))/(1+dt*pmldz[iz]/2); } } /*Velocity PML --bottom*/ for (ix=marg; ix<nx+2*pmlout+marg; ix++) { for (iz=marg+pmlout+nz; iz<nz+2*pmlout+marg; iz++) { vxn1[ix][iz]=((1-dt*pmldx[ix]/2)*vxn0[ix][iz]-dt/denx[ix][iz]*ldx(txxn0,ix,iz))/(1+dt*pmldx[ix]/2); vzn1[ix][iz]=((1-dt*pmldz[iz]/2)*vzn0[ix][iz]-dt/denz[ix][iz]*ldz(txxn0,ix,iz))/(1+dt*pmldz[iz]/2); } } }
static char * LDX2() { //load neg val into x reg CPU *c = getCPU(); int8_t operand = -99; OP_CODE_INFO *o = getOP_CODE_INFO(operand,0,modeImmediate); ldx(c,o); int8_t xVal = getRegByte(c,IND_X); mu_assert("LDX2 err, ACCUM reg != -99", xVal == -99); mu_run_test_with_args(testRegStatus,c,"10100000", " NVUBDIZC NVUBDIZC\nCLC1 err, %s != %s"); freeOP_CODE_INFO(o); free(c); return 0; }
void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { Argument jni_arg(jni_offset(), false); Register Rtmp = O0; #ifdef _LP64 __ ldx(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp); __ store_long_argument(Rtmp, jni_arg); #else __ ld(Llocals, Interpreter::local_offset_in_bytes(offset() + 1), Rtmp); __ store_argument(Rtmp, jni_arg); __ ld(Llocals, Interpreter::local_offset_in_bytes(offset() + 0), Rtmp); Argument successor(jni_arg.successor()); __ store_argument(Rtmp, successor); #endif }
void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { Argument jni_arg(jni_offset(), false); Register Rtmp = O0; Register Rtmp1 = G3_scratch; #ifdef _LP64 __ ldx(Llocals, -(offset() + 1) * wordSize, Rtmp); __ store_long_argument(Rtmp, jni_arg); #else __ ld(Llocals, -(offset() + 1) * wordSize, Rtmp); __ store_argument(Rtmp, jni_arg ); __ ld(Llocals, -(offset() + 0) * wordSize, Rtmp); __ store_argument(Rtmp, jni_arg.successor()); #endif }
/*
 * Backward (reverse-time) staggered-grid propagation with imaging.
 *
 *   mig1      [out] ntau x nx x nz lagged cross-correlation of fwf and the
 *                   backward wavefield; zeroed here before accumulation
 *   mig2      [out] nx x nz image: zero-lag correlation divided by
 *                   (sum of fwf^2 + SF_EPS)
 *   fwf       [in]  stored forward snapshots, indexed [snapshot][ix][iz]
 *   localrec  [in]  receiver data, indexed [receiver][it], added into txxn0
 *   verb            print progress through sf_warning
 *   wantwf          when true, write the backward snapshots to Ftmpbwf
 *
 * Grid sizes, time stepping parameters, model arrays, loop counters and the
 * swap pointer `transp` are file-scope variables that are not declared in
 * this block.  Always returns 0.
 *
 * All work arrays allocated here are released before returning; they were
 * previously leaked on every call.
 */
int sglfdback2(float ***mig1, float **mig2, float ***fwf, float **localrec, bool verb, bool wantwf, sf_file Ftmpbwf)
{
    float **txxn1, **txxn0, **vxn1, **vxn0, **vzn1, **vzn0;
    float **sill, **ccr, ***bwf;
    int wfit, htau;
    float tau;

    sill=sf_floatalloc2(nz, nx);
    ccr=sf_floatalloc2(nz, nx);
    bwf=sf_floatalloc3(nz, nx, wfnt);

    zero2(sill, nz, nx);
    zero2(ccr, nz, nx);
    zero3(mig1, nz, nx, ntau);

    txxn1=sf_floatalloc2(nzb, nxb);
    txxn0=sf_floatalloc2(nzb, nxb);
    vxn1=sf_floatalloc2(nzb, nxb);
    vxn0=sf_floatalloc2(nzb, nxb);
    vzn1=sf_floatalloc2(nzb, nxb);
    vzn0=sf_floatalloc2(nzb, nxb);

    zero2(txxn1, nzb, nxb);
    zero2(txxn0, nzb, nxb);
    zero2(vxn1, nzb, nxb);
    zero2(vxn0, nzb, nxb);
    zero2(vzn1, nzb, nxb);
    zero2(vzn0, nzb, nxb);

    zero2(txxn1x, nzb, nxb);
    zero2(txxn1z, nzb, nxb);
    zero2(txxn0x, nzb, nxb);
    zero2(txxn0z, nzb, nxb);

    /* snapshots are consumed from the last one backwards */
    wfit=wfnt-1;
    for(it=nt-1; it>=0; it--){
        if(verb) sf_warning("Backward it=%d/%d;", it+1, nt);

        /* stress, interior */
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
        for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){
            for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){
                txxn0[ix][iz]=txxn1[ix][iz]+dt*bc11[ix][iz]*(ldx(vxn1, ix-1, iz) +ldz(vzn1, ix, iz-1));
            }
        }
        pml_txxb(txxn0, vxn1, vzn1);

        /* add the recorded traces at the receiver positions */
#ifdef _OPENMP
#pragma omp parallel for private(ix)
#endif
        for(ix=0; ix<ng; ix++){
            txxn0[ix*ginv+pmlsize+nfd][pmlsize+nfd+gp]+=localrec[ix][it];
        }

        /* velocity, interior */
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
        for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){
            for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){
                vxn0[ix][iz]=vxn1[ix][iz]+dt/bdenx[ix][iz]*ldx(txxn0, ix, iz);
                vzn0[ix][iz]=vzn1[ix][iz]+dt/bdenz[ix][iz]*ldz(txxn0, ix, iz);
            }
        }
        pml_vxzb(vxn1, vzn1, vxn0, vzn0, txxn0);

        /* exchange time levels by swapping pointers */
        transp=txxn1; txxn1=txxn0; txxn0=transp;
        transp=vxn1; vxn1=vxn0; vxn0=transp;
        transp=vzn1; vzn1=vzn0; vzn0=transp;

        if(it%wfinv==0){
            for(ix=0; ix<nx; ix++)
                for(iz=0; iz<nz; iz++){
                    bwf[wfit][ix][iz]=txxn0[ix+pmlsize+nfd][iz+pmlsize+nfd];
                    ccr[ix][iz]+=fwf[wfit][ix][iz]*bwf[wfit][ix][iz];
                    sill[ix][iz]+=fwf[wfit][ix][iz]*fwf[wfit][ix][iz];
                }
            wfit--;
        }
    } //end of it
    if(verb) sf_warning(".");

    /* lagged correlation: forward snapshot it+htau against backward it-htau */
    for(itau=0; itau<ntau; itau++){
        tau=itau*dtau+tau0;
        htau=tau/wfdt;

        for(it=abs(htau); it<wfnt-abs(htau); it++){
            for(ix=0; ix<nx; ix++){
                for(iz=0; iz<nz; iz++){
                    mig1[itau][ix][iz]+=fwf[it+htau][ix][iz]*bwf[it-htau][ix][iz];
                }
            }
        }//end of it
    } // end of itau

    for(ix=0; ix<nx; ix++){
        for(iz=0; iz<nz; iz++){
            mig2[ix][iz]=ccr[ix][iz]/(sill[ix][iz]+SF_EPS);
        }
    }

    if(wantwf) sf_floatwrite(bwf[0][0], wfnt*nx*nz, Ftmpbwf);

    /* release work arrays (data block first, then the pointer tables) */
    free(*sill);  free(sill);
    free(*ccr);   free(ccr);
    free(**bwf);  free(*bwf);  free(bwf);
    free(*txxn1); free(txxn1);
    free(*txxn0); free(txxn0);
    free(*vxn1);  free(vxn1);
    free(*vxn0);  free(vxn0);
    free(*vzn1);  free(vzn1);
    free(*vzn0);  free(vzn0);

    return 0;
}
int sglfdback2(float **img1, float **img2, float ***wavfld, float **rcd, bool verb, bool wantwf, float **den, float **c11, geopar geop, srcpar srcp, pmlpar pmlp, sf_file Ftmpbwf)
/*< staggered grid lowrank FD backward propagation + imaging >*/
{
    /*
     * img1    [out] nx x nz zero-lag correlation of wavfld with the backward
     *               wavefield (used directly as the accumulator)
     * img2    [out] img1 divided by (sum of wavfld^2 + SF_EPS)
     * wavfld  [in]  forward snapshots, indexed [snapshot][ix][iz]
     * rcd     [in]  receiver data, indexed [receiver][it], written into txxn0
     * den,c11 [in]  model arrays on the padded nxb x nzb grid
     * wantwf        when true, write interior backward snapshots to Ftmpbwf
     * Always returns 0.
     *
     * Every array allocated here is released before returning (they were
     * previously leaked on each call).
     */

    /*caculate arrays*/
    float **txxn1, **txxn0, **vxn1, **vzn1, **vxn0, **vzn0;
    float **denx, **denz;
    float **sill, **ccr;
    /*grid index*/
    int nx, nz, nt, ix, iz, it, gn, ginter;
    int nxb, nzb, snpint;
    int gp;
    float dt;
    int pmlout, marg;
    bool freesurface;

    /* tmp variable */
    int wfit;

    nx = geop->nx;
    nz = geop->nz;
    nxb = geop->nxb;
    nzb = geop->nzb;

    gp = geop->gp;
    gn = geop->gn;
    ginter = geop->ginter;
    snpint = geop->snpint;

    nt = srcp->nt;
    dt = srcp->dt;

    pmlout = pmlp->pmlout;
    freesurface = pmlp->freesurface;
    marg = getmarg();

    denx = sf_floatalloc2(nzb, nxb);
    denz = sf_floatalloc2(nzb, nxb);

    sill = sf_floatalloc2(nz, nx);
    ccr = img1;   /* accumulate directly into the caller's array */

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nxb; ix++) {
        for (iz = 0; iz < nzb; iz++) {
            denx[ix][iz] = den[ix][iz];
            denz[ix][iz] = den[ix][iz];
        }
    }

    /*den[ix+1/2][iz]*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nxb-1; ix++) {
        for (iz = 0; iz < nzb; iz++) {
            denx[ix][iz] = (den[ix+1][iz] + den[ix][iz])*0.5;
        }
    }

    /*den[ix][iz+1/2]*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nxb; ix++) {
        for (iz = 0; iz < nzb-1; iz++) {
            denz[ix][iz] = (den[ix][iz+1] + den[ix][iz])*0.5;
        }
    }

    txxn1 = sf_floatalloc2(nzb, nxb);
    txxn0 = sf_floatalloc2(nzb, nxb);
    vxn1 = sf_floatalloc2(nzb, nxb);
    vzn1 = sf_floatalloc2(nzb, nxb);
    vxn0 = sf_floatalloc2(nzb, nxb);
    vzn0 = sf_floatalloc2(nzb, nxb);

    init_pml(nz, nx, dt, marg, pmlp);

    /* zero all wavefields in one sweep */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nxb; ix++) {
        for (iz = 0; iz < nzb; iz++) {
            txxn1[ix][iz] = 0.0;
            txxn0[ix][iz] = 0.0;
            vxn1[ix][iz] = 0.0;
            vxn0[ix][iz] = 0.0;
            vzn1[ix][iz] = 0.0;
            vzn0[ix][iz] = 0.0;
        }
    }

    /* zero the imaging accumulators */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nx; ix++) {
        for (iz = 0; iz < nz; iz++) {
            sill[ix][iz] = 0.0;
            ccr[ix][iz] = 0.0;
        }
    }

    /*Main loop*/
    wfit = (int)(nt-1)/snpint;
    for (it = nt-1; it>=0; it--) {
        if (verb) sf_warning("Backward it=%d/%d;", it, nt-1);

        /*Stress*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
        for (ix = marg+pmlout; ix < nx+marg+pmlout; ix++) {
            for (iz = marg+pmlout; iz < nz+marg+pmlout; iz++) {
                txxn0[ix][iz] = txxn1[ix][iz] + dt*c11[ix][iz]*(ldx(vxn1, ix-1, iz) + ldz(vzn1, ix, iz-1));
            }
        }
        /*Stress PML */
        pml_txxb(txxn0, vxn1, vzn1, c11, ldx, ldz, freesurface);

        /* overwrite the receiver positions with the recorded data */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
        for (ix = 0; ix < gn; ix++) {
            txxn0[ix*ginter+pmlout+marg][pmlout+marg+gp] = rcd[ix][it];
        }

        /*velocity*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
        for (ix = marg+pmlout; ix < nx+pmlout+marg; ix++) {
            for (iz = marg+pmlout; iz < nz+pmlout+marg; iz++) {
                vxn0[ix][iz] = vxn1[ix][iz] + dt/denx[ix][iz]*ldx(txxn0, ix, iz);
                vzn0[ix][iz] = vzn1[ix][iz] + dt/denz[ix][iz]*ldz(txxn0, ix, iz);
            }
        }

        /*Velocity PML */
        pml_vxzb(vxn1, vzn1, vxn0, vzn0, txxn0, denx, denz, ldx, ldz, freesurface);

        /*n1 -> n0*/
        time_step_exch(txxn1, txxn0, it);
        time_step_exch(vxn1, vxn0, it);
        time_step_exch(vzn1, vzn0, it);
        pml_tstep_exchb(it);

        if (wantwf && it%snpint == 0) {
            for (ix = 0; ix < nx; ix++)
                sf_floatwrite(txxn0[ix+pmlout+marg]+pmlout+marg, nz, Ftmpbwf);
        }

        if (it%snpint == 0) {
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
            for (ix = 0; ix < nx; ix++) {
                for (iz = 0; iz < nz; iz++) {
                    ccr[ix][iz] += wavfld[wfit][ix][iz]*txxn0[ix+pmlout+marg][iz+pmlout+marg];
                }
            }

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
            for (ix = 0; ix < nx; ix++) {
                for (iz = 0; iz < nz; iz++) {
                    sill[ix][iz] += wavfld[wfit][ix][iz]*wavfld[wfit][ix][iz];
                }
            }
            wfit--;
        }
    } /*Main loop*/
    if (verb) sf_warning(".");

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nx; ix++) {
        for (iz = 0; iz < nz; iz++) {
            img2[ix][iz] = ccr[ix][iz]/(sill[ix][iz]+SF_EPS);
        }
    }

    /* release work arrays (ccr aliases img1 and belongs to the caller) */
    free(*denx);  free(denx);
    free(*denz);  free(denz);
    free(*sill);  free(sill);
    free(*txxn1); free(txxn1);
    free(*txxn0); free(txxn0);
    free(*vxn1);  free(vxn1);
    free(*vzn1);  free(vzn1);
    free(*vxn0);  free(vxn0);
    free(*vzn0);  free(vzn0);

    return 0;
}
int sglfdfor2(float ***fwf, float **rcd, bool verb) { float **txxn1, **txxn0, **vxn1, **vxn0, **vzn1, **vzn0; int wfit; txxn1=sf_floatalloc2(nzb, nxb); txxn0=sf_floatalloc2(nzb, nxb); vxn1=sf_floatalloc2(nzb, nxb); vxn0=sf_floatalloc2(nzb, nxb); vzn1=sf_floatalloc2(nzb, nxb); vzn0=sf_floatalloc2(nzb, nxb); zero2(txxn1, nzb, nxb); zero2(txxn0, nzb, nxb); zero2(vxn1, nzb, nxb); zero2(vxn0, nzb, nxb); zero2(vzn1, nzb, nxb); zero2(vzn0, nzb, nxb); zero2(txxn1x, nzb, nxb); zero2(txxn1z, nzb, nxb); zero2(txxn0x, nzb, nxb); zero2(txxn0z, nzb, nxb); wfit=0; for(it=0; it<nt; it++){ // sf_warning("test txxn1[801][30]=%d",txxn1[801][30]) if(verb) sf_warning("Forward it=%d/%d;", it+1, nt); #ifdef _OPENMP #pragma omp parallel for private(ix,iz) #endif for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){ for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){ vxn1[ix][iz]=vxn0[ix][iz]-dt/fdenx[ix][iz]*ldx(txxn0, ix, iz); vzn1[ix][iz]=vzn0[ix][iz]-dt/fdenz[ix][iz]*ldz(txxn0, ix, iz); } } pml_vxz(vxn1, vzn1, vxn0, vzn0, txxn0); #ifdef _OPENMP #pragma omp parallel for private(ix,iz) #endif for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){ for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){ txxn1[ix][iz]=txxn0[ix][iz]-dt*fc11[ix][iz]*(ldx(vxn1, ix-1, iz) + ldz(vzn1, ix, iz-1)); } } pml_txx(txxn1, vxn1, vzn1); if((it*dt)<srctrunc){ explsource(txxn1); } if(it%wfinv==0){ #ifdef _OPENMP #pragma omp parallel for private(ix,iz) #endif for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ fwf[wfit][ix][iz]=txxn0[ix+nfd+pmlsize][iz+nfd+pmlsize]; } } wfit++; } #ifdef _OPENMP #pragma omp parallel for private(ix) #endif for(ix=0; ix<ng; ix++){ rcd[ix][it]=txxn0[ix*ginv+pmlsize+nfd][pmlsize+nfd+gp]; } transp=txxn0; txxn0=txxn1; txxn1=transp; transp=vxn0; vxn0=vxn1; vxn1=transp; transp=vzn0; vzn0=vzn1; vzn1=transp; } // end of it if(verb) sf_warning("."); return 0;; }
int sglfdfor2(float ***wavfld, float **rcd, bool verb, float **den, float **c11, geopar geop, srcpar srcp, pmlpar pmlp)
/*< staggered grid lowrank FD forward modeling >*/
{
    /*
     * wavfld  [out] snapshots of the interior stress field, one every
     *               geop->snpint steps, indexed [snapshot][ix][iz]
     * rcd     [out] receiver traces, indexed [receiver][it]; zeroed first
     * den,c11 [in]  model arrays on the padded nxb x nzb grid
     * Returns the number of snapshots stored in wavfld.
     *
     * Changes from the previous version: nth is initialised (it was read
     * uninitialised when built without OpenMP) and is written by a single
     * thread; every array allocated here is released before returning.
     */

    /*caculate arrays*/
    float **txxn1, **txxn0, **vxn1, **vzn1, **vxn0, **vzn0;
    float **denx, **denz;
    /*grid index*/
    int nx, nz, nt, ix, iz, it;
    int nxb, nzb, snpint;
    int spx, spz, gp, gn, ginter;
    float dt;
    int pmlout, marg;
    bool freesurface;

    /* tmp variable */
    int wfit;
    int nth = 1;   /* thread count reported below; 1 without OpenMP */

    nx = geop->nx;
    nz = geop->nz;
    nxb = geop->nxb;
    nzb = geop->nzb;

    spx = geop->spx;
    spz = geop->spz;
    gp = geop->gp;
    gn = geop->gn;
    ginter = geop->ginter;
    snpint = geop->snpint;

    nt = srcp->nt;
    dt = srcp->dt;

    pmlout = pmlp->pmlout;
    freesurface = pmlp->freesurface;
    marg = getmarg();

    denx = sf_floatalloc2(nzb, nxb);
    denz = sf_floatalloc2(nzb, nxb);

#ifdef _OPENMP
#pragma omp parallel
    {
        /* one thread records the team size; avoids concurrent writes */
#pragma omp single
        nth = omp_get_num_threads();
    }
#endif
    sf_warning(">>>> Using %d threads <<<<<", nth);

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nxb; ix++) {
        for (iz = 0; iz < nzb; iz++) {
            denx[ix][iz] = den[ix][iz];
            denz[ix][iz] = den[ix][iz];
        }
    }

    /*den[ix+1/2][iz]*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nxb-1; ix++) {
        for (iz = 0; iz < nzb; iz++) {
            denx[ix][iz] = (den[ix+1][iz] + den[ix][iz])*0.5;
        }
    }

    /*den[ix][iz+1/2]*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nxb; ix++) {
        for (iz = 0; iz < nzb-1; iz++) {
            denz[ix][iz] = (den[ix][iz+1] + den[ix][iz])*0.5;
        }
    }

    txxn1 = sf_floatalloc2(nzb, nxb);
    txxn0 = sf_floatalloc2(nzb, nxb);
    vxn1 = sf_floatalloc2(nzb, nxb);
    vzn1 = sf_floatalloc2(nzb, nxb);
    vxn0 = sf_floatalloc2(nzb, nxb);
    vzn0 = sf_floatalloc2(nzb, nxb);

    init_pml(nz, nx, dt, marg, pmlp);

    /* zero all wavefields in one sweep */
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (ix = 0; ix < nxb; ix++) {
        for (iz = 0; iz < nzb; iz++) {
            txxn1[ix][iz] = 0.0;
            txxn0[ix][iz] = 0.0;
            vxn1[ix][iz] = 0.0;
            vxn0[ix][iz] = 0.0;
            vzn1[ix][iz] = 0.0;
            vzn0[ix][iz] = 0.0;
        }
    }

#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
    for (it = 0; it < nt; it++) {
        for (ix = 0; ix < gn; ix++) {
            rcd[ix][it] = 0.0;
        }
    }

    /*Main loop*/
    wfit = 0;
    for (it = 0; it < nt; it++) {
        if (verb) sf_warning("Forward it=%d/%d;", it, nt-1);

        /*velocity*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
        for (ix = marg+pmlout; ix < nx+pmlout+marg; ix++) {
            for (iz = marg+pmlout; iz < nz+pmlout+marg; iz++) {
                vxn1[ix][iz] = vxn0[ix][iz] - dt/denx[ix][iz]*ldx(txxn0, ix, iz);
                vzn1[ix][iz] = vzn0[ix][iz] - dt/denz[ix][iz]*ldz(txxn0, ix, iz);
            }
        }

        /*Velocity PML */
        pml_vxz(vxn1, vzn1, vxn0, vzn0, txxn0, denx, denz, ldx, ldz, freesurface);

        /*Stress*/
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
        for (ix = marg+pmlout; ix < nx+marg+pmlout; ix++) {
            for (iz = marg+pmlout; iz < nz+marg+pmlout; iz++) {
                txxn1[ix][iz] = txxn0[ix][iz] - dt*c11[ix][iz]*(ldx(vxn1, ix-1, iz) + ldz(vzn1, ix, iz-1));
            }
        }

        /*Stress PML */
        pml_txx(txxn1, vxn1, vzn1, c11, ldx, ldz, freesurface);

        /* source is applied up to and including the truncation time */
        if ((it*dt)<=srcp->trunc) {
            explsourcet(txxn1, srcp->wavelet, it, dt, spx+pmlout+marg, spz+pmlout+marg, nxb, nzb, srcp);
        }

        /* snapshot of the previous-level stress on the interior grid */
        if (it%snpint == 0) {
#ifdef _OPENMP
#pragma omp parallel for private(ix, iz)
#endif
            for (ix = 0; ix < nx; ix++)
                for (iz = 0; iz < nz; iz++)
                    wavfld[wfit][ix][iz] = txxn0[ix+pmlout+marg][iz+pmlout+marg];
            wfit++;
        }

        /* record at the receiver positions */
#ifdef _OPENMP
#pragma omp parallel for private(ix)
#endif
        for (ix = 0; ix < gn; ix++) {
            rcd[ix][it] = txxn0[ix*ginter+pmlout+marg][pmlout+marg+gp];
        }

        /*n1 -> n0*/
        time_step_exch(txxn0, txxn1, it);
        time_step_exch(vxn0, vxn1, it);
        time_step_exch(vzn0, vzn1, it);
        pml_tstep_exch(it);

    } /*Main loop*/
    if (verb) sf_warning(".");

    /* release work arrays (data block first, then the pointer table) */
    free(*denx);  free(denx);
    free(*denz);  free(denz);
    free(*txxn1); free(txxn1);
    free(*txxn0); free(txxn0);
    free(*vxn1);  free(vxn1);
    free(*vzn1);  free(vzn1);
    free(*vxn0);  free(vxn0);
    free(*vzn0);  free(vzn0);

    return wfit;
}
address JNI_FastGetField::generate_fast_get_long_field() { const char *name = "jni_fast_GetLongField"; ResourceMark rm; BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); CodeBuffer cbuf(blob); MacroAssembler* masm = new MacroAssembler(&cbuf); address fast_entry = __ pc(); Label label1, label2; AddressLiteral cnt_addrlit(SafepointSynchronize::safepoint_counter_addr()); __ sethi (cnt_addrlit, G3); Address cnt_addr(G3, cnt_addrlit.low10()); __ ld (cnt_addr, G4); __ andcc (G4, 1, G0); __ br (Assembler::notZero, false, Assembler::pn, label1); __ delayed()->srl (O2, 2, O4); __ ld_ptr (O1, 0, O5); __ add (O5, O4, O5); #ifndef _LP64 assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small"); speculative_load_pclist[count++] = __ pc(); __ ld (O5, 0, G2); speculative_load_pclist[count] = __ pc(); __ ld (O5, 4, O3); #else assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); speculative_load_pclist[count] = __ pc(); __ ldx (O5, 0, O3); #endif __ ld (cnt_addr, G1); __ cmp (G1, G4); __ br (Assembler::notEqual, false, Assembler::pn, label2); __ delayed()->mov (O7, G1); #ifndef _LP64 __ mov (G2, O0); __ retl (); __ delayed()->mov (O3, O1); #else __ retl (); __ delayed()->mov (O3, O0); #endif #ifndef _LP64 slowcase_entry_pclist[count-1] = __ pc(); slowcase_entry_pclist[count++] = __ pc() ; #else slowcase_entry_pclist[count++] = __ pc(); #endif __ bind (label1); __ mov (O7, G1); address slow_case_addr = jni_GetLongField_addr(); __ bind (label2); __ call (slow_case_addr, relocInfo::none); __ delayed()->mov (G1, O7); __ flush (); return fast_entry; }
// ldx through an Address.  Without an index register the load uses
// base + (disp + offset); with one it uses base + index, and offset is
// asserted to be zero.
inline void MacroAssembler::ldx(const Address& a, Register d, int offset) {
  if (!a.has_index()) {
    ldx(a.base(), a.disp() + offset, d);
    return;
  }
  assert(offset == 0, "");
  ldx(a.base(), a.index(), d);
}
int main(int argc, char* argv[]) { clock_t tstart,tend; double duration; /*flag*/ bool verb; /*I/O*/ sf_file Fvel, Fden, Fsrc; sf_file Fwf/*wave field*/, Frec/*record*/, Fic/*Initial Condition*/; sf_file FG, Fsx; /*I/O for MMS*/ sf_file Fpsrc, Fvsrc, Fpint, Fvint; sf_axis at, ax; sf_axis icaxis; /*I/O arrays*/ float *src; /*point source*/ float *vel, *den, *c11, *record; float *ic; float *sxtmp; /*I/O for MMS*/ float **psrc, **vsrc, *pint, *vint; /*Grid index variables*/ int nx, nt, ix, it; int nxb; float dt, dx; /*caculate arrays*/ float *txxn1, *txxn0, *vxn1, *vxn0; float *denx; /*source*/ spara sp={0}; bool srcdecay, srcmms, inject; float srctrunc; float slx; int spx; /*PML*/ int pmlout, pmld0, decaybegin; int decay; float gamma = GAMMA; int mx; /*margin*/ /*options*/ bool freesurface; int gp; float gdep; int snapinter; int it0; int icnx; tstart = clock(); sf_init(argc, argv); if (!sf_getbool("verb", &verb)) verb=false; /*verbosity*/ /*Set I/O file*/ Fsrc = sf_input("in"); /*source wavelet*/ Fvel = sf_input("vel"); /*velocity*/ Fden = sf_input("den"); /*density*/ Fwf = sf_output("out"); /*wavefield snap*/ Frec = sf_output("rec"); /*record*/ FG = sf_input("G"); Fsx = sf_input("sx"); if (SF_FLOAT != sf_gettype(Fsrc)) sf_error("Need float input"); if (SF_FLOAT != sf_gettype(Fvel)) sf_error("Need float input"); if (SF_FLOAT != sf_gettype(Fden)) sf_error("Need float input"); /*parameters of source*/ if (!sf_getbool("srcmms", &srcmms)) srcmms = false; /*source type: if y, use point source */ if (!srcmms && !sf_getfloat("slx", &slx)) sf_error("Need slx input"); /*source location in x */ if (!srcmms && slx<0.0) sf_error("slx need >=0.0"); if (!sf_getbool("srcdecay",&srcdecay)) srcdecay=false; /*source decay y=use*/ if (!sf_getfloat("srctrunc",&srctrunc)) srctrunc=1000; /*source trunc time (s)*/ if (!sf_getbool("inject", &inject)) inject = true; /* inject=y use inject source; inject=n use initial condition*/ if (srcmms && !inject) sf_error("Initial condition and MMS are 
conflicted"); /*parameters of geometry*/ if (!sf_getfloat("gdep", &gdep)) gdep = 0.0; /*depth of geophone */ if (gdep <0.0) sf_error("gdep need to be >=0.0"); /*source and receiver location*/ if (!sf_getint("snapinter", &snapinter)) snapinter=1; /* snap interval */ if (!sf_getint("pmlsize", &pmlout)) pmlout=PMLOUT; /* size of PML layer */ if (!sf_getint("pmld0", &pmld0)) pmld0=PMLD0; /* PML parameter */ if (!sf_getint("decay",&decay)) decay=DECAY_FLAG; /* Flag of decay boundary condtion: 1 = use ; 0 = not use */ if (!sf_getint("decaybegin",&decaybegin)) decaybegin=DECAY_BEGIN; /* Begin time of using decay boundary condition */ if (!sf_getbool("freesurface", &freesurface)) freesurface=false; /*free surface*/ if (!sf_histint(FG,"n2", &lenx)) sf_error("No n2= in input"); /* Read/Write axes */ at = sf_iaxa(Fsrc,1); nt = sf_n(at); dt = sf_d(at); ax = sf_iaxa(Fvel,1); nxb = sf_n(ax); dx = sf_d(ax); /*read FD coefficients*/ G = sf_floatalloc2(nxb, lenx); sf_floatread(G[0], nxb*lenx, FG); /*read FD schemes*/ sxtmp = sf_floatalloc(lenx); sx = sf_intalloc(lenx); sf_floatread(sxtmp, lenx, Fsx); mx = 0; for (ix=0; ix<lenx; ix++) { sx[ix] = (int)sxtmp[ix]; mx = abs(sx[ix])>mx? 
abs(sx[ix]):mx; } marg = mx; nx = nxb - 2*pmlout - 2*marg; record = sf_floatalloc(nt); /*set wavefield axes*/ sf_setn(at, (int)(nt-1)/snapinter+1); /*set axis for snap file*/ sf_setd(at,dt*snapinter); sf_setn(ax, nx); sf_oaxa(Fwf,ax,1); sf_oaxa(Fwf,at,2); /*set for record*/ sf_setn(ax,1); /*read model*/ vel = sf_floatalloc(nxb); den = sf_floatalloc(nxb); c11 = sf_floatalloc(nxb); denx = sf_floatalloc(nxb); sf_floatread(vel, nxb, Fvel); sf_floatread(den, nxb, Fden); for (ix = 0; ix < nxb; ix++) { c11[ix] = den[ix]*vel[ix]*vel[ix]; denx[ix] = den[ix]; if(c11[ix] <= 0.0) sf_warning("c11=%f: ix=%d ",c11[ix], ix); } /*den[ix+1/2]*/ for ( ix = 0; ix < nxb-1; ix++) { denx[ix] = (den[ix+1] + den[ix])*0.5; } /*source and receiver location*/ spx = (int)(slx/dx+0.5); gp = (int)(gdep/dx+0.5); /*read source*/ src = sf_floatalloc(nt); sf_floatread(src,nt,Fsrc); /*Initial Condition*/ if (inject == false) { Fic = sf_input("ic"); /*initial condition*/ if (SF_FLOAT != sf_gettype(Fic)) sf_error("Need float input of ic"); icaxis = sf_iaxa(Fic, 1); icnx = sf_n(icaxis); if (nx != icnx) sf_error("I.C. 
and velocity should be the same size."); ic = sf_floatalloc(nx); sf_floatread(ic, nx, Fic); } else { ic = NULL; } /* Method of Manufactured Solution*/ if (inject && srcmms) { Fpsrc = sf_input("presrc"); Fvsrc = sf_input("velsrc"); Fpint = sf_input("preinit"); Fvint = sf_input("velinit"); if (SF_FLOAT != sf_gettype(Fpsrc)) sf_error("Need float input of presrc"); if (SF_FLOAT != sf_gettype(Fvsrc)) sf_error("Need float input of velsrc"); if (SF_FLOAT != sf_gettype(Fpint)) sf_error("Need float input of preinit"); if (SF_FLOAT != sf_gettype(Fvint)) sf_error("Need float input of velinit"); psrc = sf_floatalloc2(nx, nt); vsrc = sf_floatalloc2(nx, nt); pint = sf_floatalloc(nx); vint = sf_floatalloc(nx); sf_floatread(psrc[0], nx*nt, Fpsrc); sf_floatread(vsrc[0], nx*nt, Fvsrc); sf_floatread(pint, nx, Fpint); sf_floatread(vint, nx, Fvint); } else { psrc = NULL; vsrc = NULL; pint = NULL; vint = NULL; } txxn1 = sf_floatalloc(nxb); txxn0 = sf_floatalloc(nxb); vxn1 = sf_floatalloc(nxb); vxn0 = sf_floatalloc(nxb); init_pml1(nx, dt, pmlout, marg, pmld0, decay, decaybegin, gamma); for (ix = 0; ix < nxb; ix++) { txxn1[ix] = 0.0; } for (ix = 0; ix < nxb; ix++) { txxn0[ix] = 0.0; } for (ix = 0; ix < nxb; ix++) { vxn1[ix] = 0.0; } for (ix = 0; ix < nxb; ix++) { vxn0[ix] = 0.0; } for (it = 0; it < nt; it++) { record[it] = 0.0; } sp.trunc=srctrunc; sp.srange=10; sp.alpha=0.5; sp.decay=srcdecay?1:0; if (verb) { sf_warning("============================"); sf_warning("nx=%d nt=%d", nx, nt); sf_warning("dx=%f dt=%f", dx, dt); sf_warning("lenx=%d marg=%d pmlout=%d", lenx, marg, pmlout); sf_warning("srctrunc=%f srcdecay=%d", sp.trunc, sp.decay); sf_warning("slx=%f, spx=%d, gdep=%f gp=%d",slx,spx,gdep,gp); for(ix=0; ix<lenx; ix++){ sf_warning("[sxx]=[%d,] G=%f",sx[ix], G[ix][0]); } sf_warning("============================"); } /* MAIN LOOP */ it0 = 0; if (inject == false) { it0 = 1; for(ix = 0; ix < nx; ix++) { txxn0[ix+marg+pmlout] = ic[ix]; } sf_floatwrite(txxn0+pmlout+marg, nx, Fwf); 
record[0] = txxn0[pmlout+marg+gp]; } /* MMS */ if (inject && srcmms ) { it0 = 0; for (ix=0; ix <nx; ix++) { txxn0[ix+marg+pmlout] = pint[ix]; /*P(x,0)*/ vxn0[ix+marg+pmlout] = vint[ix]; /*U(x, -dt/2)*/ } } for (it = it0; it < nt; it++) { if(verb) sf_warning("it=%d/%d;", it, nt-1); /*velocity*/ for (ix = marg+pmlout; ix < nx+pmlout+marg; ix++ ) { vxn1[ix] = vxn0[ix] - dt/denx[ix]*ldx(txxn0, ix); } /* MMS */ if (inject && srcmms) for (ix = 0; ix < nx; ix++) vxn1[ix+marg+pmlout] += vsrc[it][ix]*dt; for (ix = 0; ix < nxb; ix++) vxn0[ix] = vxn1[ix]; /*Velocity PML */ /* pml1_vxz(vxn1, vxn0, txxn0, denx, ldx, freesurface); */ /*Stress*/ for (ix = marg+pmlout; ix < nx+marg+pmlout; ix++) { txxn1[ix] = txxn0[ix] - dt*c11[ix]*ldx(vxn1, ix-1); } /*Stress PML */ /* pml1_txx(txxn1, vxn1, c11, ldx, freesurface); */ if (inject) { if (!srcmms && (it*dt)<=sp.trunc) { txxn1[marg+pmlout+spx] += src[it]*dt; } if (srcmms) { for (ix = 0; ix < nx; ix++) txxn1[ix+marg+pmlout] +=psrc[it][ix]*dt; } } if ( it%snapinter==0 ) { sf_floatwrite(txxn0+pmlout+marg, nx, Fwf); } record[it] = txxn0[pmlout+marg+gp]; for (ix=0; ix<nxb; ix++) txxn0[ix] = txxn1[ix]; }/*End of LOOP TIME*/ sf_warning("."); sf_floatwrite(record, nt, Frec); tend = clock(); duration=(double)(tend-tstart)/CLOCKS_PER_SEC; sf_warning(">> The CPU time of sfsglfd1pml is: %f seconds << ", duration); exit(0); }
// Call an accessor method (assuming it is resolved, otherwise drop into
// vanilla (slow path) entry.
//
// Emits a fast-path interpreter entry that performs the field load directly
// when the method's constant pool cache entry holds a resolved getfield.
// Every case that is not handled here (null receiver, interp_only_mode set,
// unresolved entry) branches to the zerolocals entry.
//
// Returns NULL without emitting anything when UseFastAccessorMethods is false
// and FLAG_IS_ERGO(UseFastAccessorMethods) is false as well; otherwise returns
// the pc at which the generated entry starts.
//
// The per-type load sequences are emitted only on the first call: their
// addresses are kept in a function-local static branch table and a slot that
// is already non-zero is not generated again.
address InterpreterGenerator::generate_accessor_entry(void) {
  if (!UseFastAccessorMethods && (!FLAG_IS_ERGO(UseFastAccessorMethods))) {
    return NULL;
  }

  Label Lslow_path, Lacquire;

  // Register assignment. Note the deliberate aliasing: Rcodes shares a
  // register with Rconst_method, and Rjvmti_mode shares one with Rscratch.
  const Register
         Rclass_or_obj = R3_ARG1,
         Rconst_method = R4_ARG2,
         Rcodes        = Rconst_method,
         Rcpool_cache  = R5_ARG3,
         Rscratch      = R11_scratch1,
         Rjvmti_mode   = Rscratch,
         Roffset       = R12_scratch2,
         Rflags        = R6_ARG4,
         Rbtable       = R7_ARG5;

  // One code address per tos state; static, so it survives across calls.
  static address branch_table[number_of_states];

  address entry = __ pc();

  // Check for safepoint:
  // Deliberately not emitted on this fast path.

  // Also check for JVMTI mode
  // Check for null obj, take slow path if so.
  __ ld(Rclass_or_obj, Interpreter::stackElementSize, CC_INTERP_ONLY(R17_tos) NOT_CC_INTERP(R15_esp));
  __ lwz(Rjvmti_mode, thread_(interp_only_mode));
  __ cmpdi(CCR1, Rclass_or_obj, 0);
  __ cmpwi(CCR0, Rjvmti_mode, 0);
  // Fold both conditions into CCR0.eq so that a single branch covers them.
  __ crorc(/*CCR0 eq*/2, /*CCR1 eq*/4+2, /*CCR0 eq*/2);
  __ beq(CCR0, Lslow_path); // this==null or jvmti_mode!=0

  // Do 2 things in parallel:
  // 1. Load the index out of the first instruction word, which looks like this:
  //    <0x2a><0xb4><index (2 byte, native endianness)>.
  // 2. Load constant pool cache base.
  __ ld(Rconst_method, in_bytes(Method::const_offset()), R19_method);
  __ ld(Rcpool_cache, in_bytes(ConstMethod::constants_offset()), Rconst_method);

  // This overwrites Rconst_method (Rcodes is the same register); the
  // ConstMethod pointer is not used again after this load.
  __ lhz(Rcodes, in_bytes(ConstMethod::codes_offset()) + 2, Rconst_method); // Lower half of 32 bit field.
  __ ld(Rcpool_cache, ConstantPool::cache_offset_in_bytes(), Rcpool_cache);

  // Get the const pool entry by means of <index>.
  const int codes_shift = exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord);
  __ slwi(Rscratch, Rcodes, codes_shift); // (codes&0xFFFF)<<codes_shift
  __ add(Rcpool_cache, Rscratch, Rcpool_cache);

  // Check if cpool cache entry is resolved.
  // We are resolved if the indices offset contains the current bytecode.
  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
  // Big Endian:
  // NOTE(review): the "+ 7 - 2" presumably selects the byte of the 64-bit
  // indices word that holds the getfield bytecode - confirm against the
  // ConstantPoolCacheEntry layout.
  __ lbz(Rscratch, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::indices_offset()) + 7 - 2, Rcpool_cache);
  __ cmpwi(CCR0, Rscratch, Bytecodes::_getfield);
  __ bne(CCR0, Lslow_path);
  __ isync(); // Order succeeding loads wrt. load of _indices field from cpool_cache.

  // Finally, start loading the value: Get cp cache entry into regs.
  __ ld(Rflags, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcpool_cache);
  __ ld(Roffset, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f2_offset()), Rcpool_cache);

  // Following code is from templateTable::getfield_or_static
  // Load pointer to branch table
  __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);

  // Get volatile flag
  __ rldicl(Rscratch, Rflags, 64-ConstantPoolCacheEntry::is_volatile_shift, 63); // extract volatile bit
  // note: sync is needed before volatile load on PPC64

  // Check field type
  // Rflags now holds the tos state and is used below as the table index.
  __ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);

#ifdef ASSERT
  Label LFlagInvalid;
  // Debug builds only: reject a tos state outside the branch table.
  __ cmpldi(CCR0, Rflags, number_of_states);
  __ bge(CCR0, LFlagInvalid);

  // Debug builds only: the words at 0(R1_SP) and 0(R21_sender_SP) must match.
  __ ld(R9_ARG7, 0, R1_SP);
  __ ld(R10_ARG8, 0, R21_sender_SP);
  __ cmpd(CCR0, R9_ARG7, R10_ARG8);
  __ asm_assert_eq("backlink", 0x543);
#endif // ASSERT
  __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.

  // Load from branch table and dispatch (volatile case: one instruction ahead)
  __ sldi(Rflags, Rflags, LogBytesPerWord); // tos state -> byte offset into branch_table
  __ cmpwi(CCR6, Rscratch, 1); // volatile?  (CCR6 is tested again after the load)
  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
    __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // volatile ? size of 1 instruction : 0
  }
  __ ldx(Rbtable, Rbtable, Rflags);

  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
    // For volatile fields this steps back onto the fence emitted right
    // before each table entry.
    __ subf(Rbtable, Rscratch, Rbtable); // point to volatile/non-volatile entry point
  }
  __ mtctr(Rbtable);
  __ bctr();

#ifdef ASSERT
  __ bind(LFlagInvalid);
  __ stop("got invalid flag", 0x6541);

  // The table must be either completely empty (first call) or completely
  // filled (any later call); a partially filled table is a bug.
  bool all_uninitialized = true,
       all_initialized   = true;
  for (int i = 0; i<number_of_states; ++i) {
    all_uninitialized = all_uninitialized && (branch_table[i] == NULL);
    all_initialized   = all_initialized   && (branch_table[i] != NULL);
  }
  assert(all_uninitialized != all_initialized, "consistency"); // either or

  // vtos, dtos and ftos have no load sequence below; in debug builds their
  // slots all point at the stop() that follows.
  __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
  if (branch_table[vtos] == 0) branch_table[vtos] = __ pc(); // non-volatile_entry point
  if (branch_table[dtos] == 0) branch_table[dtos] = __ pc(); // non-volatile_entry point
  if (branch_table[ftos] == 0) branch_table[ftos] = __ pc(); // non-volatile_entry point
  __ stop("unexpected type", 0x6551);
#endif

  // Each entry below has the same shape: fence (volatile entry), recorded pc
  // (non-volatile entry), the type-specific load into R3_RET, then either a
  // branch to the acquire sequence (CCR6 eq, i.e. volatile) or a plain return.

  if (branch_table[itos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[itos] = __ pc(); // non-volatile_entry point
    __ lwax(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  if (branch_table[ltos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[ltos] = __ pc(); // non-volatile_entry point
    __ ldx(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  if (branch_table[btos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[btos] = __ pc(); // non-volatile_entry point
    __ lbzx(R3_RET, Rclass_or_obj, Roffset);
    __ extsb(R3_RET, R3_RET); // sign-extend the loaded byte
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  if (branch_table[ctos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[ctos] = __ pc(); // non-volatile_entry point
    __ lhzx(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  if (branch_table[stos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[stos] = __ pc(); // non-volatile_entry point
    __ lhax(R3_RET, Rclass_or_obj, Roffset);
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  if (branch_table[atos] == 0) { // generate only once
    __ align(32, 28, 28); // align load
    __ fence(); // volatile entry point (one instruction before non-volatile_entry point)
    branch_table[atos] = __ pc(); // non-volatile_entry point
    __ load_heap_oop(R3_RET, (RegisterOrConstant)Roffset, Rclass_or_obj);
    __ verify_oop(R3_RET);
    //__ dcbt(R3_RET); // prefetch
    __ beq(CCR6, Lacquire);
    __ blr();
  }

  // Shared tail for volatile loads. Unlike the table entries it is not
  // guarded, so it is emitted on every call of this generator.
  __ align(32, 12);
  __ bind(Lacquire);
  __ twi_0(R3_RET);
  __ isync(); // acquire
  __ blr();

#ifdef ASSERT
  // After generation every slot of the table must be populated.
  for (int i = 0; i<number_of_states; ++i) {
    assert(branch_table[i], "accessor_entry initialization");
    //tty->print_cr("accessor_entry: branch_table[%d] = 0x%llx (opcode 0x%llx)", i, branch_table[i], *((unsigned int*)branch_table[i]));
  }
#endif

  // Fallback: hand over to the zerolocals interpreter entry.
  __ bind(Lslow_path);
  __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), Rscratch);
  __ flush();

  return entry;
}
// Convenience overload of ldx taking a base register plus a
// register-or-constant second operand: the pair is packed into an Address and
// handed, together with d, to the Address-taking ldx overload.
inline void MacroAssembler::ldx(Register s1, RegisterOrConstant s2, Register d) {
  const Address src(s1, s2);
  ldx(src, d);
}
/** * @brief emulate instruction * @return returns false if something goes wrong (e.g. illegal instruction) * * Current limitations: * * - Illegal instructions are not implemented * - Excess cycles due to page boundary crossing are not calculated * - Some known architectural bugs are not emulated */ bool Cpu::emulate() { /* fetch instruction */ uint8_t insn = fetch_op(); bool retval = true; /* emulate instruction */ switch(insn) { /* BRK */ case 0x0: brk(); break; /* ORA (nn,X) */ case 0x1: ora(load_byte(addr_indx()),6); break; /* ORA nn */ case 0x5: ora(load_byte(addr_zero()),3); break; /* ASL nn */ case 0x6: asl_mem(addr_zero(),5); break; /* PHP */ case 0x8: php(); break; /* ORA #nn */ case 0x9: ora(fetch_op(),2); break; /* ASL A */ case 0xA: asl_a(); break; /* ORA nnnn */ case 0xD: ora(load_byte(addr_abs()),4); break; /* ASL nnnn */ case 0xE: asl_mem(addr_abs(),6); break; /* BPL nn */ case 0x10: bpl(); break; /* ORA (nn,Y) */ case 0x11: ora(load_byte(addr_indy()),5); break; /* ORA nn,X */ case 0x15: ora(load_byte(addr_zerox()),4); break; /* ASL nn,X */ case 0x16: asl_mem(addr_zerox(),6); break; /* CLC */ case 0x18: clc(); break; /* ORA nnnn,Y */ case 0x19: ora(load_byte(addr_absy()),4); break; /* ORA nnnn,X */ case 0x1D: ora(load_byte(addr_absx()),4); break; /* ASL nnnn,X */ case 0x1E: asl_mem(addr_absx(),7); break; /* JSR */ case 0x20: jsr(); break; /* AND (nn,X) */ case 0x21: _and(load_byte(addr_indx()),6); break; /* BIT nn */ case 0x24: bit(addr_zero(),3); break; /* AND nn */ case 0x25: _and(load_byte(addr_zero()),3); break; /* ROL nn */ case 0x26: rol_mem(addr_zero(),5); break; /* PLP */ case 0x28: plp(); break; /* AND #nn */ case 0x29: _and(fetch_op(),2); break; /* ROL A */ case 0x2A: rol_a(); break; /* BIT nnnn */ case 0x2C: bit(addr_abs(),4); break; /* AND nnnn */ case 0x2D: _and(load_byte(addr_abs()),4); break; /* ROL nnnn */ case 0x2E: rol_mem(addr_abs(),6); break; /* BMI nn */ case 0x30: bmi(); break; /* AND (nn,Y) */ case 0x31: 
_and(load_byte(addr_indy()),5); break; /* AND nn,X */ case 0x35: _and(load_byte(addr_zerox()),4); break; /* ROL nn,X */ case 0x36: rol_mem(addr_zerox(),6); break; /* SEC */ case 0x38: sec(); break; /* AND nnnn,Y */ case 0x39: _and(load_byte(addr_absy()),4); break; /* AND nnnn,X */ case 0x3D: _and(load_byte(addr_absx()),4); break; /* ROL nnnn,X */ case 0x3E: rol_mem(addr_absx(),7); break; /* RTI */ case 0x40: rti(); break; /* EOR (nn,X) */ case 0x41: eor(load_byte(addr_indx()),6); break; /* EOR nn */ case 0x45: eor(load_byte(addr_zero()),3); break; /* LSR nn */ case 0x46: lsr_mem(addr_zero(),5); break; /* PHA */ case 0x48: pha(); break; /* EOR #nn */ case 0x49: eor(fetch_op(),2); break; /* BVC */ case 0x50: bvc(); break; /* JMP nnnn */ case 0x4C: jmp(); break; /* EOR nnnn */ case 0x4D: eor(load_byte(addr_abs()),4); break; /* LSR A */ case 0x4A: lsr_a(); break; /* LSR nnnn */ case 0x4E: lsr_mem(addr_abs(),6); break; /* EOR (nn,Y) */ case 0x51: eor(load_byte(addr_indy()),5); break; /* EOR nn,X */ case 0x55: eor(load_byte(addr_zerox()),4); break; /* LSR nn,X */ case 0x56: lsr_mem(addr_zerox(),6); break; /* CLI */ case 0x58: cli(); break; /* EOR nnnn,Y */ case 0x59: eor(load_byte(addr_absy()),4); break; /* EOR nnnn,X */ case 0x5D: eor(load_byte(addr_absx()),4); break; /* LSR nnnn,X */ case 0x5E: lsr_mem(addr_absx(),7); break; /* RTS */ case 0x60: rts(); break; /* ADC (nn,X) */ case 0x61: adc(load_byte(addr_indx()),6); break; /* ADC nn */ case 0x65: adc(load_byte(addr_zero()),3); break; /* ROR nn */ case 0x66: ror_mem(addr_zero(),5); break; /* PLA */ case 0x68: pla(); break; /* ADC #nn */ case 0x69: adc(fetch_op(),2); break; /* ROR A */ case 0x6A: ror_a(); break; /* JMP (nnnn) */ case 0x6C: jmp_ind(); break; /* ADC nnnn */ case 0x6D: adc(load_byte(addr_abs()),4); break; /* ROR nnnn */ case 0x6E: ror_mem(addr_abs(),6); break; /* BVS */ case 0x70: bvs(); break; /* ADC (nn,Y) */ case 0x71: adc(load_byte(addr_indy()),5); break; /* ADC nn,X */ case 0x75: 
adc(load_byte(addr_zerox()),4); break; /* ROR nn,X */ case 0x76: ror_mem(addr_zerox(),6); break; /* SEI */ case 0x78: sei(); break; /* ADC nnnn,Y */ case 0x79: adc(load_byte(addr_absy()),4); break; /* ADC nnnn,X */ case 0x7D: adc(load_byte(addr_absx()),4); break; /* ROR nnnn,X */ case 0x7E: ror_mem(addr_absx(),7); break; /* STA (nn,X) */ case 0x81: sta(addr_indx(),6); break; /* STY nn */ case 0x84: sty(addr_zero(),3); break; /* STA nn */ case 0x85: sta(addr_zero(),3); break; /* STX nn */ case 0x86: stx(addr_zero(),3); break; /* DEY */ case 0x88: dey(); break; /* TXA */ case 0x8A: txa(); break; /* STY nnnn */ case 0x8C: sty(addr_abs(),4); break; /* STA nnnn */ case 0x8D: sta(addr_abs(),4); break; /* STX nnnn */ case 0x8E: stx(addr_abs(),4); break; /* BCC nn */ case 0x90: bcc(); break; /* STA (nn,Y) */ case 0x91: sta(addr_indy(),6); break; /* STY nn,X */ case 0x94: sty(addr_zerox(),4); break; /* STA nn,X */ case 0x95: sta(addr_zerox(),4); break; /* STX nn,Y */ case 0x96: stx(addr_zeroy(),4); break; /* TYA */ case 0x98: tya(); break; /* STA nnnn,Y */ case 0x99: sta(addr_absy(),5); break; /* TXS */ case 0x9A: txs(); break; /* STA nnnn,X */ case 0x9D: sta(addr_absx(),5); break; /* LDY #nn */ case 0xA0: ldy(fetch_op(),2); break; /* LDA (nn,X) */ case 0xA1: lda(load_byte(addr_indx()),6); break; /* LDX #nn */ case 0xA2: ldx(fetch_op(),2); break; /* LDY nn */ case 0xA4: ldy(load_byte(addr_zero()),3); break; /* LDA nn */ case 0xA5: lda(load_byte(addr_zero()),3); break; /* LDX nn */ case 0xA6: ldx(load_byte(addr_zero()),3); break; /* TAY */ case 0xA8: tay(); break; /* LDA #nn */ case 0xA9: lda(fetch_op(),2); break; /* TAX */ case 0xAA: tax(); break; /* LDY nnnn */ case 0xAC: ldy(load_byte(addr_abs()),4); break; /* LDA nnnn */ case 0xAD: lda(load_byte(addr_abs()),4); break; /* LDX nnnn */ case 0xAE: ldx(load_byte(addr_abs()),4); break; /* BCS nn */ case 0xB0: bcs(); break; /* LDA (nn,Y) */ case 0xB1: lda(load_byte(addr_indy()),5); break; /* LDY nn,X */ case 0xB4: 
ldy(load_byte(addr_zerox()),3); break; /* LDA nn,X */ case 0xB5: lda(load_byte(addr_zerox()),3); break; /* LDX nn,Y */ case 0xB6: ldx(load_byte(addr_zeroy()),3); break; /* CLV */ case 0xB8: clv(); break; /* LDA nnnn,Y */ case 0xB9: lda(load_byte(addr_absy()),4); break; /* TSX */ case 0xBA: tsx(); break; /* LDY nnnn,X */ case 0xBC: ldy(load_byte(addr_absx()),4); break; /* LDA nnnn,X */ case 0xBD: lda(load_byte(addr_absx()),4); break; /* LDX nnnn,Y */ case 0xBE: ldx(load_byte(addr_absy()),4); break; /* CPY #nn */ case 0xC0: cpy(fetch_op(),2); break; /* CMP (nn,X) */ case 0xC1: cmp(load_byte(addr_indx()),6); break; /* CPY nn */ case 0xC4: cpy(load_byte(addr_zero()),3); break; /* CMP nn */ case 0xC5: cmp(load_byte(addr_zero()),3); break; /* DEC nn */ case 0xC6: dec(addr_zero(),5); break; /* INY */ case 0xC8: iny(); break; /* CMP #nn */ case 0xC9: cmp(fetch_op(),2); break; /* DEX */ case 0xCA: dex(); break; /* CPY nnnn */ case 0xCC: cpy(load_byte(addr_abs()),4); break; /* CMP nnnn */ case 0xCD: cmp(load_byte(addr_abs()),4); break; /* DEC nnnn */ case 0xCE: dec(addr_abs(),6); break; /* BNE nn */ case 0xD0: bne(); break; /* CMP (nn,Y) */ case 0xD1: cmp(load_byte(addr_indy()),5); break; /* CMP nn,X */ case 0xD5: cmp(load_byte(addr_zerox()),4); break; /* DEC nn,X */ case 0xD6: dec(addr_zerox(),6); break; /* CLD */ case 0xD8: cld(); break; /* CMP nnnn,Y */ case 0xD9: cmp(load_byte(addr_absy()),4); break; /* CMP nnnn,X */ case 0xDD: cmp(load_byte(addr_absx()),4); break; /* DEC nnnn,X */ case 0xDE: dec(addr_absx(),7); break; /* CPX #nn */ case 0xE0: cpx(fetch_op(),2); break; /* SBC (nn,X) */ case 0xE1: sbc(load_byte(addr_indx()),6); break; /* CPX nn */ case 0xE4: cpx(load_byte(addr_zero()),3); break; /* SBC nn */ case 0xE5: sbc(load_byte(addr_zero()),3); break; /* INC nn */ case 0xE6: inc(addr_zero(),5); break; /* INX */ case 0xE8: inx(); break; /* SBC #nn */ case 0xE9: sbc(fetch_op(),2); break; /* NOP */ case 0xEA: nop(); break; /* CPX nnnn */ case 0xEC: 
cpx(load_byte(addr_abs()),4); break; /* SBC nnnn */ case 0xED: sbc(load_byte(addr_abs()),4); break; /* INC nnnn */ case 0xEE: inc(addr_abs(),6); break; /* BEQ nn */ case 0xF0: beq(); break; /* SBC (nn,Y) */ case 0xF1: sbc(load_byte(addr_indy()),5); break; /* SBC nn,X */ case 0xF5: sbc(load_byte(addr_zerox()),4); break; /* INC nn,X */ case 0xF6: inc(addr_zerox(),6); break; /* SED */ case 0xF8: sed(); break; /* SBC nnnn,Y */ case 0xF9: sbc(load_byte(addr_absy()),4); break; /* SBC nnnn,X */ case 0xFD: sbc(load_byte(addr_absx()),4); break; /* INC nnnn,X */ case 0xFE: inc(addr_absx(),7); break; /* Unknown or illegal instruction */ default: D("Unknown instruction: %X at %04x\n", insn,pc()); retval = false; } return retval; }