void recMTSAB()
{
	if( GPR_IS_CONST1(_Rs_) ) {
		xMOV(ptr32[&cpuRegs.sa], ((g_cpuConstRegs[_Rs_].UL[0] & 0xF) ^ (_Imm_ & 0xF)) );
	}
	else {
		_eeMoveGPRtoR(eax, _Rs_);
		xAND(eax, 0xF);
		xXOR(eax, _Imm_ & 0xf);
		xMOV(ptr[&cpuRegs.sa], eax);
	}
}
xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, int offset)
{
	m_base_frame = base_frame;
	m_save_base_pointer = save_base_pointer;
	m_offset = offset;

#ifdef __x86_64__

	m_offset += 8; // Call stores the return address (8 bytes on x86-64)

	// Note: rbp could surely be optimized away in 64-bit builds
	if (m_base_frame) {
		xPUSH( rbp );
		xMOV( rbp, rsp );
		m_offset += 8;
	} else if (m_save_base_pointer) {
		xPUSH( rbp );
		m_offset += 8;
	}

	xPUSH( rbx );
	xPUSH( r12 );
	xPUSH( r13 );
	xPUSH( r14 );
	xPUSH( r15 );
	m_offset += 40;

#else

	m_offset += 4; // Call stores the return address (4 bytes)

	// Create a new frame
	if (m_base_frame) {
		xPUSH( ebp );
		xMOV( ebp, esp );
		m_offset += 4;
	} else if (m_save_base_pointer) {
		xPUSH( ebp );
		m_offset += 4;
	}

	// Save the register context
	xPUSH( edi );
	xPUSH( esi );
	xPUSH( ebx );
	m_offset += 12;

#endif

	ALIGN_STACK(-(16 - m_offset % 16));
}
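// Worked example of the alignment math above (illustrative values): on a 32-bit
// build with base_frame set and offset == 0, m_offset ends up as 4 (return
// address) + 4 (ebp) + 12 (edi/esi/ebx) = 20.  Since 20 % 16 == 4, the
// ALIGN_STACK(-(16 - 4)) call adjusts esp by another 12 bytes, bringing the
// total to 32 and leaving the stack 16-byte aligned for calls made inside the
// frame.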
// emits "setup" code for a COP0 branch test. The instruction immediately following // this should be a conditional Jump -- JZ or JNZ normally. static void _setupBranchTest() { _eeFlushAllUnused(); // COP0 branch conditionals are based on the following equation: // (((psHu16(DMAC_STAT) | ~psHu16(DMAC_PCR)) & 0x3ff) == 0x3ff) // BC0F checks if the statement is false, BC0T checks if the statement is true. // note: We only want to compare the 16 bit values of DMAC_STAT and PCR. // But using 32-bit loads here is ok (and faster), because we mask off // everything except the lower 10 bits away. xMOV(eax, ptr[(&psHu32(DMAC_PCR) )]); xMOV(ecx, 0x3ff ); // ECX is our 10-bit mask var xNOT(eax); xOR(eax, ptr[(&psHu32(DMAC_STAT) )]); xAND(eax, ecx); xCMP(eax, ecx); }
void recMFSA()
{
	int mmreg;
	if (!_Rd_) return;

	mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);
	if( mmreg >= 0 ) {
		xMOVL.PS(xRegisterSSE(mmreg), ptr[&cpuRegs.sa]);
	}
	else if( (mmreg = _checkMMXreg(MMX_GPR+_Rd_, MODE_WRITE)) >= 0 ) {
		xMOVDZX(xRegisterMMX(mmreg), ptr[&cpuRegs.sa]);
		SetMMXstate();
	}
	else {
		xMOV(eax, ptr[&cpuRegs.sa]);
		_deleteEEreg(_Rd_, 0);
		xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
		xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
	}
}
void recDI()
{
	//// No need to branch after disabling interrupts...

	//iFlushCall(0);

	//xMOV(eax, ptr[&cpuRegs.cycle ]);
	//xMOV(ptr[&g_nextBranchCycle], eax);

	//xFastCall((void*)(uptr)Interp::DI );

	xMOV(eax, ptr[&cpuRegs.CP0.n.Status]);
	xTEST(eax, 0x20006); // EXL | ERL | EDI
	xForwardJNZ8 iHaveNoIdea;
	xTEST(eax, 0x18); // KSU
	xForwardJNZ8 inUserMode;
	iHaveNoIdea.SetTarget();
	xAND(eax, ~(u32)0x10000); // EIE
	xMOV(ptr[&cpuRegs.CP0.n.Status], eax);
	inUserMode.SetTarget();
}
// SA is 4-bit and contains the number of bytes to shift
void recMTSA()
{
	if( GPR_IS_CONST1(_Rs_) ) {
		xMOV(ptr32[&cpuRegs.sa], g_cpuConstRegs[_Rs_].UL[0] & 0xf );
	}
	else {
		int mmreg;

		if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) {
			xMOVSS(ptr[&cpuRegs.sa], xRegisterSSE(mmreg));
		}
		else if( (mmreg = _checkMMXreg(MMX_GPR+_Rs_, MODE_READ)) >= 0 ) {
			xMOVD(ptr[&cpuRegs.sa], xRegisterMMX(mmreg));
			SetMMXstate();
		}
		else {
			xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
			xMOV(ptr[&cpuRegs.sa], eax);
		}
		xAND(ptr32[&cpuRegs.sa], 0xf);
	}
}
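// Illustration of the SA math in recMTSA/recMTSAB above (example operands,
// chosen only for illustration): MTSAB with rs holding 0x13 and an immediate of
// 0x05 computes (0x13 & 0xF) ^ (0x05 & 0xF) == 0x3 ^ 0x5 == 0x6, so cpuRegs.sa
// ends up holding a 6-byte shift amount for instructions that consume SA (such
// as QFSRV).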
void recJAL()
{
	u32 newpc = (_Target_ << 2) + ( pc & 0xf0000000 );
	_deleteEEreg(31, 0);
	if(EE_CONST_PROP)
	{
		GPR_SET_CONST(31);
		g_cpuConstRegs[31].UL[0] = pc + 4;
		g_cpuConstRegs[31].UL[1] = 0;
	}
	else
	{
		xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4);
		xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
	}

	recompileNextInstruction(1);
	if (EmuConfig.Gamefixes.GoemonTlbHack)
		SetBranchImm(vtlb_V2P(newpc));
	else
		SetBranchImm(newpc);
}
// ------------------------------------------------------------------------
//
void DynGen_DivStallUpdate( int newstall, const xRegister32& tempreg=eax )
{
	// DivUnit Stalling occurs any time the current instruction has a non-zero
	// DivStall value.  Otherwise we just increment internal cycle counters
	// (which essentially behave as const-optimizations, and are written to
	// memory only when needed).

	if( newstall != 0 )
	{
		// Inline version:
		/*
			xMOV( tempreg, &iopRegs.DivUnitCycles );
			xSUB( tempreg, ir.DivUnit_GetCycleAccum() );
			xForwardJS8 skipStall;
			xSUB( &iopRegs.evtCycleCountdown, tempreg );
			skipStall.SetTarget();
		*/

		xMOV( ptr32[&iopRegs.DivUnitCycles], newstall );
	}
}
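// Usage sketch (hypothetical call site, for illustration only): a recompiler
// pass emitting an IOP divide would call DynGen_DivStallUpdate() with that
// opcode's non-zero DivStall cycle count, so the generated block records in
// iopRegs.DivUnitCycles how long the divide unit stays busy (and thus how long
// a later MFLO/MFHI may have to stall); opcodes with a zero DivStall value pass
// 0 and this function emits nothing.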
void recMTC0()
{
	if( GPR_IS_CONST1(_Rt_) )
	{
		switch (_Rd_)
		{
			case 12:
				iFlushCall(FLUSH_INTERPRETER);
				xFastCall(WriteCP0Status, g_cpuConstRegs[_Rt_].UL[0] );
			break;

			case 9:
				xMOV(ecx, ptr[&cpuRegs.cycle]);
				xMOV(ptr[&s_iLastCOP0Cycle], ecx);
				xMOV(ptr32[&cpuRegs.CP0.r[9]], g_cpuConstRegs[_Rt_].UL[0]);
			break;

			case 25:
				switch(_Imm_ & 0x3F)
				{
					case 0:
						iFlushCall(FLUSH_INTERPRETER);
						xFastCall(COP0_UpdatePCCR );
						xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] );
						xFastCall(COP0_DiagnosticPCCR );
					break;

					case 1:
						xMOV(eax, ptr[&cpuRegs.cycle]);
						xMOV(ptr32[&cpuRegs.PERF.n.pcr0], g_cpuConstRegs[_Rt_].UL[0]);
						xMOV(ptr[&s_iLastPERFCycle[0]], eax);
					break;

					case 3:
						xMOV(eax, ptr[&cpuRegs.cycle]);
						xMOV(ptr32[&cpuRegs.PERF.n.pcr1], g_cpuConstRegs[_Rt_].UL[0]);
						xMOV(ptr[&s_iLastPERFCycle[1]], eax);
					break;
				}
			break;

			case 24:
				COP0_LOG("MTC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF);
			break;

			default:
				xMOV(ptr32[&cpuRegs.CP0.r[_Rd_]], g_cpuConstRegs[_Rt_].UL[0]);
			break;
		}
	}
	else
	{
		switch (_Rd_)
		{
			case 12:
				iFlushCall(FLUSH_INTERPRETER);
				_eeMoveGPRtoR(ecx, _Rt_);
				xFastCall(WriteCP0Status, ecx );
			break;

			case 9:
				xMOV(ecx, ptr[&cpuRegs.cycle]);
				_eeMoveGPRtoM((uptr)&cpuRegs.CP0.r[9], _Rt_);
				xMOV(ptr[&s_iLastCOP0Cycle], ecx);
			break;

			case 25:
				switch(_Imm_ & 0x3F)
				{
					case 0:
						iFlushCall(FLUSH_INTERPRETER);
						xFastCall(COP0_UpdatePCCR );
						_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_);
						xFastCall(COP0_DiagnosticPCCR );
					break;

					case 1:
						xMOV(ecx, ptr[&cpuRegs.cycle]);
						_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pcr0, _Rt_);
						xMOV(ptr[&s_iLastPERFCycle[0]], ecx);
					break;

					case 3:
						xMOV(ecx, ptr[&cpuRegs.cycle]);
						_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pcr1, _Rt_);
						xMOV(ptr[&s_iLastPERFCycle[1]], ecx);
					break;
				}
			break;

			case 24:
				COP0_LOG("MTC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF);
			break;

			default:
				_eeMoveGPRtoM((uptr)&cpuRegs.CP0.r[_Rd_], _Rt_);
			break;
		}
	}
}
void recMFC0()
{
	if( _Rd_ == 9 )
	{
		// This case needs to be handled even if the write-back is ignored ( _Rt_ == 0 )
		xMOV(ecx, ptr[&cpuRegs.cycle]);
		xMOV(eax, ecx);
		xSUB(eax, ptr[&s_iLastCOP0Cycle]);
		u8* skipInc = JNZ8( 0 );
		xINC(eax);
		x86SetJ8( skipInc );
		xADD(ptr[&cpuRegs.CP0.n.Count], eax);
		xMOV(ptr[&s_iLastCOP0Cycle], ecx);
		xMOV(eax, ptr[&cpuRegs.CP0.r[ _Rd_ ] ]);

		if( !_Rt_ ) return;

		_deleteEEreg(_Rt_, 0);
		xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);

		xCDQ();
		xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
		return;
	}

	if ( !_Rt_ ) return;

	if( _Rd_ == 25 )
	{
		switch(_Imm_ & 0x3F)
		{
			case 0:
				xMOV(eax, ptr[&cpuRegs.PERF.n.pccr]);
			break;

			case 1:
				iFlushCall(FLUSH_INTERPRETER);
				xFastCall(COP0_UpdatePCCR );
				xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]);
			break;

			case 3:
				iFlushCall(FLUSH_INTERPRETER);
				xFastCall(COP0_UpdatePCCR );
				xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
			break;
		}

		_deleteEEreg(_Rt_, 0);
		xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);

		xCDQ();
		xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
		return;
	}
	else if(_Rd_ == 24)
	{
		COP0_LOG("MFC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF);
		return;
	}

	_eeOnWriteReg(_Rt_, 1);
	_deleteEEreg(_Rt_, 0);
	xMOV(eax, ptr[&cpuRegs.CP0.r[ _Rd_ ]]);

	xCDQ();
	xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
	xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
}
// ------------------------------------------------------------------------
// Internal implementation of EmitSibMagic which has been custom tailored
// to optimize special forms of the Lea instructions accordingly, such
// as when a LEA can be replaced with a "MOV reg,imm" or "MOV reg,reg".
//
// preserve_flags - set to true to disable use of SHL on [Index*Base] forms
// of LEA, which alters flags states.
//
static void EmitLeaMagic( const xRegisterInt& to, const xIndirectVoid& src, bool preserve_flags )
{
	int displacement_size = (src.Displacement == 0) ? 0 :
		( ( src.IsByteSizeDisp() ) ? 1 : 2 );

	// See EmitSibMagic for commenting on SIB encoding.

	if( !NeedsSibMagic( src ) )
	{
		// LEA Land: means we have either 1-register encoding or just an offset.
		// offset is encodable as an immediate MOV, and a register is encodable
		// as a register MOV.

		if( src.Index.IsEmpty() )
		{
			xMOV( to, src.Displacement );
			return;
		}
		else if( displacement_size == 0 )
		{
			_xMovRtoR( to, src.Index );
			return;
		}
		else
		{
			if( !preserve_flags )
			{
				// encode as MOV and ADD combo.  Make sure to use the immediate on the
				// ADD since it can encode as an 8-bit sign-extended value.

				_xMovRtoR( to, src.Index );
				xADD( to, src.Displacement );
				return;
			}
			else
			{
				// note: no need to do ebp+0 check since we encode all 0 displacements as
				// register assignments above (via MOV)

				xWrite8( 0x8d );
				ModRM( displacement_size, to.Id, src.Index.Id );
			}
		}
	}
	else
	{
		if( src.Base.IsEmpty() )
		{
			if( !preserve_flags && (displacement_size == 0) )
			{
				// Encode [Index*Scale] as a combination of Mov and Shl.
				// This is more efficient because of the bloated LEA format which requires
				// a 32 bit displacement, and the compact nature of the alternative.
				//
				// (this does not apply to older model P4s with the broken barrel shifter,
				//  but we currently aren't optimizing for that target anyway).

				_xMovRtoR( to, src.Index );
				xSHL( to, src.Scale );
				return;
			}

			xWrite8( 0x8d );
			ModRM( 0, to.Id, ModRm_UseSib );
			SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 );
			xWrite32( src.Displacement );
			return;
		}
		else
		{
			if( src.Scale == 0 )
			{
				if( !preserve_flags )
				{
					if( src.Index == esp )
					{
						// ESP is not encodable as an index (ix86 ignores it), thus:
						_xMovRtoR( to, src.Base );	// will do the trick!
						if( src.Displacement ) xADD( to, src.Displacement );
						return;
					}
					else if( src.Displacement == 0 )
					{
						_xMovRtoR( to, src.Base );
						_g1_EmitOp( G1Type_ADD, to, src.Index );
						return;
					}
				}
				else if( (src.Index == esp) && (src.Displacement == 0) )
				{
					// special case handling of ESP as Index, which is replaceable with
					// a single MOV even when preserve_flags is set! :D
					_xMovRtoR( to, src.Base );
					return;
				}
			}

			if( src.Base == ebp && displacement_size == 0 )
				displacement_size = 1;		// forces [ebp] to be encoded as [ebp+0]!

			xWrite8( 0x8d );
			ModRM( displacement_size, to.Id, ModRm_UseSib );
			SibSB( src.Scale, src.Index.Id, src.Base.Id );
		}
	}

	if( displacement_size != 0 )
	{
		if( displacement_size == 1 )
			xWrite<s8>( src.Displacement );
		else
			xWrite<s32>( src.Displacement );
	}
}
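// Illustration of the substitutions above (hypothetical operands, for example only):
//   lea eax, [0x12345678]  ->  mov eax, 0x12345678          (no registers, pure offset)
//   lea eax, [ecx]         ->  mov eax, ecx                 (one register, zero disp)
//   lea eax, [ecx+8]       ->  mov eax, ecx / add eax, 8    (flags allowed to change)
//   lea eax, [ecx*4]       ->  mov eax, ecx / shl eax, 2    (flags allowed to change)
// With preserve_flags set, the last two instead fall back to a genuine 0x8D LEA encoding.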
void recJALR()
{
	int newpc = pc + 4;
	_allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
	_eeMoveGPRtoR(esi, _Rs_);

	if (EmuConfig.Gamefixes.GoemonTlbHack)
	{
		xMOV(ecx, esi);
		vtlb_DynV2P();
		xMOV(esi, eax);
	}

	// uncomment when there are NO instructions that need to call interpreter
//	int mmreg;
//	if( GPR_IS_CONST1(_Rs_) )
//		xMOV(ptr32[&cpuRegs.pc], g_cpuConstRegs[_Rs_].UL[0] );
//	else {
//		int mmreg;
//
//		if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) {
//			xMOVSS(ptr[&cpuRegs.pc], xRegisterSSE(mmreg));
//		}
//		else if( (mmreg = _checkMMXreg(MMX_GPR+_Rs_, MODE_READ)) >= 0 ) {
//			xMOVD(ptr[&cpuRegs.pc], xRegisterMMX(mmreg));
//			SetMMXstate();
//		}
//		else {
//			xMOV(eax, ptr[(void*)((int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] )]);
//			xMOV(ptr[&cpuRegs.pc], eax);
//		}
//	}

	if ( _Rd_ )
	{
		_deleteEEreg(_Rd_, 0);
		if(EE_CONST_PROP)
		{
			GPR_SET_CONST(_Rd_);
			g_cpuConstRegs[_Rd_].UL[0] = newpc;
			g_cpuConstRegs[_Rd_].UL[1] = 0;
		}
		else
		{
			xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], newpc);
			xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
		}
	}

	_clearNeededMMXregs();
	_clearNeededXMMregs();
	recompileNextInstruction(1);

	if( x86regs[esi.GetId()].inuse )
	{
		pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK );
		xMOV(ptr[&cpuRegs.pc], esi);
		x86regs[esi.GetId()].inuse = 0;
	}
	else
	{
		xMOV(eax, ptr[&g_recWriteback]);
		xMOV(ptr[&cpuRegs.pc], eax);
	}

	SetBranchReg(0xffffffff);
}