void JitArm::GetCarryAndClear(ARMReg reg) { ARMReg tmp = gpr.GetReg(); Operand2 mask = Operand2(2, 2); // XER_CA_MASK LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); AND(reg, tmp, mask); BIC(tmp, tmp, mask); STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); gpr.Unlock(tmp); }
void JitArm::FinalizeCarry(ARMReg reg) { ARMReg tmp = gpr.GetReg(); Operand2 mask = Operand2(2, 2); // XER_CA_MASK SetCC(CC_CS); ORR(reg, reg, mask); SetCC(); LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); ORR(tmp, tmp, reg); STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); gpr.Unlock(tmp); }
void JitArm::ComputeCarry() { ARMReg tmp = gpr.GetReg(); Operand2 mask = Operand2(2, 2); // XER_CA_MASK LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); SetCC(CC_CS); ORR(tmp, tmp, mask); SetCC(CC_CC); BIC(tmp, tmp, mask); SetCC(); STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); gpr.Unlock(tmp); }
void Jit::Comp_mxc1(u32 op) { CONDITIONAL_DISABLE; int fs = _FS; int rt = _RT; switch((op >> 21) & 0x1f) { case 0: // R(rt) = FI(fs); break; //mfc1 // Let's just go through RAM for now. fpr.FlushR(fs); gpr.MapReg(rt, MAP_DIRTY | MAP_NOINIT); LDR(gpr.R(rt), CTXREG, fpr.GetMipsRegOffset(fs)); return; case 2: //cfc1 if (fs == 31) { gpr.MapReg(rt, MAP_DIRTY | MAP_NOINIT); LDR(R0, CTXREG, offsetof(MIPSState, fpcond)); AND(R0,R0, Operand2(1)); // Just in case LDR(gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31)); BIC(gpr.R(rt), gpr.R(rt), Operand2(0x1 << 23)); ORR(gpr.R(rt), gpr.R(rt), Operand2(R0, ST_LSL, 23)); } else if (fs == 0) { gpr.MapReg(rt, MAP_DIRTY | MAP_NOINIT); LDR(gpr.R(rt), CTXREG, offsetof(MIPSState, fcr0)); } return; case 4: //FI(fs) = R(rt); break; //mtc1 // Let's just go through RAM for now. gpr.FlushR(rt); fpr.MapReg(fs, MAP_DIRTY | MAP_NOINIT); VLDR(fpr.R(fs), CTXREG, gpr.GetMipsRegOffset(rt)); return; case 6: //ctc1 if (fs == 31) { gpr.MapReg(rt, 0); // Hardware rounding method. // Left here in case it is faster than conditional method. /* AND(R0, gpr.R(rt), Operand2(3)); // MIPS Rounding Mode <-> ARM Rounding Mode // 0, 1, 2, 3 <-> 0, 3, 1, 2 CMP(R0, Operand2(1)); SetCC(CC_EQ); ADD(R0, R0, Operand2(2)); SetCC(CC_GT); SUB(R0, R0, Operand2(1)); SetCC(CC_AL); // Load and Store RM to FPSCR VMRS(R1); BIC(R1, R1, Operand2(0x3 << 22)); ORR(R1, R1, Operand2(R0, ST_LSL, 22)); VMSR(R1); */ // Update MIPS state STR(gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31)); MOV(R0, Operand2(gpr.R(rt), ST_LSR, 23)); AND(R0, R0, Operand2(1)); STR(R0, CTXREG, offsetof(MIPSState, fpcond)); } return; } }
void Jit::Comp_FPU2op(u32 op) { CONDITIONAL_DISABLE; int fs = _FS; int fd = _FD; // logBlocks = 1; switch (op & 0x3f) { case 4: //F(fd) = sqrtf(F(fs)); break; //sqrt fpr.MapDirtyIn(fd, fs); VSQRT(fpr.R(fd), fpr.R(fs)); break; case 5: //F(fd) = fabsf(F(fs)); break; //abs fpr.MapDirtyIn(fd, fs); VABS(fpr.R(fd), fpr.R(fs)); break; case 6: //F(fd) = F(fs); break; //mov fpr.MapDirtyIn(fd, fs); VMOV(fpr.R(fd), fpr.R(fs)); break; case 7: //F(fd) = -F(fs); break; //neg fpr.MapDirtyIn(fd, fs); VNEG(fpr.R(fd), fpr.R(fs)); break; case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s fpr.MapDirtyIn(fd, fs); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); break; case 13: //FsI(fd) = Rto0(F(fs))); break; //trunc.w.s fpr.MapDirtyIn(fd, fs); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO); break; case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s fpr.MapDirtyIn(fd, fs); MOVI2F(S0, 0.5f, R0); VADD(S0,fpr.R(fs),S0); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); break; case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s fpr.MapDirtyIn(fd, fs); MOVI2F(S0, 0.5f, R0); VSUB(S0,fpr.R(fs),S0); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); break; case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w fpr.MapDirtyIn(fd, fs); VCVT(fpr.R(fd), fpr.R(fs), TO_FLOAT | IS_SIGNED); break; case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s fpr.MapDirtyIn(fd, fs); LDR(R0, CTXREG, offsetof(MIPSState, fcr31)); AND(R0, R0, Operand2(3)); // MIPS Rounding Mode: // 0: Round nearest // 1: Round to zero // 2: Round up (ceil) // 3: Round down (floor) CMP(R0, Operand2(2)); SetCC(CC_GE); MOVI2F(S0, 0.5f, R1); SetCC(CC_GT); VSUB(S0,fpr.R(fs),S0); SetCC(CC_EQ); VADD(S0,fpr.R(fs),S0); SetCC(CC_GE); VCVT(fpr.R(fd), S0, TO_INT | IS_SIGNED); /* 2,3 */ SetCC(CC_AL); CMP(R0, Operand2(1)); SetCC(CC_EQ); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED | ROUND_TO_ZERO); /* 1 */ SetCC(CC_LT); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); /* 0 */ SetCC(CC_AL); break; default: DISABLE; } }
void JitArm::fctiwx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff) u32 b = inst.FB; u32 d = inst.FD; ARMReg vB = fpr.R0(b); ARMReg vD = fpr.R0(d); ARMReg V0 = fpr.GetReg(); ARMReg V1 = fpr.GetReg(); ARMReg V2 = fpr.GetReg(); ARMReg rA = gpr.GetReg(); ARMReg fpscrReg = gpr.GetReg(); FixupBranch DoneMax, DoneMin; LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); MOVI2R(rA, (u32)minmaxFloat); // Check if greater than max float { VLDR(V0, rA, 8); // Load Max VCMPE(vB, V0); VMRS(_PC); // Loads in to APSR FixupBranch noException = B_CC(CC_LE); VMOV(vD, V0); // Set to max SetFPException(fpscrReg, FPSCR_VXCVI); DoneMax = B(); SetJumpTarget(noException); } // Check if less than min float { VLDR(V0, rA, 0); VCMPE(vB, V0); VMRS(_PC); FixupBranch noException = B_CC(CC_GE); VMOV(vD, V0); SetFPException(fpscrReg, FPSCR_VXCVI); DoneMin = B(); SetJumpTarget(noException); } // Within ranges, convert to integer // Set rounding mode first // PPC <-> ARM rounding modes // 0, 1, 2, 3 <-> 0, 3, 1, 2 ARMReg rB = gpr.GetReg(); VMRS(rA); // Bits 22-23 BIC(rA, rA, Operand2(3, 5)); LDR(rB, R9, PPCSTATE_OFF(fpscr)); AND(rB, rB, 0x3); // Get the FPSCR rounding bits CMP(rB, 1); SetCC(CC_EQ); // zero ORR(rA, rA, Operand2(3, 5)); SetCC(CC_NEQ); CMP(rB, 2); // +inf SetCC(CC_EQ); ORR(rA, rA, Operand2(1, 5)); SetCC(CC_NEQ); CMP(rB, 3); // -inf SetCC(CC_EQ); ORR(rA, rA, Operand2(2, 5)); SetCC(); VMSR(rA); ORR(rA, rA, Operand2(3, 5)); VCVT(vD, vB, TO_INT | IS_SIGNED); VMSR(rA); gpr.Unlock(rB); VCMPE(vD, vB); VMRS(_PC); SetCC(CC_EQ); BIC(fpscrReg, fpscrReg, FRFIMask); FixupBranch DoneEqual = B(); SetCC(); SetFPException(fpscrReg, FPSCR_XX); ORR(fpscrReg, fpscrReg, FIMask); VABS(V1, vB); VABS(V2, vD); VCMPE(V2, V1); VMRS(_PC); SetCC(CC_GT); ORR(fpscrReg, fpscrReg, FRMask); SetCC(); SetJumpTarget(DoneEqual); SetJumpTarget(DoneMax); SetJumpTarget(DoneMin); MOVI2R(rA, (u32)&doublenum); VLDR(V0, rA, 0); NEONXEmitter nemit(this); nemit.VORR(vD, vD, V0); if (inst.Rc) Helper_UpdateCR1(fpscrReg, rA); STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); gpr.Unlock(rA); gpr.Unlock(fpscrReg); fpr.Unlock(V0); fpr.Unlock(V1); fpr.Unlock(V2); }
void Jit::Comp_ITypeMemLR(MIPSOpcode op, bool load) { CONDITIONAL_DISABLE; int offset = (signed short)(op & 0xFFFF); MIPSGPReg rt = _RT; MIPSGPReg rs = _RS; int o = op >> 26; if (!js.inDelaySlot) { // Optimisation: Combine to single unaligned load/store bool isLeft = (o == 34 || o == 42); MIPSOpcode nextOp = Memory::Read_Instruction(js.compilerPC + 4); // Find a matching shift in opposite direction with opposite offset. if (nextOp == (isLeft ? (op.encoding + (4<<26) - 3) : (op.encoding - (4<<26) + 3))) { EatInstruction(nextOp); nextOp = MIPSOpcode(((load ? 35 : 43) << 26) | ((isLeft ? nextOp : op) & 0x03FFFFFF)); //lw, sw Comp_ITypeMem(nextOp); return; } } u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF; bool doCheck = false; FixupBranch skip; if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) { u32 addr = iaddr & 0x3FFFFFFF; // Need to initialize since this only loads part of the register. // But rs no longer matters (even if rs == rt) since we have the address. gpr.MapReg(rt, load ? MAP_DIRTY : 0); gpr.SetRegImm(R0, addr & ~3); u8 shift = (addr & 3) * 8; switch (o) { case 34: // lwl LDR(R0, MEMBASEREG, R0); ANDI2R(gpr.R(rt), gpr.R(rt), 0x00ffffff >> shift, SCRATCHREG2); ORR(gpr.R(rt), gpr.R(rt), Operand2(R0, ST_LSL, 24 - shift)); break; case 38: // lwr LDR(R0, MEMBASEREG, R0); ANDI2R(gpr.R(rt), gpr.R(rt), 0xffffff00 << (24 - shift), SCRATCHREG2); ORR(gpr.R(rt), gpr.R(rt), Operand2(R0, ST_LSR, shift)); break; case 42: // swl LDR(SCRATCHREG2, MEMBASEREG, R0); // Don't worry, can't use temporary. ANDI2R(SCRATCHREG2, SCRATCHREG2, 0xffffff00 << shift, R0); ORR(SCRATCHREG2, SCRATCHREG2, Operand2(gpr.R(rt), ST_LSR, 24 - shift)); STR(SCRATCHREG2, MEMBASEREG, R0); break; case 46: // swr LDR(SCRATCHREG2, MEMBASEREG, R0); // Don't worry, can't use temporary. ANDI2R(SCRATCHREG2, SCRATCHREG2, 0x00ffffff >> (24 - shift), R0); ORR(SCRATCHREG2, SCRATCHREG2, Operand2(gpr.R(rt), ST_LSL, shift)); STR(SCRATCHREG2, MEMBASEREG, R0); break; } return; }
void JitArmILAsmRoutineManager::Generate() { enterCode = GetCodePtr(); PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR); // Take care to 8-byte align stack for function calls. // We are misaligned here because of an odd number of args for PUSH. // It's not like x86 where you need to account for an extra 4 bytes // consumed by CALL. SUB(_SP, _SP, 4); MOVI2R(R0, (u32)&CoreTiming::downcount); MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]); FixupBranch skipToRealDispatcher = B(); dispatcher = GetCodePtr(); printf("ILDispatcher is %p\n", dispatcher); // Downcount Check // The result of slice decrementation should be in flags if somebody jumped here // IMPORTANT - We jump on negative, not carry!!! FixupBranch bail = B_CC(CC_MI); SetJumpTarget(skipToRealDispatcher); dispatcherNoCheck = GetCodePtr(); // This block of code gets the address of the compiled block of code // It runs though to the compiling portion if it isn't found LDR(R12, R9, PPCSTATE_OFF(pc));// Load the current PC into R12 Operand2 iCacheMask = Operand2(0xE, 2); // JIT_ICACHE_MASK BIC(R12, R12, iCacheMask); // R12 contains PC & JIT_ICACHE_MASK here. MOVI2R(R14, (u32)jit->GetBlockCache()->iCache); LDR(R12, R14, R12); // R12 contains iCache[PC & JIT_ICACHE_MASK] here // R12 Confirmed this is the correct iCache Location loaded. TST(R12, 0x80); // Test to see if it is a JIT block. SetCC(CC_EQ); // Success, it is our Jitblock. MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers()); // LDR R14 right here to get CodePointers()[0] pointer. LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size LDR(R14, R14, R12); // Load the block address in to R14 B(R14); // No need to jump anywhere after here, the block will go back to dispatcher start SetCC(); // If we get to this point, that means that we don't have the block cached to execute // So call ArmJit to compile the block and then execute it. MOVI2R(R14, (u32)&Jit); BL(R14); B(dispatcherNoCheck); // fpException() // Floating Point Exception Check, Jumped to if false fpException = GetCodePtr(); LDR(R0, R9, PPCSTATE_OFF(Exceptions)); ORR(R0, R0, EXCEPTION_FPU_UNAVAILABLE); STR(R0, R9, PPCSTATE_OFF(Exceptions)); QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); LDR(R0, R9, PPCSTATE_OFF(npc)); STR(R0, R9, PPCSTATE_OFF(pc)); B(dispatcher); SetJumpTarget(bail); doTiming = GetCodePtr(); // XXX: In JIT64, Advance() gets called /after/ the exception checking // once it jumps back to the start of outerLoop QuickCallFunction(R14, (void*)&CoreTiming::Advance); // Does exception checking testExceptions = GetCodePtr(); LDR(R0, R9, PPCSTATE_OFF(pc)); STR(R0, R9, PPCSTATE_OFF(npc)); QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); LDR(R0, R9, PPCSTATE_OFF(npc)); STR(R0, R9, PPCSTATE_OFF(pc)); // Check the state pointer to see if we are exiting // Gets checked on every exception check MOVI2R(R0, (u32)PowerPC::GetStatePtr()); MVN(R1, 0); LDR(R0, R0); TST(R0, R1); FixupBranch Exit = B_CC(CC_NEQ); B(dispatcher); SetJumpTarget(Exit); ADD(_SP, _SP, 4); POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns GenerateCommon(); FlushIcache(); }