void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) { _assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN); if (!js.prefixD) return; int n = GetNumVectorElements(sz); for (int i = 0; i < n; i++) { if (js.VfpuWriteMask(i)) continue; int sat = (js.prefixD >> (i * 2)) & 3; if (sat == 1) { // clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1] fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, 0.5, R0); VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x) VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD} VABS(S2, S2); VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2 + 0.5f VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); } else if (sat == 3) { // clamped = fabs(x) - fabs(x-1.0f); // [-1, 1] fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, 1.0, R0); VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x) VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD} VABS(S2, S2); VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2 } } }
// Applies a VFPU source prefix (S or T) to a set of source lanes.
//
// vregs:  in/out - VFPU register numbers of the source lanes. Lanes that the
//         prefix modifies are replaced by a temp register holding the
//         swizzled / abs'd / negated / constant value, so the original
//         register is never overwritten.
// prefix: the prefix word. Per lane i: bits [2i+1:2i] select the source lane
//         (or constant index), bit 8+i = abs (or constant bank), bit 12+i =
//         use constant, bit 16+i = negate.
// sz:     vector size; determines the lane count.
//
// 0xE4 selects lanes 0,1,2,3 in order with no flag bits set, i.e. identity.
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
	if (prefix == 0xE4)
		return;

	int n = GetNumVectorElements(sz);
	u8 origV[4];
	static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

	// Snapshot the incoming registers; vregs[] is rewritten below while later
	// lanes may still need to read an earlier lane's original register.
	for (int i = 0; i < n; i++)
		origV[i] = vregs[i];

	for (int i = 0; i < n; i++) {
		int regnum = (prefix >> (i*2)) & 3;
		int abs = (prefix >> (8+i)) & 1;
		int negate = (prefix >> (16+i)) & 1;
		int constants = (prefix >> (12+i)) & 1;

		// Unchanged, hurray.
		if (!constants && regnum == i && !abs && !negate)
			continue;

		// This puts the value into a temp reg, so we won't write the modified value back.
		vregs[i] = fpr.GetTempV();
		if (!constants) {
			// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
			// TODO: But some ops seem to use const 0 instead?
			// This must happen before origV[regnum] is touched: only the first
			// n entries of origV are initialized.
			if (regnum >= n) {
				WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC));
				regnum = 0;
			}

			fpr.MapDirtyInV(vregs[i], origV[regnum]);
			fpr.SpillLockV(vregs[i]);

			if (abs) {
				VABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
				if (negate)
					VNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));
			} else {
				if (negate)
					VNEG(fpr.V(vregs[i]), fpr.V(origV[regnum]));
				else
					VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
			}
		} else {
			// Constant lane: the abs bit selects the upper half of the table.
			fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT);
			fpr.SpillLockV(vregs[i]);
			MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0, negate);
		}
	}
}
// Rewrites the source lane list according to a VFPU S/T prefix.
//
// vregs:  in/out - register numbers of the source lanes; modified lanes are
//         redirected to a freshly allocated temp register.
// prefix: prefix word (lane select in bits 2i..2i+1, abs at 8+i, constant
//         at 12+i, negate at 16+i).
// sz:     vector size, which fixes the number of lanes handled.
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
	// 0xE4 encodes lanes 0,1,2,3 with no modifier bits: nothing to do.
	if (prefix == 0xE4)
		return;

	static const float kPrefixConstants[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

	const int laneCount = GetNumVectorElements(sz);

	// Keep the original register numbers around, since vregs[] is overwritten
	// lane by lane below.
	u8 sourceRegs[4];
	for (int lane = 0; lane < laneCount; ++lane)
		sourceRegs[lane] = vregs[lane];

	for (int lane = 0; lane < laneCount; ++lane) {
		int srcLane = (prefix >> (lane * 2)) & 3;
		const bool takeAbs = ((prefix >> (8 + lane)) & 1) != 0;
		const bool useConstant = ((prefix >> (12 + lane)) & 1) != 0;
		const bool flipSign = ((prefix >> (16 + lane)) & 1) != 0;

		// Lane passes through untouched.
		const bool identity = !useConstant && srcLane == lane && !takeAbs && !flipSign;
		if (identity)
			continue;

		// Work in a temp so the modified value is never written back to the source.
		vregs[lane] = fpr.GetTempV();
		fpr.MapRegV(vregs[lane], MAP_NOINIT | MAP_DIRTY);

		if (useConstant) {
			// The abs bit doubles as the selector for the second constant bank.
			// TODO: There is VMOV s, imm on ARM, that can generate some of these constants. Not 1/3 or 1/6 though.
			MOVI2F(fpr.V(vregs[lane]), kPrefixConstants[srcLane + (takeAbs ? 4 : 0)], R0);
		} else {
			// A prefix may name a lane the vector doesn't have (e.g. "z" on a
			// pair); fall back to lane 0.
			// TODO: But some ops seem to use const 0 instead?
			if (srcLane >= laneCount) {
				ERROR_LOG_REPORT(CPU, "Invalid VFPU swizzle: %08x / %d", prefix, sz);
				srcLane = 0;
			}

			if (takeAbs)
				VABS(fpr.V(vregs[lane]), fpr.V(sourceRegs[srcLane]));
			else
				VMOV(fpr.V(vregs[lane]), fpr.V(sourceRegs[srcLane]));
		}

		// TODO: This can be integrated into the VABS / VMOV above, and also the constants.
		if (flipSign)
			VNEG(fpr.V(vregs[lane]), fpr.V(vregs[lane]));

		// TODO: This probably means it will swap out soon, inefficiently...
		fpr.ReleaseSpillLockV(vregs[lane]);
	}
}
// Compiles vzero / vone: fills every lane of the destination vector with
// 0.0f or 1.0f respectively.
//
// op: the raw instruction word. Bits 16..19 select the variant (6 = vzero,
//     7 = vone); anything else is handed to the DISABLE fallback.
void Jit::Comp_VVectorInit(u32 op) {
	CONDITIONAL_DISABLE;

	// Prefixes are not handled natively here: when one may be pending, run
	// the generic implementation and consume the prefix.
	if (js.MayHavePrefix()) {
		Comp_Generic(op);
		js.EatPrefix();
		return;
	}

	// Load the fill value into S0.
	const int variant = (op >> 16) & 0xF;
	if (variant == 6) {
		// vzero
		MOVI2F(S0, 0.0f, R0);
	} else if (variant == 7) {
		// vone
		MOVI2F(S0, 1.0f, R0);
	} else {
		DISABLE;
	}

	const VectorSize vecSize = GetVecSize(op);
	const int laneCount = GetNumVectorElements(vecSize);

	u8 destRegs[4];
	GetVectorRegsPrefixD(destRegs, vecSize, _VD);
	// The old contents are irrelevant, every lane is overwritten.
	fpr.MapRegsV(destRegs, vecSize, MAP_NOINIT | MAP_DIRTY);

	for (int lane = 0; lane < laneCount; ++lane)
		VMOV(fpr.V(destRegs[lane]), S0);

	ApplyPrefixD(destRegs, vecSize);
	fpr.ReleaseSpillLocks();
}
// Compiles vzero / vone: writes 0.0f or 1.0f to every lane of the
// destination vector.
//
// op: the instruction. Bits 16..19 pick the variant (6 = vzero, 7 = vone);
//     other values go to the DISABLE fallback.
void Jit::Comp_VVectorInit(MIPSOpcode op) {
	CONDITIONAL_DISABLE;

	// No native prefix handling: bail out if the prefix state is unknown or
	// prefix support is turned off.
	if (js.HasUnknownPrefix() || disablePrefixes) {
		DISABLE;
	}

	// Pick the constant first, then materialize it in S0.
	float fillValue = 0.0f;
	switch ((op >> 16) & 0xF) {
	case 6:  // vzero
		fillValue = 0.0f;
		break;
	case 7:  // vone
		fillValue = 1.0f;
		break;
	default:
		DISABLE;
		break;
	}
	MOVI2F(S0, fillValue, R0);

	const VectorSize vecSize = GetVecSize(op);
	const int laneCount = GetNumVectorElements(vecSize);

	u8 destRegs[4];
	GetVectorRegsPrefixD(destRegs, vecSize, _VD);
	// Previous lane contents don't matter, all of them are overwritten.
	fpr.MapRegsAndSpillLockV(destRegs, vecSize, MAP_NOINIT | MAP_DIRTY);

	for (int lane = 0; lane < laneCount; ++lane)
		VMOV(fpr.V(destRegs[lane]), S0);

	ApplyPrefixD(destRegs, vecSize);
	fpr.ReleaseSpillLocksAndDiscardTemps();
}
// Compiles the single-source FPU ops (sqrt, abs, mov, neg, the float->int
// conversions and cvt.s.w). The low 6 bits of the instruction select the
// operation; anything not listed goes to the DISABLE fallback.
//
// op: the raw instruction word; source and destination FPU registers are
//     taken from it via _FS / _FD.
void Jit::Comp_FPU2op(u32 op) {
	CONDITIONAL_DISABLE;

	const int src = _FS;
	const int dst = _FD;

	switch (op & 0x3f) {
	case 4:   // sqrt:  F(fd) = sqrtf(F(fs))
		fpr.MapDirtyIn(dst, src);
		VSQRT(fpr.R(dst), fpr.R(src));
		break;

	case 5:   // abs:   F(fd) = fabsf(F(fs))
		fpr.MapDirtyIn(dst, src);
		VABS(fpr.R(dst), fpr.R(src));
		break;

	case 6:   // mov:   F(fd) = F(fs)
		fpr.MapDirtyIn(dst, src);
		VMOV(fpr.R(dst), fpr.R(src));
		break;

	case 7:   // neg:   F(fd) = -F(fs)
		fpr.MapDirtyIn(dst, src);
		VNEG(fpr.R(dst), fpr.R(src));
		break;

	case 12:  // round.w.s:  FsI(fd) = (int)floorf(F(fs) + 0.5f)
		fpr.MapDirtyIn(dst, src);
		VCVT(fpr.R(dst), fpr.R(src), TO_INT | IS_SIGNED);
		break;

	case 13:  // trunc.w.s:  FsI(fd) = Rto0(F(fs))
		fpr.MapDirtyIn(dst, src);
		VCVT(fpr.R(dst), fpr.R(src), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
		break;

	case 14:  // ceil.w.s:   FsI(fd) = (int)ceilf(F(fs))
		// Emitted as convert(x + 0.5).
		// NOTE(review): looks like an approximation of ceil that relies on
		// the conversion's rounding - confirm results for exact integers.
		fpr.MapDirtyIn(dst, src);
		MOVI2F(S0, 0.5f, R0);
		VADD(S0, fpr.R(src), S0);
		VCVT(fpr.R(dst), S0, TO_INT | IS_SIGNED);
		break;

	case 15:  // floor.w.s:  FsI(fd) = (int)floorf(F(fs))
		// Emitted as convert(x - 0.5); same caveat as ceil.w.s above.
		fpr.MapDirtyIn(dst, src);
		MOVI2F(S0, 0.5f, R0);
		VSUB(S0, fpr.R(src), S0);
		VCVT(fpr.R(dst), S0, TO_INT | IS_SIGNED);
		break;

	case 32:  // cvt.s.w:    F(fd) = (float)FsI(fs)
		fpr.MapDirtyIn(dst, src);
		VCVT(fpr.R(dst), fpr.R(src), TO_FLOAT | IS_SIGNED);
		break;

	case 36:  // cvt.w.s:    FsI(fd) = (int)F(fs), honoring the MIPS rounding mode
		fpr.MapDirtyIn(dst, src);

		// R0 = low two bits of fcr31, the MIPS rounding mode:
		//   0: Round nearest
		//   1: Round to zero
		//   2: Round up (ceil)
		//   3: Round down (floor)
		LDR(R0, CTXREG, offsetof(MIPSState, fcr31));
		AND(R0, R0, Operand2(3));

		// Modes 2 and 3: bias by +/-0.5 into S0, then convert S0.
		CMP(R0, Operand2(2));
		SetCC(CC_GE);
		MOVI2F(S0, 0.5f, R1);
		SetCC(CC_GT);   // mode 3
		VSUB(S0, fpr.R(src), S0);
		SetCC(CC_EQ);   // mode 2
		VADD(S0, fpr.R(src), S0);
		SetCC(CC_GE);   // modes 2 and 3
		VCVT(fpr.R(dst), S0, TO_INT | IS_SIGNED);
		SetCC(CC_AL);

		// Modes 0 and 1: convert the source directly.
		CMP(R0, Operand2(1));
		SetCC(CC_EQ);   // mode 1
		VCVT(fpr.R(dst), fpr.R(src), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
		SetCC(CC_LT);   // mode 0
		VCVT(fpr.R(dst), fpr.R(src), TO_INT | IS_SIGNED);
		SetCC(CC_AL);
		break;

	default:
		DISABLE;
	}
}
// Compiles lv.s / sv.s: load or store of a single VFPU register.
//
// op: the instruction. The top 6 bits select the operation (50 = lv.s,
//     58 = sv.s); other values go to the DISABLE fallback. The VFPU register
//     number is assembled from bits 16..20 and the low two bits, and the
//     offset is the sign-extended low 16 bits with the low two bits cleared.
//
// The host address is built in R0. When the base register is a known
// immediate the address is computed at compile time; otherwise it is
// computed at runtime, with an address check unless g_Config.bFastMemory
// is set.
void Jit::Comp_SV(MIPSOpcode op) {
	CONDITIONAL_DISABLE;

	s32 offset = (signed short)(op&0xFFFC);
	int vreg = ((op >> 16) & 0x1f) | ((op & 3) << 5);
	MIPSGPReg rs = _RS;

	// Set when the checked (non-fast-memory) address path was emitted.
	bool needsSafetyCheck = false;

	switch (op >> 26) {
	case 50:  // lv.s:  VI(vt) = Memory::Read_U32(addr)
		{
			// Map the destination before the address code: the checked path
			// below may leave a condition code active.
			fpr.MapRegV(vreg, MAP_DIRTY | MAP_NOINIT);

			if (gpr.IsImm(rs)) {
				u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
				MOVI2R(R0, addr + (u32)Memory::base);
			} else {
				gpr.MapReg(rs);
				if (g_Config.bFastMemory) {
					SetR0ToEffectiveAddress(rs, offset);
				} else {
					SetCCAndR0ForSafeAddress(rs, offset, R1);
					needsSafetyCheck = true;
				}
				// presumably R11 holds the host memory base - TODO confirm
				ADD(R0, R0, R11);
			}

#ifdef __ARM_ARCH_7S__
			// On this target, branch around the load instead of predicating it.
			FixupBranch skip;
			if (needsSafetyCheck) {
				skip = B_CC(CC_EQ);
			}
			VLDR(fpr.V(vreg), R0, 0);
			if (needsSafetyCheck) {
				SetJumpTarget(skip);
				SetCC(CC_AL);
			}
#else
			VLDR(fpr.V(vreg), R0, 0);
			if (needsSafetyCheck) {
				// Under EQ, the register is set to 0.0f instead.
				SetCC(CC_EQ);
				MOVI2F(fpr.V(vreg), 0.0f, R0);
				SetCC(CC_AL);
			}
#endif
		}
		break;

	case 58:  // sv.s:  Memory::Write_U32(VI(vt), addr)
		{
			// Map the source before the address code: the checked path
			// below may leave a condition code active.
			fpr.MapRegV(vreg);

			if (gpr.IsImm(rs)) {
				u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
				MOVI2R(R0, addr + (u32)Memory::base);
			} else {
				gpr.MapReg(rs);
				if (g_Config.bFastMemory) {
					SetR0ToEffectiveAddress(rs, offset);
				} else {
					SetCCAndR0ForSafeAddress(rs, offset, R1);
					needsSafetyCheck = true;
				}
				// presumably R11 holds the host memory base - TODO confirm
				ADD(R0, R0, R11);
			}

#ifdef __ARM_ARCH_7S__
			// On this target, branch around the store instead of predicating it.
			FixupBranch skip;
			if (needsSafetyCheck) {
				skip = B_CC(CC_EQ);
			}
			VSTR(fpr.V(vreg), R0, 0);
			if (needsSafetyCheck) {
				SetJumpTarget(skip);
				SetCC(CC_AL);
			}
#else
			VSTR(fpr.V(vreg), R0, 0);
			if (needsSafetyCheck) {
				SetCC(CC_AL);
			}
#endif
		}
		break;

	default:
		DISABLE;
	}
}
void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) { _assert_(js.prefixDFlag & ArmJitState::PREFIX_KNOWN); if (!js.prefixD) return; int n = GetNumVectorElements(sz); for (int i = 0; i < n; i++) { if (js.VfpuWriteMask(i)) continue; // TODO: These clampers are wrong - put this into google // and look at the plot: abs(x) - abs(x-0.5) + 0.5 // It's too steep. // Also, they mishandle NaN and Inf. int sat = (js.prefixD >> (i * 2)) & 3; if (sat == 1) { // clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1] fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, 0.0f, R0); MOVI2F(S1, 1.0f, R0); VCMP(fpr.V(vregs[i]), S0); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_LE); VMOV(fpr.V(vregs[i]), S0); SetCC(CC_AL); VCMP(fpr.V(vregs[i]), S1); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_GT); VMOV(fpr.V(vregs[i]), S1); SetCC(CC_AL); /* VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x) VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD} VABS(fpr.V(vregs[i]), fpr.V(vregs[i])); VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2 + 0.5f VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0);*/ } else if (sat == 3) { fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, -1.0f, R0); MOVI2F(S1, 1.0f, R0); VCMP(fpr.V(vregs[i]), S0); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_LT); VMOV(fpr.V(vregs[i]), S0); SetCC(CC_AL); VCMP(fpr.V(vregs[i]), S1); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_GT); VMOV(fpr.V(vregs[i]), S1); SetCC(CC_AL); // clamped = fabs(x) - fabs(x-1.0f); // [-1, 1] /* fpr.MapRegV(vregs[i], MAP_DIRTY); MOVI2F(S0, 1.0f, R0); VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x) VSUB(fpr.V(vregs[i]), fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD} VABS(fpr.V(vregs[i]), fpr.V(vregs[i])); VSUB(fpr.V(vregs[i]), S1, fpr.V(vregs[i])); // v[i] = S1 - S2 */ } } }
// Compiles the element-wise single-source VFPU ops (vmov, vabs, vneg, vrcp,
// vrsq, vsqrt, vnrcp). Bits 16..20 of the instruction select the operation.
//
// op: the raw instruction word.
//
// NOTE: the native path is currently switched off by the unconditional
// DISABLE below, so every instruction takes the fallback. The code after it
// is kept so it can be re-enabled.
//
// Sub-ops without a native implementation (vsat0 = 4, vsat1 = 5, vsin = 18,
// vcos = 19, vexp2 = 20, vlog2 = 21, vasin = 23, vnsin = 26, vrexp2 = 28,
// and any value not listed at all) are rejected before any register is
// mapped or spill-locked. Previously they bailed out from inside the emit
// loop, after temps had been allocated and locked, and unlisted values fell
// through and copied never-written temps into the destination.
void Jit::Comp_VV2Op(u32 op) {
	CONDITIONAL_DISABLE;

	DISABLE;

	if (js.HasUnknownPrefix())
		DISABLE;

	// Validate the sub-op up front, before touching the register cache.
	switch ((op >> 16) & 0x1f) {
	case 0:   // vmov
	case 1:   // vabs
	case 2:   // vneg
	case 16:  // vrcp
	case 17:  // vrsq
	case 22:  // vsqrt
	case 24:  // vnrcp
		break;
	default:
		DISABLE;
	}

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Pick the register each lane's result is computed into: the destination
	// itself when that can't clobber a source lane still to be read, a temp
	// otherwise.
	ARMReg tempxregs[4];
	for (int i = 0; i < n; ++i) {
		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
			int reg = fpr.GetTempV();
			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(reg);
			tempxregs[i] = fpr.V(reg);
		} else {
			fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
			fpr.SpillLockV(dregs[i]);
			tempxregs[i] = fpr.V(dregs[i]);
		}
	}

	// Warning: sregs[i] and tempxregs[i] may be the same reg.
	// Helps for vmov, hurts for vrcp, etc.
	for (int i = 0; i < n; ++i) {
		switch ((op >> 16) & 0x1f) {
		case 0:  // d[i] = s[i]; vmov
			// Probably for swizzle.
			VMOV(tempxregs[i], fpr.V(sregs[i]));
			break;
		case 1:  // d[i] = fabsf(s[i]); vabs
			VABS(tempxregs[i], fpr.V(sregs[i]));
			break;
		case 2:  // d[i] = -s[i]; vneg
			VNEG(tempxregs[i], fpr.V(sregs[i]));
			break;
		case 16:  // d[i] = 1.0f / s[i]; vrcp
			MOVI2F(S0, 1.0f, R0);
			VDIV(tempxregs[i], S0, fpr.V(sregs[i]));
			break;
		case 17:  // d[i] = 1.0f / sqrtf(s[i]); vrsq
			MOVI2F(S0, 1.0f, R0);
			VSQRT(S1, fpr.V(sregs[i]));
			VDIV(tempxregs[i], S0, S1);
			break;
		case 22:  // d[i] = sqrtf(s[i]); vsqrt
			VSQRT(tempxregs[i], fpr.V(sregs[i]));
			VABS(tempxregs[i], tempxregs[i]);
			break;
		case 24:  // d[i] = -1.0f / s[i]; vnrcp
			MOVI2F(S0, -1.0f, R0);
			VDIV(tempxregs[i], S0, fpr.V(sregs[i]));
			break;
		default:
			// Unreachable: unsupported sub-ops were rejected above.
			break;
		}
	}

	// Move the per-lane results into the real destination registers.
	fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
	for (int i = 0; i < n; ++i) {
		VMOV(fpr.V(dregs[i]), tempxregs[i]);
	}

	ApplyPrefixD(dregs, sz);

	fpr.ReleaseSpillLocks();
}