// Emits code that converts the unsigned 64-bit integer in |src| (a pair of
// 32-bit registers, src.high and src.low) into a double stored in |dest|.
// |temp| is overwritten with the address of a constant table, and the scratch
// floating-point register is overwritten as well.
void MacroAssemblerX86::convertUInt64ToDouble(Register64 src, Register temp, FloatRegister dest) {
    // The packed sequence needs SUBPD (SSE2) and HADDPD (SSE3).
    if (HasSSE3()) {
        // The packed sequence works on all 128 bits of the XMM register behind
        // |dest|. Currently the upper 64 bits are free whenever we have access
        // to the lower 64 bits.
        MOZ_ASSERT(dest.size() == 8);
        FloatRegister wideDest(dest.encoding(), FloatRegisters::Simd128);

        // Writing src as 0x HHHHHHHH LLLLLLLL, place each half in the lowest
        // lane of its own vector register:
        //   wideDest = 0x 00000000 00000000  00000000 LLLLLLLL
        //   scratch  = 0x 00000000 00000000  00000000 HHHHHHHH
        // where scratch is ScratchSimd128Reg.
        vmovd(src.low, wideDest);
        vmovd(src.high, ScratchSimd128Reg);

        // Interleave the low dwords of both registers:
        //   wideDest = 0x 00000000 00000000  HHHHHHHH LLLLLLLL
        vpunpckldq(ScratchSimd128Reg, wideDest, wideDest);

        // Interleave again, this time with the constant C1 stored at the
        // start of TO_DOUBLE:
        //   C1       = 0x 00000000 00000000  45300000 43300000
        //   wideDest = 0x 45300000 HHHHHHHH  43300000 LLLLLLLL
        // Read as two doubles, the halves are now:
        //   HI = 0x 1.00000HHHHHHHH * 2**84 == 2**84 + 0x HHHHHHHH 00000000
        //   LO = 0x 1.00000LLLLLLLL * 2**52 == 2**52 + 0x 00000000 LLLLLLLL
        movePtr(ImmPtr(TO_DOUBLE), temp);
        vpunpckldq(Operand(temp, 0), wideDest, wideDest);

        // Remove the exponent biases by subtracting the constant C2, which
        // sits two uint64_t slots into TO_DOUBLE:
        //   C2       = 0x 45300000 00000000  43300000 00000000
        //   HI(C2)   = 0x 1.0000000000000 * 2**84 == 2**84
        //   LO(C2)   = 0x 1.0000000000000 * 2**52 == 2**52
        // leaving
        //   HI = double(0x HHHHHHHH 00000000)
        //   LO = double(0x 00000000 LLLLLLLL)
        vsubpd(Operand(temp, sizeof(uint64_t) * 2), wideDest, wideDest);

        // Horizontal add folds both halves into the low lane:
        //   LO = double(0x HHHHHHHH 00000000) + double(0x 00000000 LLLLLLLL)
        //      = double(0x HHHHHHHH LLLLLLLL)
        //      = double(src)
        vhaddpd(wideDest, wideDest);
        return;
    }

    // Fallback without SSE3: dest = double(high) * scale + double(low).
    // The scale is loaded from TO_DOUBLE_HIGH_SCALE (presumably 2**32; the
    // constant is defined outside this view -- confirm).
    convertUInt32ToDouble(src.high, dest);
    movePtr(ImmPtr(&TO_DOUBLE_HIGH_SCALE), temp);
    loadDouble(Address(temp, 0), ScratchDoubleReg);
    asMasm().mulDouble(ScratchDoubleReg, dest);
    convertUInt32ToDouble(src.low, ScratchDoubleReg);
    asMasm().addDouble(ScratchDoubleReg, dest);
}
// Emits code that restores the registers in |set| from the stack and releases
// the stack space they occupied. Registers that are also in |ignore| are not
// reloaded, but their stack slots are still released.
void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore) {
    FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
    unsigned fpuLeft = fpuSet.size();
    int32_t gprOffset = set.gprs().size() * sizeof(intptr_t);
    int32_t fpuOffset = fpuSet.getPushSizeInBytes();
    const int32_t gprBytes = gprOffset;
    const int32_t fpuBytes = fpuOffset;

    // Floating-point registers are reloaded from their slots relative to the
    // stack pointer, walking the offsets downwards from the top of the
    // floating-point area.
    for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); iter++) {
        FloatRegister reg = *iter;
        fpuLeft -= 1;
        fpuOffset -= reg.size();
        if (!ignore.has(reg)) {
            Address slot(StackPointer, fpuOffset);
            if (reg.isDouble()) {
                loadDouble(slot, reg);
            } else if (reg.isSingle()) {
                loadFloat32(slot, reg);
            } else if (reg.isInt32x4()) {
                loadUnalignedInt32x4(slot, reg);
            } else if (reg.isFloat32x4()) {
                loadUnalignedFloat32x4(slot, reg);
            } else {
                MOZ_CRASH("Unknown register type.");
            }
        }
    }
    freeStack(fpuBytes);
    MOZ_ASSERT(fpuLeft == 0);

    // On x64 the floating-point area may include padding that keeps the stack
    // aligned on uintptr_t. Keep in sync with GetPushBytesInSize.
    fpuOffset -= fpuOffset % sizeof(uintptr_t);
    MOZ_ASSERT(fpuOffset == 0);

    if (!ignore.emptyGeneral()) {
        // Some general-purpose slots must be skipped, so load the wanted
        // registers by offset and release the whole area afterwards.
        for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); iter++) {
            gprOffset -= sizeof(intptr_t);
            if (!ignore.has(*iter)) {
                loadPtr(Address(StackPointer, gprOffset), *iter);
            }
        }
        freeStack(gprBytes);
    } else {
        // Nothing to skip: on x86, pop is fast on modern hardware and is a
        // small instruction, so use it for every general-purpose register.
        for (GeneralRegisterForwardIterator iter(set.gprs()); iter.more(); iter++) {
            gprOffset -= sizeof(intptr_t);
            Pop(*iter);
        }
    }
    MOZ_ASSERT(gprOffset == 0);
}
// Emits code that stores the registers in |set| to memory below |dest|.
// |dest| is used as a moving cursor: its offset is decremented before each
// store, general-purpose registers first and floating-point registers after.
// The third (unnamed) register parameter is unused here.
void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest, Register) {
    FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
    unsigned fpuLeft = fpuSet.size();
    int32_t fpuBytesLeft = fpuSet.getPushSizeInBytes();
    int32_t gprBytesLeft = set.gprs().size() * sizeof(intptr_t);

    // The starting offset must leave room for everything written below it.
    MOZ_ASSERT(dest.offset >= gprBytesLeft + fpuBytesLeft);

    // General-purpose registers: one pointer-sized slot each.
    for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) {
        dest.offset -= sizeof(intptr_t);
        gprBytesLeft -= sizeof(intptr_t);
        storePtr(*iter, dest);
    }
    MOZ_ASSERT(gprBytesLeft == 0);

    // Floating-point registers: slot size depends on the register's size.
    for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
        FloatRegister reg = *iter;
        fpuLeft -= 1;
        fpuBytesLeft -= reg.size();
        dest.offset -= reg.size();
        if (reg.isDouble()) {
            storeDouble(reg, dest);
        } else if (reg.isSingle()) {
            storeFloat32(reg, dest);
        } else if (reg.isSimd128()) {
            storeUnalignedSimd128Float(reg, dest);
        } else {
            MOZ_CRASH("Unknown register type.");
        }
    }
    MOZ_ASSERT(fpuLeft == 0);

    // On x64 the floating-point area may include padding that keeps the stack
    // aligned on uintptr_t. Keep in sync with GetPushBytesInSize.
    fpuBytesLeft -= fpuBytesLeft % sizeof(uintptr_t);
    MOZ_ASSERT(fpuBytesLeft == 0);
}
// Emits code that saves the registers in |set| on the stack:
// general-purpose registers are pushed first, then one block is reserved for
// the floating-point registers, which are stored into it by offset.
void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
    FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
    unsigned fpuLeft = fpuSet.size();
    int32_t fpuOffset = fpuSet.getPushSizeInBytes();
    int32_t gprBytesLeft = set.gprs().size() * sizeof(intptr_t);

    // On x86, always push the integer registers with push: it is fast on
    // modern hardware and is a small instruction.
    for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); iter++) {
        gprBytesLeft -= sizeof(intptr_t);
        Push(*iter);
    }
    MOZ_ASSERT(gprBytesLeft == 0);

    // Reserve the whole floating-point area at once, then fill it from the
    // highest offset downwards.
    reserveStack(fpuOffset);
    for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); iter++) {
        FloatRegister reg = *iter;
        fpuLeft -= 1;
        fpuOffset -= reg.size();
        Address slot(StackPointer, fpuOffset);
        if (reg.isDouble()) {
            storeDouble(reg, slot);
        } else if (reg.isSingle()) {
            storeFloat32(reg, slot);
        } else if (reg.isInt32x4()) {
            storeUnalignedInt32x4(reg, slot);
        } else if (reg.isFloat32x4()) {
            storeUnalignedFloat32x4(reg, slot);
        } else {
            MOZ_CRASH("Unknown register type.");
        }
    }
    MOZ_ASSERT(fpuLeft == 0);

    // On x64 the floating-point area may include padding that keeps the stack
    // aligned on uintptr_t. Keep in sync with GetPushBytesInSize.
    fpuOffset -= fpuOffset % sizeof(uintptr_t);
    MOZ_ASSERT(fpuOffset == 0);
}