sIntBasePtr ShGcInt::ifelse(sIntBasePtr& a, sIntBasePtr & ifTrue, sIntBasePtr & ifFalse) { auto aa = getMemory(a); auto tt = getMemory(ifTrue); auto ff = getMemory(ifFalse); auto ret(new ShGcInt(mRt, tt->size())); sIntBasePtr rret(ret); ShGc::CircuitItem workItem; workItem.mInputBundleCount = 3; workItem.mLabels.resize(4); workItem.mLabels[0] = tt; workItem.mLabels[1] = ff; workItem.mLabels[2] = aa; workItem.mLabels[3] = ret->mLabels; if (workItem.mLabels[0]->size() != workItem.mLabels[1]->size() || workItem.mLabels[0]->size() != workItem.mLabels[3]->size()) throw std::runtime_error("IfElse must be performed with variables of the same bit length. " LOCATION); if (workItem.mLabels[2]->size()!= 1) throw std::runtime_error(LOCATION); workItem.mCircuit = mRt.mLibrary.int_int_multiplex(workItem.mLabels[0]->size()); mRt.enqueue(std::move(workItem)); return rret; }
TCA emitCallToExit(CodeBlock& cb, DataBlock& data, const UniqueStubs& us) { ppc64_asm::Assembler a { cb }; auto const start = a.frontier(); if (RuntimeOption::EvalHHIRGenerateAsserts) { vwrap(cb, data, [&] (Vout& v) { // Not doing it directly as rret(0) == rarg(0) on ppc64 Vreg ret_addr = v.makeReg(); // exittc address pushed on calltc/resumetc. v << copy{rsp(), ret_addr}; // We need to spill the return registers around the assert call. v << push{rret(0)}; v << push{rret(1)}; v << copy{ret_addr, rarg(0)}; v << call{TCA(assert_tc_saved_rip), RegSet(rarg(0))}; v << pop{rret(1)}; v << pop{rret(0)}; }); } // Discard the exittc address pushed on calltc/resumetc for balancing the // stack next. a.addi(rsp(), rsp(), 8); // Reinitialize r1 for the external code found after enterTCExit's stubret a.addi(rsfp(), rsp(), 8); // r31 should have the same value as caller's r1. Loading it soon on stubret. // (this corrupts the backchain, but it's not relevant as this frame will be // destroyed soon) a.std(rsfp(), rsp()[8]); // Emulate a ret to enterTCExit without actually doing one to avoid // unbalancing the return stack buffer. a.branchAuto(TCA(mcg->ustubs().enterTCExit)); return start; }
TCA emitCallToExit(CodeBlock& cb, DataBlock& data, const UniqueStubs& /*us*/) { ppc64_asm::Assembler a { cb }; auto const start = a.frontier(); if (RuntimeOption::EvalHHIRGenerateAsserts) { vwrap(cb, data, [&] (Vout& v) { // Not doing it directly as rret(0) == rarg(0) on ppc64 Vreg ret_addr = v.makeReg(); // exittc address pushed on calltc/resumetc. v << copy{rsp(), ret_addr}; // We need to spill the return registers around the assert call. v << push{rret(0)}; v << push{rret(1)}; v << copy{ret_addr, rarg(0)}; v << call{TCA(assert_tc_saved_rip), RegSet(rarg(0))}; v << pop{rret(1)}; v << pop{rret(0)}; }); } // Discard the exittc address pushed on calltc/resumetc for balancing the // stack next. a.addi(rsp(), rsp(), 8); // Reinitialize r1 for the external code found after enterTCExit's stubret a.addi(rsfp(), rsp(), 8); // Restore the rvmfp when leaving the VM, which must be the same of rsfp. a.mr(rvmfp(), rsfp()); // Emulate a ret to enterTCExit without actually doing one to avoid // unbalancing the return stack buffer. a.branchAuto(TCA(tc::ustubs().enterTCExit)); return start; }
void lower_vcall(Vunit& unit, Inst& inst, Vlabel b, size_t i) { auto& blocks = unit.blocks; auto const& vinstr = blocks[b].code[i]; auto const is_vcall = vinstr.op == Vinstr::vcall; auto const vcall = vinstr.vcall_; auto const vinvoke = vinstr.vinvoke_; // We lower vinvoke in two phases, and `inst' is overwritten after the first // phase. We need to save any of its parameters that we care about in the // second phase ahead of time. auto const& vargs = unit.vcallArgs[inst.args]; auto const dests = unit.tuples[inst.d]; auto const destType = inst.destType; auto const scratch = unit.makeScratchBlock(); SCOPE_EXIT { unit.freeScratchBlock(scratch); }; Vout v(unit, scratch, vinstr.origin); int32_t const adjust = (vargs.stkArgs.size() & 0x1) ? sizeof(uintptr_t) : 0; if (adjust) v << lea{rsp()[-adjust], rsp()}; // Push stack arguments, in reverse order. for (int i = vargs.stkArgs.size() - 1; i >= 0; --i) { v << push{vargs.stkArgs[i]}; } // Get the arguments in the proper registers. RegSet argRegs; bool needsCopy = false; auto doArgs = [&] (const VregList& srcs, PhysReg (*r)(size_t)) { VregList argDests; for (size_t i = 0, n = srcs.size(); i < n; ++i) { auto const reg = r(i); argDests.push_back(reg); argRegs |= reg; } if (argDests.size()) { v << copyargs{v.makeTuple(srcs), v.makeTuple(std::move(argDests))}; } }; switch (arch()) { case Arch::X64: case Arch::PPC64: doArgs(vargs.args, rarg); break; case Arch::ARM: if (vargs.indirect) { if (vargs.args.size() > 0) { // First arg is a pointer to storage for the return value. v << copy{vargs.args[0], rret_indirect()}; VregList rem(vargs.args.begin() + 1, vargs.args.end()); doArgs(rem, rarg); needsCopy = true; } } else { doArgs(vargs.args, rarg); } } doArgs(vargs.simdArgs, rarg_simd); // Emit the appropriate call instruction sequence. emitCall(v, inst.call, argRegs); // Handle fixup and unwind information. if (inst.fixup.isValid()) { v << syncpoint{inst.fixup}; } if (!is_vcall) { auto& targets = vinvoke.targets; v << unwind{{targets[0], targets[1]}}; // Insert an lea fixup for any stack args at the beginning of the catch // block. if (auto rspOffset = ((vargs.stkArgs.size() + 1) & ~1) * sizeof(uintptr_t)) { auto& taken = unit.blocks[targets[1]].code; assertx(taken.front().op == Vinstr::landingpad || taken.front().op == Vinstr::jmp); Vinstr vi { lea{rsp()[rspOffset], rsp()} }; vi.origin = taken.front().origin; if (taken.front().op == Vinstr::jmp) { taken.insert(taken.begin(), vi); } else { taken.insert(taken.begin() + 1, vi); } } // Write out the code so far to the end of b. Remaining code will be // emitted to the next block. vector_splice(blocks[b].code, i, 1, blocks[scratch].code); } else if (vcall.nothrow) { v << nothrow{}; } // Copy back the indirect result pointer into the return register. if (needsCopy) { v << copy{rret_indirect(), rret(0)}; } // For vinvoke, `inst' is no longer valid after this point. // Copy the call result to the destination register(s). switch (destType) { case DestType::TV: static_assert(offsetof(TypedValue, m_data) == 0, ""); static_assert(offsetof(TypedValue, m_type) == 8, ""); if (dests.size() == 2) { v << copy2{rret(0), rret(1), dests[0], dests[1]}; } else { // We have cases where we statically know the type but need the value // from native call. Even if the type does not really need a register // (e.g., InitNull), a Vreg is still allocated in assignRegs(), so the // following assertion holds. assertx(dests.size() == 1); v << copy{rret(0), dests[0]}; } break; case DestType::SIMD: static_assert(offsetof(TypedValue, m_data) == 0, ""); static_assert(offsetof(TypedValue, m_type) == 8, ""); assertx(dests.size() == 1); pack2(v, rret(0), rret(1), dests[0]); break; case DestType::SSA: case DestType::Byte: assertx(dests.size() == 1); assertx(dests[0].isValid()); // Copy the single-register result to dests[0]. v << copy{rret(0), dests[0]}; break; case DestType::Dbl: // Copy the single-register result to dests[0]. assertx(dests.size() == 1); assertx(dests[0].isValid()); v << copy{rret_simd(0), dests[0]}; break; case DestType::None: assertx(dests.empty()); break; } if (vargs.stkArgs.size() > 0) { auto const delta = safe_cast<int32_t>( vargs.stkArgs.size() * sizeof(uintptr_t) + adjust ); v << lea{rsp()[delta], rsp()}; } // Insert new instructions to the appropriate block. if (is_vcall) { vector_splice(blocks[b].code, i, 1, blocks[scratch].code); } else { vector_splice(blocks[vinvoke.targets[0]].code, 0, 0, blocks[scratch].code); } }
void lower_vcall(Vunit& unit, Inst& inst, Vlabel b, size_t i) { auto& blocks = unit.blocks; auto const& vinstr = blocks[b].code[i]; auto const is_vcall = vinstr.op == Vinstr::vcall; auto const vcall = vinstr.vcall_; auto const vinvoke = vinstr.vinvoke_; // We lower vinvoke in two phases, and `inst' is overwritten after the first // phase. We need to save any of its parameters that we care about in the // second phase ahead of time. auto const& vargs = unit.vcallArgs[inst.args]; auto const dests = unit.tuples[inst.d]; auto const destType = inst.destType; auto const scratch = unit.makeScratchBlock(); SCOPE_EXIT { unit.freeScratchBlock(scratch); }; Vout v(unit, scratch, vinstr.irctx()); // Push stack arguments, in reverse order. Push in pairs without padding // except for the last argument (pushed first) which should be padded if // there are an odd number of arguments. auto numArgs = vargs.stkArgs.size(); int32_t const adjust = (numArgs & 0x1) ? sizeof(uintptr_t) : 0; if (adjust) { // Using InvalidReg below fails SSA checks and simplify pass, so just // push the arg twice. It's on the same cacheline and will actually // perform faster than an explicit lea. v << pushp{vargs.stkArgs[numArgs - 1], vargs.stkArgs[numArgs - 1]}; --numArgs; } for (auto i2 = numArgs; i2 >= 2; i2 -= 2) { v << pushp{vargs.stkArgs[i2 - 1], vargs.stkArgs[i2 - 2]}; } // Get the arguments in the proper registers. RegSet argRegs; auto doArgs = [&] (const VregList& srcs, PhysReg (*r)(size_t)) { VregList argDests; for (size_t i2 = 0, n = srcs.size(); i2 < n; ++i2) { auto const reg = r(i2); argDests.push_back(reg); argRegs |= reg; } if (argDests.size()) { v << copyargs{v.makeTuple(srcs), v.makeTuple(std::move(argDests))}; } }; doArgs(vargs.indRetArgs, rarg_ind_ret); doArgs(vargs.args, rarg); doArgs(vargs.simdArgs, rarg_simd); // Emit the appropriate call instruction sequence. emitCall(v, inst.call, argRegs); // Handle fixup and unwind information. if (inst.fixup.isValid()) { v << syncpoint{inst.fixup}; } if (!is_vcall) { auto& targets = vinvoke.targets; v << unwind{{targets[0], targets[1]}}; // Insert an lea fixup for any stack args at the beginning of the catch // block. if (auto rspOffset = ((vargs.stkArgs.size() + 1) & ~1) * sizeof(uintptr_t)) { auto& taken = unit.blocks[targets[1]].code; assertx(taken.front().op == Vinstr::landingpad || taken.front().op == Vinstr::jmp); Vinstr vi { lea{rsp()[rspOffset], rsp()}, taken.front().irctx() }; if (taken.front().op == Vinstr::jmp) { taken.insert(taken.begin(), vi); } else { taken.insert(taken.begin() + 1, vi); } } // Write out the code so far to the end of b. Remaining code will be // emitted to the next block. vector_splice(blocks[b].code, i, 1, blocks[scratch].code); } else if (vcall.nothrow) { v << nothrow{}; } // For vinvoke, `inst' is no longer valid after this point. // Copy the call result to the destination register(s). switch (destType) { case DestType::TV: static_assert(offsetof(TypedValue, m_data) == 0, ""); static_assert(offsetof(TypedValue, m_type) == 8, ""); if (dests.size() == 2) { switch (arch()) { case Arch::X64: // fall through case Arch::PPC64: v << copy2{rret(0), rret(1), dests[0], dests[1]}; break; case Arch::ARM: // For ARM64 we need to clear the bits 8..31 from the type value. // That allows us to use the resulting register values in // type comparisons without the need for truncation there. // We must not touch bits 63..32 as they contain the AUX data. v << copy{rret(0), dests[0]}; v << andq{v.cns(0xffffffff000000ff), rret(1), dests[1], v.makeReg()}; break; } } else { // We have cases where we statically know the type but need the value // from native call. Even if the type does not really need a register // (e.g., InitNull), a Vreg is still allocated in assignRegs(), so the // following assertion holds. assertx(dests.size() == 1); v << copy{rret(0), dests[0]}; } break; case DestType::SIMD: static_assert(offsetof(TypedValue, m_data) == 0, ""); static_assert(offsetof(TypedValue, m_type) == 8, ""); assertx(dests.size() == 1); pack2(v, rret(0), rret(1), dests[0]); break; case DestType::SSA: case DestType::Byte: assertx(dests.size() == 1); assertx(dests[0].isValid()); // Copy the single-register result to dests[0]. v << copy{rret(0), dests[0]}; break; case DestType::SSAPair: assertx(dests.size() == 2); assertx(dests[0].isValid()); assertx(dests[1].isValid()); // Copy the result pair to dests. v << copy2{rret(0), rret(1), dests[0], dests[1]}; break; case DestType::Dbl: // Copy the single-register result to dests[0]. assertx(dests.size() == 1); assertx(dests[0].isValid()); v << copy{rret_simd(0), dests[0]}; break; case DestType::Indirect: // Already asserted above break; case DestType::None: assertx(dests.empty()); break; } if (vargs.stkArgs.size() > 0) { auto const delta = safe_cast<int32_t>( vargs.stkArgs.size() * sizeof(uintptr_t) + adjust ); v << lea{rsp()[delta], rsp()}; } // Insert new instructions to the appropriate block. if (is_vcall) { vector_splice(blocks[b].code, i, 1, blocks[scratch].code); } else { vector_splice(blocks[vinvoke.targets[0]].code, 0, 0, blocks[scratch].code); } }
TCA emitFunctionEnterHelper(CodeBlock& cb, UniqueStubs& us) { alignJmpTarget(cb); auto const start = vwrap(cb, [&] (Vout& v) { auto const ar = v.makeReg(); v << copy{rvmfp(), ar}; // Fully set up the call frame for the stub. We can't skip this like we do // in other stubs because we need the return IP for this frame in the %rbp // chain, in order to find the proper fixup for the VMRegAnchor in the // intercept handler. v << stublogue{true}; v << copy{rsp(), rvmfp()}; // When we call the event hook, it might tell us to skip the callee // (because of fb_intercept). If that happens, we need to return to the // caller, but the handler will have already popped the callee's frame. // So, we need to save these values for later. v << pushm{ar[AROFF(m_savedRip)]}; v << pushm{ar[AROFF(m_sfp)]}; v << copy2{ar, v.cns(EventHook::NormalFunc), rarg(0), rarg(1)}; bool (*hook)(const ActRec*, int) = &EventHook::onFunctionCall; v << call{TCA(hook)}; }); us.functionEnterHelperReturn = vwrap2(cb, [&] (Vout& v, Vout& vcold) { auto const sf = v.makeReg(); v << testb{rret(), rret(), sf}; unlikelyIfThen(v, vcold, CC_Z, sf, [&] (Vout& v) { auto const saved_rip = v.makeReg(); // The event hook has already cleaned up the stack and popped the // callee's frame, so we're ready to continue from the original call // site. We just need to grab the fp/rip of the original frame that we // saved earlier, and sync rvmsp(). v << pop{rvmfp()}; v << pop{saved_rip}; // Drop our call frame; the stublogue{} instruction guarantees that this // is exactly 16 bytes. v << lea{rsp()[16], rsp()}; // Sync vmsp and return to the caller. This unbalances the return stack // buffer, but if we're intercepting, we probably don't care. v << load{rvmtl()[rds::kVmspOff], rvmsp()}; v << jmpr{saved_rip}; }); // Skip past the stuff we saved for the intercept case. v << lea{rsp()[16], rsp()}; // Restore rvmfp() and return to the callee's func prologue. v << stubret{RegSet(), true}; }); return start; }