TCA emitEndCatchHelper(CodeBlock& cb, UniqueStubs& us) {
  auto const udrspo = rvmtl()[unwinderDebuggerReturnSPOff()];

  auto const debuggerReturn = vwrap(cb, [&] (Vout& v) {
    v << load{udrspo, rvmsp()};
    v << storeqi{0, udrspo};
  });
  svcreq::emit_persistent(cb, folly::none, REQ_POST_DEBUGGER_RET);

  auto const resumeCPPUnwind = vwrap(cb, [] (Vout& v) {
    static_assert(sizeof(tl_regState) == 1,
                  "The following store must match the size of tl_regState.");
    auto const regstate = emitTLSAddr(v, tls_datum(tl_regState));
    v << storebi{static_cast<int32_t>(VMRegState::CLEAN), regstate};

    v << load{rvmtl()[unwinderExnOff()], rarg(0)};
    v << call{TCA(_Unwind_Resume), arg_regs(1)};
  });
  us.endCatchHelperPast = cb.frontier();
  vwrap(cb, [] (Vout& v) { v << ud2{}; });

  alignJmpTarget(cb);

  return vwrap(cb, [&] (Vout& v) {
    auto const done1 = v.makeBlock();
    auto const sf1 = v.makeReg();

    v << cmpqim{0, udrspo, sf1};
    v << jcci{CC_NE, sf1, done1, debuggerReturn};
    v = done1;

    // Normal end catch situation: call back to tc_unwind_resume, which
    // returns the catch trace (or null) in %rax, and the new vmfp in %rdx.
    v << copy{rvmfp(), rarg(0)};
    v << call{TCA(tc_unwind_resume)};
    v << copy{reg::rdx, rvmfp()};

    auto const done2 = v.makeBlock();
    auto const sf2 = v.makeReg();

    v << testq{reg::rax, reg::rax, sf2};
    v << jcci{CC_Z, sf2, done2, resumeCPPUnwind};
    v = done2;

    // We need to do a syncForLLVMCatch(), but vmfp is already in rdx.
    v << jmpr{reg::rax};
  });
}
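/*
 * For reference, a minimal sketch of the contract the final block above
 * relies on: tc_unwind_resume hands back both a catch-trace address
 * (possibly null) and the new vmfp, which on x64 arrive in %rax and %rdx.
 * The struct and field names below are hypothetical stand-ins, not HHVM's
 * actual declarations.
 */
struct TCUnwindResultSketch {
  unsigned char* catchTrace;  // null => jump to resumeCPPUnwind
  void* newFp;                // becomes rvmfp() for the catch trace
};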
/*
 * Helper for the freeLocalsHelpers which does the actual work of decrementing
 * a value's refcount or releasing it.
 *
 * This helper is reached via call from the various freeLocalHelpers. It
 * expects `tv' to be the address of a TypedValue with refcounted type `type'
 * (though it may be static, and we will do nothing in that case).
 *
 * The `live' registers must be preserved across any native calls (and
 * generally left untouched).
 */
static TCA emitDecRefHelper(CodeBlock& cb, DataBlock& data, CGMeta& fixups,
                            PhysReg tv, PhysReg type, RegSet live) {
  return vwrap(cb, data, fixups, [&] (Vout& v) {
    // We use the first argument register for the TV data because we might
    // pass it to the native release call. It's not live when we enter the
    // helper.
    auto const data = rarg(0);
    v << load{tv[TVOFF(m_data)], data};

    auto destroy = [&] (Vout& v) {
      PhysRegSaver prs{v, live};

      auto const dword_size = sizeof(int64_t);

      // Save the return address on the stack, keeping it 16-byte aligned.
      v << mflr{rfuncln()};
      v << lea{rsp()[-2 * dword_size], rsp()};
      v << store{rfuncln(), rsp()[0]};

      // The refcount is exactly 1; release the value.
      // Avoid 'this' pointer overwriting by reserving it as an argument.
      v << callm{lookupDestructor(v, type), arg_regs(1)};

      // Between where r1 is now and the saved RIP of the call into the
      // freeLocalsHelpers stub, we have all the live regs we pushed, plus the
      // stack size reserved for the LR saved right above and the LR offset in
      // the frame.
      v << syncpoint{makeIndirectFixup(prs.dwordsPushed())};
      // fallthru

      // Restore the return address from the stack.
      v << load{rsp()[0], rfuncln()};
      v << lea{rsp()[2 * dword_size], rsp()};
      v << mtlr{rfuncln()};
    };

    auto const sf = emitCmpRefCount(v, OneReference, data);

    if (one_bit_refcount) {
      ifThen(v, CC_E, sf, destroy);
    } else {
      ifThen(v, CC_NL, sf, [&] (Vout& v) {
        // The refcount is positive, so the value is refcounted. We need to
        // either decref or release.
        ifThen(v, CC_NE, sf, [&] (Vout& v) {
          // The refcount is greater than 1; decref it.
          emitDecRefCount(v, data);
          v << ret{live};
        });

        destroy(v);
      });
    }

    // Either we did a decref, or the value was static.
    v << ret{live};
  });
}
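/*
 * For illustration only: a C++ analogue (hypothetical RefCountedSketch type,
 * not HHVM's actual classes) of the decref-or-release decision that the
 * emitCmpRefCount/ifThen sequence above implements.
 */
#include <cstdint>

struct RefCountedSketch {
  int32_t count;                       // the FAST_REFCOUNT_OFFSET field
  void (*release)(RefCountedSketch*);  // destructor-table entry
};

inline void decRefSketch(RefCountedSketch* v) {
  if (v->count == 1) {
    v->release(v);   // exactly one reference: release the value
  } else if (v->count > 1) {
    --v->count;      // more than one reference: just decrement
  }
  // count below the threshold (a static value): do nothing, as in the stub.
}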
void lower(VLS& env, vcallarray& inst, Vlabel b, size_t i) {
  // vcallarray can only appear at the end of a block.
  assertx(i == env.unit.blocks[b].code.size() - 1);

  lower_impl(env.unit, b, i, [&] (Vout& v) {
    auto const& srcs = env.unit.tuples[inst.extraArgs];
    auto args = inst.args;
    auto dsts = jit::vector<Vreg>{};

    for (size_t i = 0; i < srcs.size(); ++i) {
      dsts.emplace_back(rarg(i));
      args |= rarg(i);
    }

    v << copyargs{env.unit.makeTuple(srcs),
                  env.unit.makeTuple(std::move(dsts))};
    v << callarray{inst.target, args};
    v << unwind{{inst.targets[0], inst.targets[1]}};
  });
}
TCA emitCallToExit(CodeBlock& cb, DataBlock& data, const UniqueStubs& us) {
  ppc64_asm::Assembler a { cb };
  auto const start = a.frontier();

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    vwrap(cb, data, [&] (Vout& v) {
      // Not doing it directly as rret(0) == rarg(0) on ppc64.
      Vreg ret_addr = v.makeReg();

      // The exittc address is pushed on calltc/resumetc.
      v << copy{rsp(), ret_addr};

      // We need to spill the return registers around the assert call.
      v << push{rret(0)};
      v << push{rret(1)};

      v << copy{ret_addr, rarg(0)};
      v << call{TCA(assert_tc_saved_rip), RegSet(rarg(0))};

      v << pop{rret(1)};
      v << pop{rret(0)};
    });
  }

  // Discard the exittc address pushed on calltc/resumetc to balance the
  // stack next.
  a.addi(rsp(), rsp(), 8);

  // Reinitialize r1 for the external code found after enterTCExit's stubret.
  a.addi(rsfp(), rsp(), 8);

  // r31 should have the same value as the caller's r1; it is reloaded shortly
  // by stubret. (This corrupts the backchain, but that's not relevant, since
  // this frame will be destroyed soon.)
  a.std(rsfp(), rsp()[8]);

  // Emulate a ret to enterTCExit without actually doing one, to avoid
  // unbalancing the return stack buffer.
  a.branchAuto(TCA(mcg->ustubs().enterTCExit));
  return start;
}
TCA emitCallToExit(CodeBlock& cb, DataBlock& data, const UniqueStubs& /*us*/) {
  ppc64_asm::Assembler a { cb };
  auto const start = a.frontier();

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    vwrap(cb, data, [&] (Vout& v) {
      // Not doing it directly as rret(0) == rarg(0) on ppc64.
      Vreg ret_addr = v.makeReg();

      // The exittc address is pushed on calltc/resumetc.
      v << copy{rsp(), ret_addr};

      // We need to spill the return registers around the assert call.
      v << push{rret(0)};
      v << push{rret(1)};

      v << copy{ret_addr, rarg(0)};
      v << call{TCA(assert_tc_saved_rip), RegSet(rarg(0))};

      v << pop{rret(1)};
      v << pop{rret(0)};
    });
  }

  // Discard the exittc address pushed on calltc/resumetc to balance the
  // stack next.
  a.addi(rsp(), rsp(), 8);

  // Reinitialize r1 for the external code found after enterTCExit's stubret.
  a.addi(rsfp(), rsp(), 8);

  // Restore rvmfp when leaving the VM; it must be the same as rsfp.
  a.mr(rvmfp(), rsfp());

  // Emulate a ret to enterTCExit without actually doing one, to avoid
  // unbalancing the return stack buffer.
  a.branchAuto(TCA(tc::ustubs().enterTCExit));
  return start;
}
void cgInterpOneCF(IRLS& env, const IRInstruction* inst) {
  auto const extra = inst->extra<InterpOneCF>();
  auto const sp = srcLoc(env, inst, 0).reg();
  auto& v = vmain(env);

  auto const sync_sp = v.makeReg();
  v << lea{sp[cellsToBytes(extra->spOffset.offset)], sync_sp};
  v << syncvmsp{sync_sp};

  assertx(tc::ustubs().interpOneCFHelpers.count(extra->opcode));

  // We pass the Offset in the third argument register.
  v << ldimml{extra->bcOff, rarg(2)};
  v << jmpi{tc::ustubs().interpOneCFHelpers.at(extra->opcode),
            interp_one_cf_regs()};
}
/*
 * Helper for the freeLocalsHelpers which does the actual work of decrementing
 * a value's refcount or releasing it.
 *
 * This helper is reached via call from the various freeLocalHelpers. It
 * expects `tv' to be the address of a TypedValue with refcounted type `type'
 * (though it may be static, and we will do nothing in that case).
 *
 * The `live' registers must be preserved across any native calls (and
 * generally left untouched).
 */
static TCA emitDecRefHelper(CodeBlock& cb, DataBlock& data, CGMeta& fixups,
                            PhysReg tv, PhysReg type, RegSet live) {
  return vwrap(cb, data, fixups, [&] (Vout& v) {
    // Set up frame linkage to avoid an indirect fixup.
    v << pushp{rlr(), rfp()};
    v << copy{rsp(), rfp()};

    // We use the first argument register for the TV data because we might
    // pass it to the native release call. It's not live when we enter the
    // helper.
    auto const data = rarg(0);
    v << load{tv[TVOFF(m_data)], data};

    auto const sf = v.makeReg();
    v << cmplim{1, data[FAST_REFCOUNT_OFFSET], sf};

    ifThen(v, CC_NL, sf, [&] (Vout& v) {
      // The refcount is positive, so the value is refcounted. We need to
      // either decref or release.
      ifThen(v, CC_NE, sf, [&] (Vout& v) {
        // The refcount is greater than 1; decref it.
        v << declm{data[FAST_REFCOUNT_OFFSET], v.makeReg()};
        // Pop FP/LR and return.
        v << popp{rfp(), rlr()};
        v << ret{live};
      });

      // Note that the stack is aligned, since we called into this helper from
      // a stack-unaligned stub.
      PhysRegSaver prs{v, live};

      // The refcount is exactly 1; release the value.
      // Avoid 'this' pointer overwriting by reserving it as an argument.
      v << callm{lookupDestructor(v, type), arg_regs(1)};

      // Between where %rsp is now and the saved RIP of the call into the
      // freeLocalsHelpers stub, we have all the live regs we pushed, plus the
      // saved RIP of the call from the stub to this helper.
      v << syncpoint{makeIndirectFixup(prs.dwordsPushed())};
      // fallthru
    });

    // Either we did a decref, or the value was static.
    // Pop FP/LR and return.
    v << popp{rfp(), rlr()};
    v << ret{live};
  });
}
/*
 * Helper for the freeLocalsHelpers which does the actual work of decrementing
 * a value's refcount or releasing it.
 *
 * This helper is reached via call from the various freeLocalHelpers. It
 * expects `tv' to be the address of a TypedValue with refcounted type `type'
 * (though it may be static, and we will do nothing in that case).
 *
 * The `live' registers must be preserved across any native calls (and
 * generally left untouched).
 */
static TCA emitDecRefHelper(CodeBlock& cb, DataBlock& data, CGMeta& fixups,
                            PhysReg tv, PhysReg type, RegSet live) {
  return vwrap(cb, data, fixups, [&] (Vout& v) {
    // Set up frame linkage to avoid an indirect fixup.
    v << stublogue{true};
    v << copy{rsp(), rfp()};

    // We use the first argument register for the TV data because we might
    // pass it to the native release call. It's not live when we enter the
    // helper.
    auto const data = rarg(0);
    v << load{tv[TVOFF(m_data)], data};

    auto destroy = [&] (Vout& v) {
      // Note that the stack is aligned, since we called into this helper from
      // a stack-unaligned stub.
      PhysRegSaver prs{v, live};

      // The refcount is exactly 1; release the value.
      // Avoid 'this' pointer overwriting by reserving it as an argument.
      // There's no need for a fixup, because we set up a frame on the C++
      // stack.
      v << callm{lookupDestructor(v, type), arg_regs(1)};
      // fallthru
    };

    auto const sf = emitCmpRefCount(v, OneReference, data);

    if (one_bit_refcount) {
      ifThen(v, CC_E, sf, destroy);
    } else {
      ifThen(v, CC_NL, sf, [&] (Vout& v) {
        // The refcount is positive, so the value is refcounted. We need to
        // either decref or release.
        ifThen(v, CC_NE, sf, [&] (Vout& v) {
          // The refcount is greater than 1; decref it.
          emitDecRefCount(v, data);
          v << stubret{live, true};
        });

        destroy(v);
      });
    }

    // Either we did a decref, or the value was static.
    v << stubret{live, true};
  });
}
void emitCall(Vout& v, CallSpec target, RegSet args) {
  switch (target.kind()) {
    case CallSpec::Kind::Direct:
      v << call{static_cast<TCA>(target.address()), args};
      return;

    case CallSpec::Kind::Smashable:
      v << calls{static_cast<TCA>(target.address()), args};
      return;

    case CallSpec::Kind::ArrayVirt: {
      auto const addr = reinterpret_cast<intptr_t>(target.arrayTable());

      auto const arrkind = v.makeReg();
      v << loadzbl{rarg(0)[HeaderKindOffset], arrkind};

      if (deltaFits(addr, sz::dword)) {
        v << callm{baseless(arrkind * 8 + addr), args};
      } else {
        auto const base = v.makeReg();
        v << ldimmq{addr, base};
        v << callm{base[arrkind * 8], args};
      }
      static_assert(sizeof(HeaderKind) == 1, "");
    } return;

    case CallSpec::Kind::Destructor: {
      // This movzbq is only needed because callers aren't required to
      // zero-extend the type.
      auto const zextType = v.makeReg();
      v << movzbq{target.reg(), zextType};

      auto const dtor_ptr = lookupDestructor(v, zextType);
      v << callm{dtor_ptr, args};
    } return;

    case CallSpec::Kind::Stub:
      v << callstub{target.stubAddr(), args};
      return;
  }
  not_reached();
}
void addDbgGuardImpl(SrcKey sk, SrcRec* sr) {
  TCA realCode = sr->getTopTranslation();
  if (!realCode) return;  // No translations, nothing to do.

  auto& cb = mcg->code.main();

  auto const dbgGuard = vwrap(cb, [&] (Vout& v) {
    if (!sk.resumed()) {
      auto const off = sr->nonResumedSPOff();
      v << lea{rvmfp()[-cellsToBytes(off.offset)], rvmsp()};
    }

    auto const tinfo = v.makeReg();
    auto const attached = v.makeReg();
    auto const sf = v.makeReg();
    auto const done = v.makeBlock();

    constexpr size_t dbgOff =
      offsetof(ThreadInfo, m_reqInjectionData) +
      RequestInjectionData::debuggerReadOnlyOffset();

    v << ldimmq{reinterpret_cast<uintptr_t>(sk.pc()), rarg(0)};

    emitTLSLoad(v, tls_datum(ThreadInfo::s_threadInfo), tinfo);
    v << loadb{tinfo[dbgOff], attached};
    v << testbi{static_cast<int8_t>(0xffu), attached, sf};

    v << jcci{CC_NZ, sf, done, mcg->ustubs().interpHelper};
    v = done;
    v << fallthru{};
  }, CodeKind::Helper);

  // Emit a jump to the actual code.
  auto const dbgBranchGuardSrc = emitSmashableJmp(cb, realCode);

  // Add the guard to the SrcRec.
  sr->addDebuggerGuard(dbgGuard, dbgBranchGuardSrc);
}
void emitCall(Vout& v, CallSpec target, RegSet args) {
  using K = CallSpec::Kind;

  switch (target.kind()) {
    case K::Direct:
      v << call{static_cast<TCA>(target.address()), args};
      return;

    case K::Smashable:
      v << calls{static_cast<TCA>(target.address()), args};
      return;

    case K::ArrayVirt: {
      auto const addr = reinterpret_cast<intptr_t>(target.arrayTable());

      auto const arrkind = v.makeReg();
      v << loadzbl{rarg(0)[HeaderKindOffset], arrkind};

      if (deltaFits(addr, sz::dword)) {
        v << callm{baseless(arrkind * 8 + addr), args};
      } else {
        auto const base = v.makeReg();
        v << ldimmq{addr, base};
        v << callm{base[arrkind * 8], args};
      }
      static_assert(sizeof(HeaderKind) == 1, "");
    } return;

    case K::Destructor: {
      auto const dtor = lookupDestructor(v, target.reg());
      v << callm{dtor, args};
    } return;

    case K::Stub:
      v << callstub{target.stubAddr(), args};
      return;
  }
  not_reached();
}
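/*
 * For illustration only: the ArrayVirt case above amounts to an indirect
 * call through a table indexed by the one-byte HeaderKind read from the
 * object header. The types and names below are hypothetical stand-ins.
 */
#include <cstddef>
#include <cstdint>

using ArrayFnSketch = void (*)(void*);

void arrayVirtDispatchSketch(void* obj, const ArrayFnSketch* table,
                             size_t kindOffset) {
  // loadzbl: zero-extend the kind byte into a table index.
  auto const kind = reinterpret_cast<const uint8_t*>(obj)[kindOffset];
  // callm{base[arrkind * 8]}: scale by pointer size and call indirectly.
  table[kind](obj);
}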
PhysReg r_svcreq_arg(size_t i) { return rarg(i + 2); }
PhysReg r_svcreq_stub() { return rarg(1); }
PhysReg r_svcreq_req() { return rarg(0); }
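// Taken together, the three accessors above fix the service-request calling
// convention: the request type travels in rarg(0), the stub address in
// rarg(1), and the request's own arguments start at rarg(2).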
TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);
  CGMeta fixups;
  TCA freeLocalsHelpers[kNumFreeLocalsHelpers];
  TCA freeManyLocalsHelper;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release =
    emitDecRefHelper(cb, data, fixups, local, type, local | last);

  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't use emitLoadTVType() here because it does a byte load, and we
    // need to sign-extend since we use `type' as a 32-bit array index to the
    // destructor table.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      v << call{release, arg_regs(3)};
    });
  };

  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)),
               local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  freeManyLocalsHelper = vwrap(cb, data, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only
    // loop until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    // Set up frame linkage to avoid an indirect fixup.
    v << copy{rsp(), rfp()};

    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();

        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, data, fixups, [] (Vout& v) {
    v << popp{rfp(), rlr()};
    v << ret{};
  });

  // Create a table of branches.
  us.freeManyLocalsHelper = vwrap(cb, data, [&] (Vout& v) {
    v << pushp{rlr(), rfp()};

    // rvmfp() is needed by the freeManyLocalsHelper stub above, so frame
    // linkage setup is deferred until after its use in freeManyLocalsHelper.
    v << jmpi{freeManyLocalsHelper};
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      // We set up frame linkage to avoid an indirect fixup.
      v << pushp{rlr(), rfp()};
      v << copy{rsp(), rfp()};
      v << jmpi{freeLocalsHelpers[i]};
    });
  }

  // FIXME: This stub is hot, so make sure to keep it small.
#if 0
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * x64::cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);
  CGMeta fixups;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release =
    emitDecRefHelper(cb, data, fixups, local, type, local | last);

  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't do a byte load here---we have to sign-extend since we use
    // `type' as a 32-bit array index to the destructor table.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      auto const dword_size = sizeof(int64_t);

      // Save the return address on the stack, keeping it 16-byte aligned.
      v << mflr{rfuncln()};
      v << lea{rsp()[-2 * dword_size], rsp()};
      v << store{rfuncln(), rsp()[0]};

      v << call{release, arg_regs(3)};

      // Restore the return address from the stack.
      v << load{rsp()[0], rfuncln()};
      v << lea{rsp()[2 * dword_size], rsp()};
      v << mtlr{rfuncln()};
    });
  };

  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)),
               local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  us.freeManyLocalsHelper = vwrap(cb, data, fixups, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only
    // loop until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();

        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, data, fixups, [] (Vout& v) { v << ret{}; });

  // This stub is hot, so make sure to keep it small.
#if 0
  // TODO(gut): Currently this assert fails.
  // Take a closer look when looking at performance.
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
RegSet arg_regs(size_t n) {
  RegSet regs;
  for (size_t i = 0; i < n; i++) regs |= rarg(i);
  return regs;
}
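// For example, arg_regs(3) == rarg(0) | rarg(1) | rarg(2), which is how the
// call{release, arg_regs(3)} sites elsewhere in this section mark the
// argument registers as live across the call.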
TCA emitFunctionEnterHelper(CodeBlock& cb, UniqueStubs& us) {
  alignJmpTarget(cb);

  auto const start = vwrap(cb, [&] (Vout& v) {
    auto const ar = v.makeReg();

    v << copy{rvmfp(), ar};

    // Fully set up the call frame for the stub. We can't skip this like we
    // do in other stubs because we need the return IP for this frame in the
    // %rbp chain, in order to find the proper fixup for the VMRegAnchor in
    // the intercept handler.
    v << stublogue{true};
    v << copy{rsp(), rvmfp()};

    // When we call the event hook, it might tell us to skip the callee
    // (because of fb_intercept). If that happens, we need to return to the
    // caller, but the handler will have already popped the callee's frame.
    // So, we need to save these values for later.
    v << pushm{ar[AROFF(m_savedRip)]};
    v << pushm{ar[AROFF(m_sfp)]};

    v << copy2{ar, v.cns(EventHook::NormalFunc), rarg(0), rarg(1)};

    bool (*hook)(const ActRec*, int) = &EventHook::onFunctionCall;
    v << call{TCA(hook)};
  });

  us.functionEnterHelperReturn = vwrap2(cb, [&] (Vout& v, Vout& vcold) {
    auto const sf = v.makeReg();
    v << testb{rret(), rret(), sf};

    unlikelyIfThen(v, vcold, CC_Z, sf, [&] (Vout& v) {
      auto const saved_rip = v.makeReg();

      // The event hook has already cleaned up the stack and popped the
      // callee's frame, so we're ready to continue from the original call
      // site. We just need to grab the fp/rip of the original frame that we
      // saved earlier, and sync rvmsp().
      v << pop{rvmfp()};
      v << pop{saved_rip};

      // Drop our call frame; the stublogue{} instruction guarantees that
      // this is exactly 16 bytes.
      v << lea{rsp()[16], rsp()};

      // Sync vmsp and return to the caller. This unbalances the return
      // stack buffer, but if we're intercepting, we probably don't care.
      v << load{rvmtl()[rds::kVmspOff], rvmsp()};
      v << jmpr{saved_rip};
    });

    // Skip past the stuff we saved for the intercept case.
    v << lea{rsp()[16], rsp()};

    // Restore rvmfp() and return to the callee's func prologue.
    v << stubret{RegSet(), true};
  });

  return start;
}
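// The contract relied on above: EventHook::onFunctionCall returns true to
// proceed into the callee, and false in the intercept (fb_intercept) case,
// where it has already popped the callee's frame---which is why the stub
// saved m_sfp and m_savedRip before making the call.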
TCA emitFreeLocalsHelpers(CodeBlock& cb, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);
  CGMeta fixups;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release = emitDecRefHelper(cb, fixups, local, type, local | last);

  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't do a byte load here---we have to sign-extend since we use
    // `type' as a 32-bit array index to the destructor table.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      v << call{release, arg_regs(3)};
    });
  };

  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)),
               local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  us.freeManyLocalsHelper = vwrap(cb, fixups, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only
    // loop until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();

        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, fixups, [] (Vout& v) { v << ret{}; });

  // This stub is hot, so make sure to keep it small. Alas, we have more work
  // to do under Windows, so we can't be this small :(
#ifndef _WIN32
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * x64::cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
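/*
 * For illustration only: a C++ analogue of the stub layout shared by all of
 * the emitFreeLocalsHelpers variants above. The entry point for n locals
 * (n <= kNumFreeLocalsHelpers) lands n unrolled decrefs before the shared
 * ret; longer frames enter freeManyLocalsHelper, loop, and then fall through
 * the unrolled tail. TypedValueSketch, decRefLocalSketch, and the constant's
 * value are hypothetical stand-ins.
 */
struct TypedValueSketch {};                          // stand-in for TypedValue
static void decRefLocalSketch(TypedValueSketch*) {}  // decref of one local
constexpr int kNumFreeLocalsHelpersSketch = 7;       // assumed; may differ

void freeLocalsSketch(TypedValueSketch* local, TypedValueSketch* last) {
  // freeManyLocalsHelper: loop while more than the unrolled count remain...
  while (last - local + 1 > kNumFreeLocalsHelpersSketch) {
    decRefLocalSketch(local++);
  }
  // ...then fall through the unrolled freeLocalsHelpers[i] entry points.
  while (local <= last) {
    decRefLocalSketch(local++);
  }
}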