TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) { // The address of the first local is passed in the second argument register. // We use the third and fourth as scratch registers. auto const local = rarg(1); auto const last = rarg(2); auto const type = rarg(3); CGMeta fixups; // This stub is very hot; keep it cache-aligned. align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead); auto const release = emitDecRefHelper(cb, data, fixups, local, type, local | last); auto const decref_local = [&] (Vout& v) { auto const sf = v.makeReg(); // We can't do a byte load here---we have to sign-extend since we use // `type' as a 32-bit array index to the destructor table. v << loadzbl{local[TVOFF(m_type)], type}; emitCmpTVType(v, sf, KindOfRefCountThreshold, type); ifThen(v, CC_G, sf, [&] (Vout& v) { auto const dword_size = sizeof(int64_t); // saving return value on the stack, but keeping it 16-byte aligned v << mflr{rfuncln()}; v << lea {rsp()[-2 * dword_size], rsp()}; v << store{rfuncln(), rsp()[0]}; v << call{release, arg_regs(3)}; // restore the return value from the stack v << load{rsp()[0], rfuncln()}; v << lea {rsp()[2 * dword_size], rsp()}; v << mtlr{rfuncln()}; }); }; auto const next_local = [&] (Vout& v) { v << addqi{static_cast<int>(sizeof(TypedValue)), local, local, v.makeReg()}; }; alignJmpTarget(cb); us.freeManyLocalsHelper = vwrap(cb, data, fixups, [&] (Vout& v) { // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop // until we hit that point. v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last}; doWhile(v, CC_NZ, {}, [&] (const VregList& in, const VregList& out) { auto const sf = v.makeReg(); decref_local(v); next_local(v); v << cmpq{local, last, sf}; return sf; } ); }); for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) { us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) { decref_local(v); if (i != 0) next_local(v); }); } // All the stub entrypoints share the same ret. vwrap(cb, data, fixups, [] (Vout& v) { v << ret{}; }); // This stub is hot, so make sure to keep it small. #if 0 // TODO(gut): Currently this assert fails. // Take a closer look when looking at performance always_assert(Stats::enabled() || (cb.frontier() - release <= 4 * cache_line_size())); #endif fixups.process(nullptr); return release; }
TCA emitFreeLocalsHelpers(CodeBlock& cb, UniqueStubs& us) { // The address of the first local is passed in the second argument register. // We use the third and fourth as scratch registers. auto const local = rarg(1); auto const last = rarg(2); auto const type = rarg(3); CGMeta fixups; // This stub is very hot; keep it cache-aligned. align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead); auto const release = emitDecRefHelper(cb, fixups, local, type, local | last); auto const decref_local = [&] (Vout& v) { auto const sf = v.makeReg(); // We can't do a byte load here---we have to sign-extend since we use // `type' as a 32-bit array index to the destructor table. v << loadzbl{local[TVOFF(m_type)], type}; emitCmpTVType(v, sf, KindOfRefCountThreshold, type); ifThen(v, CC_G, sf, [&] (Vout& v) { v << call{release, arg_regs(3)}; }); }; auto const next_local = [&] (Vout& v) { v << addqi{static_cast<int>(sizeof(TypedValue)), local, local, v.makeReg()}; }; alignJmpTarget(cb); us.freeManyLocalsHelper = vwrap(cb, fixups, [&] (Vout& v) { // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop // until we hit that point. v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last}; doWhile(v, CC_NZ, {}, [&] (const VregList& in, const VregList& out) { auto const sf = v.makeReg(); decref_local(v); next_local(v); v << cmpq{local, last, sf}; return sf; } ); }); for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) { us.freeLocalsHelpers[i] = vwrap(cb, [&] (Vout& v) { decref_local(v); if (i != 0) next_local(v); }); } // All the stub entrypoints share the same ret. vwrap(cb, fixups, [] (Vout& v) { v << ret{}; }); // This stub is hot, so make sure to keep it small. // Alas, we have more work to do in this under Windows, // so we can't be this small :( #ifndef _WIN32 always_assert(Stats::enabled() || (cb.frontier() - release <= 4 * x64::cache_line_size())); #endif fixups.process(nullptr); return release; }
TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) { // The address of the first local is passed in the second argument register. // We use the third and fourth as scratch registers. auto const local = rarg(1); auto const last = rarg(2); auto const type = rarg(3); CGMeta fixups; TCA freeLocalsHelpers[kNumFreeLocalsHelpers]; TCA freeManyLocalsHelper; // This stub is very hot; keep it cache-aligned. align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead); auto const release = emitDecRefHelper(cb, data, fixups, local, type, local | last); auto const decref_local = [&] (Vout& v) { auto const sf = v.makeReg(); // We can't use emitLoadTVType() here because it does a byte load, and we // need to sign-extend since we use `type' as a 32-bit array index to the // destructor table. v << loadzbl{local[TVOFF(m_type)], type}; emitCmpTVType(v, sf, KindOfRefCountThreshold, type); ifThen(v, CC_G, sf, [&] (Vout& v) { v << call{release, arg_regs(3)}; }); }; auto const next_local = [&] (Vout& v) { v << addqi{static_cast<int>(sizeof(TypedValue)), local, local, v.makeReg()}; }; alignJmpTarget(cb); freeManyLocalsHelper = vwrap(cb, data, [&] (Vout& v) { // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop // until we hit that point. v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last}; // Set up frame linkage to avoid an indirect fixup. v << copy{rsp(), rfp()}; doWhile(v, CC_NZ, {}, [&] (const VregList& in, const VregList& out) { auto const sf = v.makeReg(); decref_local(v); next_local(v); v << cmpq{local, last, sf}; return sf; } ); }); for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) { freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) { decref_local(v); if (i != 0) next_local(v); }); } // All the stub entrypoints share the same ret. vwrap(cb, data, fixups, [] (Vout& v) { v << popp{rfp(), rlr()}; v << ret{}; }); // Create a table of branches us.freeManyLocalsHelper = vwrap(cb, data, [&] (Vout& v) { v << pushp{rlr(), rfp()}; // rvmfp() is needed by the freeManyLocalsHelper stub above, so frame // linkage setup is deferred until after its use in freeManyLocalsHelper. v << jmpi{freeManyLocalsHelper}; }); for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) { us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) { // We set up frame linkage to avoid an indirect fixup. v << pushp{rlr(), rfp()}; v << copy{rsp(), rfp()}; v << jmpi{freeLocalsHelpers[i]}; }); } // FIXME: This stub is hot, so make sure to keep it small. #if 0 always_assert(Stats::enabled() || (cb.frontier() - release <= 4 * x64::cache_line_size())); #endif fixups.process(nullptr); return release; }