Example #1
TCA emitEndCatchHelper(CodeBlock& cb, UniqueStubs& us) {
  auto const udrspo = rvmtl()[unwinderDebuggerReturnSPOff()];

  auto const debuggerReturn = vwrap(cb, [&] (Vout& v) {
    v << load{udrspo, rvmsp()};
    v << storeqi{0, udrspo};
  });
  svcreq::emit_persistent(cb, folly::none, REQ_POST_DEBUGGER_RET);

  auto const resumeCPPUnwind = vwrap(cb, [] (Vout& v) {
    static_assert(sizeof(tl_regState) == 1,
                  "The following store must match the size of tl_regState.");
    auto const regstate = emitTLSAddr(v, tls_datum(tl_regState));
    v << storebi{static_cast<int32_t>(VMRegState::CLEAN), regstate};

    v << load{rvmtl()[unwinderExnOff()], rarg(0)};
    v << call{TCA(_Unwind_Resume), arg_regs(1)};
  });
  us.endCatchHelperPast = cb.frontier();
  vwrap(cb, [] (Vout& v) { v << ud2{}; });

  alignJmpTarget(cb);

  return vwrap(cb, [&] (Vout& v) {
    auto const done1 = v.makeBlock();
    auto const sf1 = v.makeReg();

    v << cmpqim{0, udrspo, sf1};
    v << jcci{CC_NE, sf1, done1, debuggerReturn};
    v = done1;

    // Normal end catch situation: call back to tc_unwind_resume, which returns
    // the catch trace (or null) in %rax, and the new vmfp in %rdx.
    v << copy{rvmfp(), rarg(0)};
    v << call{TCA(tc_unwind_resume)};
    v << copy{reg::rdx, rvmfp()};

    auto const done2 = v.makeBlock();
    auto const sf2 = v.makeReg();

    v << testq{reg::rax, reg::rax, sf2};
    v << jcci{CC_Z, sf2, done2, resumeCPPUnwind};
    v = done2;

    // We need to do a syncForLLVMCatch(), but vmfp is already in rdx.
    v << jmpr{reg::rax};
  });
}
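
The control flow above reduces to a three-way dispatch. Below is a minimal C++ sketch of that dispatch, using invented stand-ins (UnwinderState, ResumeResult, tc_unwind_resume_model) rather than HHVM's real unwinder types.

#include <cstdint>

// Invented stand-ins for the state the stub touches; they model only the
// control flow, not HHVM's real unwinder types.
struct UnwinderState {
  std::uintptr_t debuggerReturnSP{0};  // nonzero => a debugger return is pending
};
struct ResumeResult {
  void* catchTrace{nullptr};  // "%rax": catch trace to jump to, or nullptr
  void* newVmfp{nullptr};     // "%rdx": updated VM frame pointer
};

// Placeholder for the real tc_unwind_resume call.
static ResumeResult tc_unwind_resume_model(void* /*vmfp*/) { return {}; }

enum class EndCatchAction { DebuggerReturn, JumpToCatchTrace, ResumeCppUnwind };

// Sketch of the branch structure of emitEndCatchHelper.
static EndCatchAction endCatchModel(UnwinderState& uw, void*& vmfp, void*& vmsp) {
  if (uw.debuggerReturnSP != 0) {
    // Debugger-return path: restore vmsp, clear the slot, then the stub emits
    // a REQ_POST_DEBUGGER_RET service request.
    vmsp = reinterpret_cast<void*>(uw.debuggerReturnSP);
    uw.debuggerReturnSP = 0;
    return EndCatchAction::DebuggerReturn;
  }
  auto const res = tc_unwind_resume_model(vmfp);
  vmfp = res.newVmfp;
  // A null catch trace hands the exception back to the C++ unwinder
  // (_Unwind_Resume); otherwise the stub jumps to the catch trace (jmpr{%rax}).
  return res.catchTrace ? EndCatchAction::JumpToCatchTrace
                        : EndCatchAction::ResumeCppUnwind;
}

The three outcomes correspond to the debuggerReturn block, the jmpr{reg::rax} tail, and the resumeCPPUnwind block in the stub above.
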
Example #2
void cgInlineReturn(IRLS& env, const IRInstruction* inst) {
  auto& v = vmain(env);
  auto const fp = srcLoc(env, inst, 0).reg();
  auto const callerFPOff = inst->extra<InlineReturn>()->offset;
  v << lea{fp[cellsToBytes(callerFPOff.offset)], rvmfp()};
  v << inlineend{};
}
Example #3
void cgInlineReturnNoFrame(IRLS& env, const IRInstruction* inst) {
  auto& v = vmain(env);

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    auto const extra = inst->extra<InlineReturnNoFrame>();
    auto const offset = cellsToBytes(extra->offset.offset);
    for (auto i = 0; i < kNumActRecCells; ++i) {
      trashTV(v, rvmfp(), offset - cellsToBytes(i), kTVTrashJITFrame);
    }
  }

  v << inlineend{};
}
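
The assert-build loop overwrites the dead ActRec cells so that stale reads fail loudly. A self-contained sketch of the same poisoning idea, with invented names (CellModel, kNumActRecCellsModel, kTrashValue) standing in for the JIT's TypedValue layout:

#include <cstdint>

// Invented stand-ins for the JIT's TypedValue layout and constants.
struct CellModel { std::uint64_t value; std::uint64_t type; };
constexpr int kNumActRecCellsModel = 3;
constexpr std::uint64_t kTrashValue = 0xbadbadbadbadbad0ULL;

// Poison the ActRec-sized region ending at `offsetCells` cells from the frame
// base, so any later read of the dead frame shows up loudly in debug builds
// (the role trashTV plays above).  Assumes offsetCells >= kNumActRecCellsModel - 1.
static void trashDeadCells(CellModel* frame, int offsetCells) {
  for (int i = 0; i < kNumActRecCellsModel; ++i) {
    CellModel trash;
    trash.value = kTrashValue;
    trash.type  = kTrashValue;
    frame[offsetCells - i] = trash;
  }
}
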
Example #4
void emitFuncGuard(const Func* func, CodeBlock& cb, CGMeta& fixups) {
  ppc64_asm::Assembler a { cb };

  const auto tmp1 = ppc64_asm::reg::r3;
  const auto tmp2 = ppc64_asm::reg::r4;

  assertx(ppc64::abi(CodeKind::CrossTrace).gpUnreserved.contains(tmp1));
  assertx(ppc64::abi(CodeKind::CrossTrace).gpUnreserved.contains(tmp2));

  emitSmashableMovq(a.code(), fixups, uint64_t(func), tmp1);
  a.  ld     (tmp2, rvmfp()[AROFF(m_func)]);
  a.  cmpd   (tmp1, tmp2);

  a.  branchFar(tc::ustubs().funcPrologueRedispatch,
                  ppc64_asm::BranchConditions::NotEqual);

  DEBUG_ONLY auto guard = funcGuardFromPrologue(a.frontier(), func);
  assertx(funcGuardMatches(guard, func));
}
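
The guard itself is a single pointer comparison. Below is a rough C++ model of what the emitted sequence checks, with invented types (FuncModel, ActRecModel, redispatch) standing in for the runtime's.

// Invented stand-ins for the runtime types involved in the guard; this is a
// model of the check, not the real prologue.
struct FuncModel {};
struct ActRecModel { const FuncModel* m_func; };

using PrologueFn = void (*)(ActRecModel*);

// Stand-in for funcPrologueRedispatch: locate and enter the right prologue.
static void redispatch(ActRecModel*) {}

// What emitSmashableMovq + cmpd + branchFar amount to: `expected` is the
// immediate smashed into the prologue, ar->m_func is what the caller pushed.
static void funcGuardModel(const FuncModel* expected, ActRecModel* ar,
                           PrologueFn body) {
  if (ar->m_func != expected) {
    redispatch(ar);   // branchFar to funcPrologueRedispatch on mismatch
    return;
  }
  body(ar);           // fall through into the prologue proper
}

Example #6 performs the same check on ARM; only the far-branch mechanics differ.
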
Example #5
void addDbgGuardImpl(SrcKey sk, SrcRec* sr) {
  TCA realCode = sr->getTopTranslation();
  if (!realCode) return;  // No translations, nothing to do.

  auto& cb = mcg->code.main();

  auto const dbgGuard = vwrap(cb, [&] (Vout& v) {
    if (!sk.resumed()) {
      auto const off = sr->nonResumedSPOff();
      v << lea{rvmfp()[-cellsToBytes(off.offset)], rvmsp()};
    }

    auto const tinfo = v.makeReg();
    auto const attached = v.makeReg();
    auto const sf = v.makeReg();

    auto const done = v.makeBlock();

    constexpr size_t dbgOff =
      offsetof(ThreadInfo, m_reqInjectionData) +
      RequestInjectionData::debuggerReadOnlyOffset();

    v << ldimmq{reinterpret_cast<uintptr_t>(sk.pc()), rarg(0)};

    emitTLSLoad(v, tls_datum(ThreadInfo::s_threadInfo), tinfo);
    v << loadb{tinfo[dbgOff], attached};
    v << testbi{static_cast<int8_t>(0xffu), attached, sf};

    v << jcci{CC_NZ, sf, done, mcg->ustubs().interpHelper};

    v = done;
    v << fallthru{};
  }, CodeKind::Helper);

  // Emit a jump to the actual code.
  auto const dbgBranchGuardSrc = emitSmashableJmp(cb, realCode);

  // Add the guard to the SrcRec.
  sr->addDebuggerGuard(dbgGuard, dbgBranchGuardSrc);
}
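
The guard boils down to: read a per-request "debugger attached" byte from thread-local state and, if it is set, bail to the interpreter at this PC; otherwise fall through to the real translation. A rough model using only standard thread_local storage (all names here are invented, not HHVM's):

#include <cstdint>

// Invented stand-in for the per-thread flag the guard loads via
// emitTLSLoad + loadb.
struct RequestInfoModel { std::uint8_t debuggerAttached{0}; };
thread_local RequestInfoModel tl_requestInfo;

using PC = const unsigned char*;

static void interpAt(PC) {}        // stand-in for interpHelper
static void enterTranslation() {}  // stand-in for the smashable jmp to realCode

// Shape of the emitted guard: testbi on the flag byte, jcc to interpHelper,
// fallthru plus a smashable jmp to the existing translation otherwise.
static void dbgGuardModel(PC pc) {
  if (tl_requestInfo.debuggerAttached != 0) {
    interpAt(pc);        // rarg(0) carries sk.pc() to the interp helper
    return;
  }
  enterTranslation();    // emitSmashableJmp(cb, realCode)
}
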
Example #6
void emitFuncGuard(const Func* func, CodeBlock& cb, CGMeta& fixups) {
  vixl::MacroAssembler a { cb };
  vixl::Label after_data;
  vixl::Label target_data;
  auto const begin = cb.frontier();

  assertx(arm::abi(CodeKind::CrossTrace).gpUnreserved.contains(vixl::x0));

  emitSmashableMovq(cb, fixups, uint64_t(func), vixl::x0);
  a.  Ldr   (rAsm, M(rvmfp()[AROFF(m_func)]));
  a.  Cmp   (vixl::x0, rAsm);
  a.  B     (&after_data, convertCC(CC_Z));

  a.  Ldr   (rAsm_w, &target_data);
  a.  Br    (rAsm);

  a.  bind  (&target_data);
  a.  dc32  (makeTarget32(tc::ustubs().funcPrologueRedispatch));
  a.  bind  (&after_data);

  cb.sync(begin);
}
Example #7
TCA emitCallToExit(CodeBlock& cb, DataBlock& data, const UniqueStubs& /*us*/) {
  ppc64_asm::Assembler a { cb };
  auto const start = a.frontier();

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    vwrap(cb, data, [&] (Vout& v) {
      // Not done directly, since rret(0) == rarg(0) on ppc64.
      Vreg ret_addr = v.makeReg();

      // exittc address pushed on calltc/resumetc.
      v << copy{rsp(), ret_addr};

      // We need to spill the return registers around the assert call.
      v << push{rret(0)};
      v << push{rret(1)};

      v << copy{ret_addr, rarg(0)};
      v << call{TCA(assert_tc_saved_rip), RegSet(rarg(0))};

      v << pop{rret(1)};
      v << pop{rret(0)};
    });
  }

  // Discard the exittc address pushed by calltc/resumetc so that the stack
  // is balanced for the code below.
  a.addi(rsp(), rsp(), 8);

  // Reinitialize r1 for the external code found after enterTCExit's stubret
  a.addi(rsfp(), rsp(), 8);

  // Restore rvmfp when leaving the VM; it must be the same as rsfp.
  a.mr(rvmfp(), rsfp());

  // Emulate a ret to enterTCExit without actually doing one to avoid
  // unbalancing the return stack buffer.
  a.branchAuto(TCA(tc::ustubs().enterTCExit));
  return start;
}
Example #8
/*
 * Service request stub emitter.
 *
 * Emit a service request stub of type `sr' at `start' in `cb'.
 */
void emit_svcreq(CodeBlock& cb,
                 TCA start,
                 bool persist,
                 folly::Optional<FPInvOffset> spOff,
                 ServiceRequest sr,
                 const ArgVec& argv) {
  FTRACE(2, "svcreq @{} {}(", start, to_name(sr));

  auto const is_reused = start != cb.frontier();

  CodeBlock stub;
  stub.init(start, stub_size(), "svcreq_stub");

  { Vauto vasm{stub};
    auto& v = vasm.main();

    // If we have an spOff, materialize rvmsp() so that handleSRHelper() can do
    // a VM reg sync.  (When we don't have an spOff, the caller of the service
    // request was responsible for making sure rvmsp already contained the top
    // of the stack.)
    if (spOff) {
      v << lea{rvmfp()[-cellsToBytes(spOff->offset)], rvmsp()};
    }

    auto live_out = leave_trace_regs();

    assert(argv.size() <= kMaxArgs);

    // Pick up CondCode arguments first---vasm may optimize immediate loads
    // into operations which clobber status flags.
    for (auto i = 0; i < argv.size(); ++i) {
      auto const& arg = argv[i];
      if (arg.kind != Arg::Kind::CondCode) continue;

      FTRACE(2, "c({}), ", cc_names[arg.cc]);
      v << setcc{arg.cc, r_svcreq_sf(), rbyte(r_svcreq_arg(i))};
    }

    for (auto i = 0; i < argv.size(); ++i) {
      auto const& arg = argv[i];
      auto const r = r_svcreq_arg(i);

      switch (arg.kind) {
        case Arg::Kind::Immed:
          FTRACE(2, "{}, ", arg.imm);
          v << copy{v.cns(arg.imm), r};
          break;
        case Arg::Kind::Address:
          FTRACE(2, "{}(%rip), ", arg.imm);
          v << leap{reg::rip[arg.imm], r};
          break;
        case Arg::Kind::CondCode:
          break;
      }
      live_out |= r;
    }
    FTRACE(2, ") : stub@");

    if (persist) {
      FTRACE(2, "<none>");
      v << copy{v.cns(0), r_svcreq_stub()};
    } else {
      FTRACE(2, "{}", stub.base());
      v << leap{reg::rip[int64_t(stub.base())], r_svcreq_stub()};
    }
    v << copy{v.cns(sr), r_svcreq_req()};

    live_out |= r_svcreq_stub();
    live_out |= r_svcreq_req();

    v << jmpi{TCA(handleSRHelper), live_out};

    // We pad ephemeral stubs unconditionally.  This is required for
    // correctness by the x64 code relocator.
    vasm.unit().padding = !persist;
  }

  if (!is_reused) cb.skip(stub.used());
}
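
Two details above are easy to miss: condition-code arguments are materialized into their registers before any immediates (so later immediate loads are free to clobber the status flags), and persistent stubs pass 0 instead of their own address in r_svcreq_stub(). A small sketch of the argument-packing order, using an invented ArgModel type rather than the real Arg/ArgVec:

#include <cstddef>
#include <cstdint>
#include <vector>

// Invented model of the three argument kinds handled above.
struct ArgModel {
  enum class Kind { Immed, Address, CondCode } kind;
  std::uint64_t imm{0};  // Immed / Address payload
  bool cc{false};        // CondCode payload, already evaluated here
};

// Pack arguments into the fixed service-request registers (modeled as a plain
// array).  Condition codes go first, mirroring the stub: once they have been
// materialized as 0/1 values, later immediate loads may clobber flags freely.
static std::vector<std::uint64_t>
packSvcReqArgs(const std::vector<ArgModel>& argv) {
  std::vector<std::uint64_t> regs(argv.size(), 0);
  for (std::size_t i = 0; i < argv.size(); ++i) {
    if (argv[i].kind == ArgModel::Kind::CondCode) {
      regs[i] = argv[i].cc ? 1 : 0;  // setcc into rbyte(r_svcreq_arg(i))
    }
  }
  for (std::size_t i = 0; i < argv.size(); ++i) {
    switch (argv[i].kind) {
      case ArgModel::Kind::Immed:    regs[i] = argv[i].imm; break;  // copy{v.cns(imm), r}
      case ArgModel::Kind::Address:  regs[i] = argv[i].imm; break;  // leap{rip[imm], r}
      case ArgModel::Kind::CondCode: break;  // already materialized above
    }
  }
  return regs;
}
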
Example #9
void cgCall(IRLS& env, const IRInstruction* inst) {
  auto const sp = srcLoc(env, inst, 0).reg();
  auto const fp = srcLoc(env, inst, 1).reg();
  auto const extra = inst->extra<Call>();
  auto const callee = extra->callee;
  auto const argc = extra->numParams;

  auto& v = vmain(env);
  auto& vc = vcold(env);
  auto const catchBlock = label(env, inst->taken());

  auto const calleeSP = sp[cellsToBytes(extra->spOffset.offset)];
  auto const calleeAR = calleeSP + cellsToBytes(argc);

  v << store{fp, calleeAR + AROFF(m_sfp)};
  v << storeli{safe_cast<int32_t>(extra->after), calleeAR + AROFF(m_soff)};

  if (extra->fcallAwait) {
    // This clobbers any flags that might have already been set on the callee
    // AR (e.g., by SpillFrame), but this is okay because there should never be
    // any conflicts; see the documentation in act-rec.h.
    auto const imm = static_cast<int32_t>(
      ActRec::encodeNumArgsAndFlags(argc, ActRec::Flags::IsFCallAwait)
    );
    v << storeli{imm, calleeAR + AROFF(m_numArgsAndFlags)};
  }

  auto const isNativeImplCall = callee &&
                                callee->builtinFuncPtr() &&
                                !callee->nativeFuncPtr() &&
                                argc == callee->numParams();
  if (isNativeImplCall) {
    // The assumption here is that for builtins, the generated func contains
    // only a single opcode (NativeImpl), and there are no non-argument locals.
    if (do_assert) {
      assertx(argc == callee->numLocals());
      assertx(callee->numIterators() == 0);

      auto addr = callee->getEntry();
      while (peek_op(addr) == Op::AssertRATL) {
        addr += instrLen(addr);
      }
      assertx(peek_op(addr) == Op::NativeImpl);
      assertx(addr + instrLen(addr) ==
              callee->unit()->entry() + callee->past());
    }

    v << store{v.cns(mcg->ustubs().retHelper), calleeAR + AROFF(m_savedRip)};
    if (callee->attrs() & AttrMayUseVV) {
      v << storeqi{0, calleeAR + AROFF(m_invName)};
    }
    v << lea{calleeAR, rvmfp()};

    emitCheckSurpriseFlagsEnter(v, vc, fp, Fixup(0, argc), catchBlock);

    auto const builtinFuncPtr = callee->builtinFuncPtr();
    TRACE(2, "Calling builtin preClass %p func %p\n",
          callee->preClass(), builtinFuncPtr);

    // We sometimes call this while curFunc() isn't really the builtin, so make
    // sure to record the sync point as if we are inside the builtin.
    if (FixupMap::eagerRecord(callee)) {
      auto const syncSP = v.makeReg();
      v << lea{calleeSP, syncSP};
      emitEagerSyncPoint(v, callee->getEntry(), rvmtl(), rvmfp(), syncSP);
    }

    // Call the native implementation.  This will free the locals for us in the
    // normal case.  In the case where an exception is thrown, the VM unwinder
    // will handle it for us.
    auto const done = v.makeBlock();
    v << vinvoke{CallSpec::direct(builtinFuncPtr), v.makeVcallArgs({{rvmfp()}}),
                 v.makeTuple({}), {done, catchBlock}, Fixup(0, argc)};
    env.catch_calls[inst->taken()] = CatchCall::CPP;

    v = done;
    // The native implementation already put the return value on the stack for
    // us, and handled cleaning up the arguments.  We have to update the frame
    // pointer and the stack pointer, and load the return value into the return
    // register so the trace we are returning to has it where it expects.
    // TODO(#1273094): We should probably modify the actual builtins to return
    // values via registers using the C ABI and do a reg-to-reg move.
    loadTV(v, inst->dst(), dstLoc(env, inst, 0), rvmfp()[AROFF(m_r)], true);
    v << load{rvmfp()[AROFF(m_sfp)], rvmfp()};
    emitRB(v, Trace::RBTypeFuncExit, callee->fullName()->data());
    return;
  }

  v << lea{calleeAR, rvmfp()};

  if (RuntimeOption::EvalHHIRGenerateAsserts) {
    v << syncvmsp{v.cns(0x42)};

    constexpr uint64_t kUninitializedRIP = 0xba5eba11acc01ade;
    emitImmStoreq(v, kUninitializedRIP, rvmfp()[AROFF(m_savedRip)]);
  }

  // Emit a smashable call that initially calls a recyclable service request
  // stub.  The stub and the eventual targets take rvmfp() as an argument,
  // pointing to the callee ActRec.
  auto const target = callee
    ? mcg->ustubs().immutableBindCallStub
    : mcg->ustubs().bindCallStub;

  auto const done = v.makeBlock();
  v << callphp{target, php_call_regs(), {{done, catchBlock}}};
  env.catch_calls[inst->taken()] = CatchCall::PHP;
  v = done;

  auto const dst = dstLoc(env, inst, 0);
  v << defvmret{dst.reg(0), dst.reg(1)};
}
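
The do_assert block walks the builtin's bytecode to confirm the "single NativeImpl opcode" assumption, skipping any leading AssertRATL ops. A self-contained sketch of that shape check over an invented mini-bytecode (OpModel, InstrModel are not HHVM types):

#include <cstddef>
#include <vector>

// Invented mini-bytecode for the shape the assertion checks: a builtin body
// may start with any number of AssertRATL ops, and NativeImpl must be its
// final (and only other) instruction.
enum class OpModel { AssertRATL, NativeImpl, Other };
struct InstrModel { OpModel op; std::size_t len; };

static bool isNativeImplOnlyBody(const std::vector<InstrModel>& body) {
  std::size_t i = 0;
  while (i < body.size() && body[i].op == OpModel::AssertRATL) {
    ++i;                                   // peek_op(addr) == Op::AssertRATL
  }
  // Exactly one opcode remains and it is NativeImpl; in the real check this
  // is "addr + instrLen(addr) == callee->unit()->entry() + callee->past()".
  return i + 1 == body.size() && body[i].op == OpModel::NativeImpl;
}
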
Example #10
TCA emitFunctionEnterHelper(CodeBlock& cb, UniqueStubs& us) {
  alignJmpTarget(cb);

  auto const start = vwrap(cb, [&] (Vout& v) {
    auto const ar = v.makeReg();

    v << copy{rvmfp(), ar};

    // Fully set up the call frame for the stub.  We can't skip this like we do
    // in other stubs because we need the return IP for this frame in the %rbp
    // chain, in order to find the proper fixup for the VMRegAnchor in the
    // intercept handler.
    v << stublogue{true};
    v << copy{rsp(), rvmfp()};

    // When we call the event hook, it might tell us to skip the callee
    // (because of fb_intercept).  If that happens, we need to return to the
    // caller, but the handler will have already popped the callee's frame.
    // So, we need to save these values for later.
    v << pushm{ar[AROFF(m_savedRip)]};
    v << pushm{ar[AROFF(m_sfp)]};

    v << copy2{ar, v.cns(EventHook::NormalFunc), rarg(0), rarg(1)};

    bool (*hook)(const ActRec*, int) = &EventHook::onFunctionCall;
    v << call{TCA(hook)};
  });

  us.functionEnterHelperReturn = vwrap2(cb, [&] (Vout& v, Vout& vcold) {
    auto const sf = v.makeReg();
    v << testb{rret(), rret(), sf};

    unlikelyIfThen(v, vcold, CC_Z, sf, [&] (Vout& v) {
      auto const saved_rip = v.makeReg();

      // The event hook has already cleaned up the stack and popped the
      // callee's frame, so we're ready to continue from the original call
      // site.  We just need to grab the fp/rip of the original frame that we
      // saved earlier, and sync rvmsp().
      v << pop{rvmfp()};
      v << pop{saved_rip};

      // Drop our call frame; the stublogue{} instruction guarantees that this
      // is exactly 16 bytes.
      v << lea{rsp()[16], rsp()};

      // Sync vmsp and return to the caller.  This unbalances the return stack
      // buffer, but if we're intercepting, we probably don't care.
      v << load{rvmtl()[rds::kVmspOff], rvmsp()};
      v << jmpr{saved_rip};
    });

    // Skip past the stuff we saved for the intercept case.
    v << lea{rsp()[16], rsp()};

    // Restore rvmfp() and return to the callee's func prologue.
    v << stubret{RegSet(), true};
  });

  return start;
}
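
The interesting case is the intercepted one: the event hook has already popped the callee's frame, so the stub must return to the original call site using the fp/rip pair it pushed earlier, with vmsp resynced from RDS. A C++ model of that decision, with invented stand-ins (SavedFrameModel, onFunctionCallModel) rather than the real ActRec and EventHook:

// Invented stand-ins for what the stub pushes before calling the event hook;
// this models the decision only, not the real frame layout.
struct SavedFrameModel {
  void* savedRip;  // ar[AROFF(m_savedRip)]
  void* sfp;       // ar[AROFF(m_sfp)]
};

// Stand-in for EventHook::onFunctionCall: returns false when fb_intercept
// decided to skip the callee (and has already popped its frame).
static bool onFunctionCallModel(void* /*ar*/) { return true; }

enum class EnterAction { ContinueIntoCallee, ReturnToCaller };

static EnterAction functionEnterModel(void* ar, SavedFrameModel saved,
                                      void*& vmfp, void*& vmsp,
                                      void* vmspFromRds) {
  if (onFunctionCallModel(ar)) {
    // Normal path: drop the saved pair (lea{rsp()[16], rsp()}) and return
    // into the callee's prologue via stubret.
    return EnterAction::ContinueIntoCallee;
  }
  // Intercepted: the callee frame is gone, so resume at the original call
  // site with the caller's fp and a vmsp refreshed from RDS.
  vmfp = saved.sfp;
  vmsp = vmspFromRds;    // load{rvmtl()[rds::kVmspOff], rvmsp()}
  (void)saved.savedRip;  // jmpr{saved_rip} in the real stub
  return EnterAction::ReturnToCaller;
}
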
Example #11
TCA emitFreeLocalsHelpers(CodeBlock& cb, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);
  CGMeta fixups;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release = emitDecRefHelper(cb, fixups, local, type, local | last);

  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't do a byte load here---we have to sign-extend since we use
    // `type' as a 32-bit array index to the destructor table.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      v << call{release, arg_regs(3)};
    });
  };

  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)),
               local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  us.freeManyLocalsHelper = vwrap(cb, fixups, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop
    // until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();

        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, fixups, [] (Vout& v) { v << ret{}; });

  // This stub is hot, so make sure to keep it small.
  // Alas, we have more work to do here under Windows, so it can't be quite
  // this small :(
#ifndef _WIN32
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * x64::cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
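
The layout being built here is a shared loop plus an unrolled tail: entry i of freeLocalsHelpers decrefs exactly i + 1 locals by falling through into the entries below it, and freeManyLocalsHelper loops until only that unrolled tail remains. A plain C++ sketch of the equivalent logic (decrefLocalModel and kNumFreeLocalsHelpersModel are invented stand-ins):

// Invented stand-ins: decrefLocalModel plays the role of the type check plus
// the call to the release helper, and the constant mirrors kNumFreeLocalsHelpers.
constexpr int kNumFreeLocalsHelpersModel = 7;

static void decrefLocalModel(long* /*local*/) {}

// Equivalent logic of the stub family: freeManyLocalsHelper loops until only
// the unrolled tail remains; entry i of freeLocalsHelpers then frees exactly
// i + 1 locals by falling through into the entries below it, and all entries
// share one final ret.  (Local addressing is simplified here; the real stubs
// walk byte offsets relative to rvmfp().)
static void freeLocalsModel(long* local, int numLocals) {
  while (numLocals > kNumFreeLocalsHelpersModel) {
    decrefLocalModel(local);
    ++local;
    --numLocals;
  }
  for (int i = numLocals; i > 0; --i) {
    decrefLocalModel(local);
    if (i != 1) ++local;   // the i == 0 helper skips next_local, as above
  }
}

The PPC64 and ARM variants in the next two examples emit the same structure; they differ only in how the link register and frame linkage are handled around the call to the release helper.
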
Example #12
TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);
  CGMeta fixups;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release =
    emitDecRefHelper(cb, data, fixups, local, type, local | last);

  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't do a byte load here---we have to sign-extend since we use
    // `type' as a 32-bit array index to the destructor table.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      auto const dword_size = sizeof(int64_t);

      // Save the return address (link register) on the stack, keeping the
      // stack 16-byte aligned.
      v << mflr{rfuncln()};
      v << lea {rsp()[-2 * dword_size], rsp()};
      v << store{rfuncln(), rsp()[0]};

      v << call{release, arg_regs(3)};

      // Restore the return address from the stack.
      v << load{rsp()[0], rfuncln()};
      v << lea {rsp()[2 * dword_size], rsp()};
      v << mtlr{rfuncln()};
    });
  };

  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)),
               local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  us.freeManyLocalsHelper = vwrap(cb, data, fixups, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop
    // until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();

        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, data, fixups, [] (Vout& v) { v << ret{}; });

  // This stub is hot, so make sure to keep it small.
#if 0
  // TODO(gut): Currently this assert fails.
  // Take a closer look when looking at performance
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
Example #13
TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);
  CGMeta fixups;
  TCA freeLocalsHelpers[kNumFreeLocalsHelpers];
  TCA freeManyLocalsHelper;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release =
    emitDecRefHelper(cb, data, fixups, local, type, local | last);

  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't use emitLoadTVType() here because it does a byte load, and we
    // need to sign-extend since we use `type' as a 32-bit array index to the
    // destructor table.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      v << call{release, arg_regs(3)};
    });
  };

  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)),
               local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  freeManyLocalsHelper = vwrap(cb, data, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop
    // until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    // Set up frame linkage to avoid an indirect fixup.
    v << copy{rsp(), rfp()};

    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();

        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, data, fixups, [] (Vout& v) {
    v << popp{rfp(), rlr()};
    v << ret{};
  });

  // Create a table of branches
  us.freeManyLocalsHelper = vwrap(cb, data, [&] (Vout& v) {
    v << pushp{rlr(), rfp()};

    // rvmfp() is needed by the freeManyLocalsHelper stub above, so frame
    // linkage setup is deferred until after its use in freeManyLocalsHelper.
    v << jmpi{freeManyLocalsHelper};
  });
  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      // We set up frame linkage to avoid an indirect fixup.
      v << pushp{rlr(), rfp()};
      v << copy{rsp(), rfp()};
      v << jmpi{freeLocalsHelpers[i]};
    });
  }

  // FIXME: This stub is hot, so make sure to keep it small.
#if 0
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * x64::cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}