Exemple #1
// Picks the physical register that `tmp` is forced into, if any: rvmsp() for
// StkPtr values (subject to the per-architecture forceStkPtrs switch),
// rvmfp() for FramePtr values not defined by LdContActRec / LdAFWHActRec,
// and InvalidReg otherwise.
// NOTE(review): this excerpt appears truncated (closing braces and the head
// of the StkPtr assertion are not visible) — confirm against the full source.
PhysReg forceAlloc(const SSATmp& tmp) {
  // Values whose type is <= TBottom get no register.
  if (tmp.type() <= TBottom) return InvalidReg;

  auto inst = tmp.inst();
  auto opc = inst->op();

  // Per-architecture switch: false on X64, true on ARM; PPC64 is routed to
  // not_implemented().
  auto const forceStkPtrs = [&] {
    switch (arch()) {
    case Arch::X64: return false;
    case Arch::ARM: return true;
    case Arch::PPC64: not_implemented(); break;

  // StkPtr values go to rvmsp(). The three lines after the `if` are the
  // visible remainder of a check that the defining opcode is DefSP or Mov.
  if (forceStkPtrs && tmp.isA(TStkPtr)) {
      opc == DefSP ||
      opc == Mov,
      "unexpected StkPtr dest from {}",
    return rvmsp();

  // LdContActRec and LdAFWHActRec, loading a generator's AR, is the only time
  // we have a pointer to an AR that is not in rvmfp().
  if (opc != LdContActRec && opc != LdAFWHActRec && tmp.isA(TFramePtr)) {
    return rvmfp();

  // No forced register for this value.
  return InvalidReg;
Exemple #2
// Pre-optimization for a CheckType instruction, comparing the source's current
// type (oldType) against the type being checked (newType).
//
// Returns:
//   - an AssertType result when both types are boxed and only the inner type
//     hint changes;
//   - src (after emitting a Jmp to inst->taken()) when the check can never
//     pass;
//   - src when the check is redundant;
//   - nullptr otherwise (presumably "no simplification; keep the original
//     instruction" — confirm against the caller).
// NOTE(review): closing braces are not visible in this excerpt.
SSATmp* IRBuilder::preOptimizeCheckType(IRInstruction* inst) {
  SSATmp* src  = inst->src(0);
  auto const oldType = src->type();
  auto const newType = inst->typeParam();

  if (oldType.isBoxed() && newType.isBoxed() &&
      (oldType.not(newType) || newType < oldType)) {
    /* This CheckType serves to update the inner type hint for a boxed value,
     * which requires no runtime work. This depends on the type being boxed,
     * and constraining it with DataTypeCountness will do it.  */
    constrainValue(src, DataTypeCountness);
    return gen(AssertType, newType, src);

  if (oldType.not(newType)) {
    /* This check will always fail. It's probably due to an incorrect
     * prediction. Generate a Jmp, and return src because
     * following instructions may depend on the output of CheckType
     * (they'll be DCEd later). Note that we can't use convertToJmp
     * because the return value isn't nullptr, so the original
     * instruction won't be inserted into the stream. */
    gen(Jmp, inst->taken());
    return src;

  if (newType >= oldType) {
    /* The type of the src is the same or more refined than type, so the guard
     * is unnecessary. */
    return src;

  return nullptr;
/*
 * Stores a ref (boxed value) to a local. Also handles unsetting a local.
 */
// Emits the store of `newValue` into local `id`, optionally handling the
// reference count of the value previously held by the local.
//
//   id         - index of the local being written.
//   newValue   - value to store.
//   doRefCount - when true, the previous value is loaded (if not already
//                tracked) and DecRef'ed.
// NOTE(review): closing braces (and possibly other short lines) are missing
// from this excerpt, so nesting below is inferred from indentation only.
void TraceBuilder::genBindLoc(uint32_t id,
                              SSATmp* newValue,
                              bool doRefCount /* = true */) {
  Type trackedType = getLocalType(id);
  SSATmp* prevValue = 0;
  if (trackedType == Type::None) {
    // Nothing is tracked for this local: load the old value as Type::Gen,
    // but only if it will be needed for the DecRef.
    if (doRefCount) {
      prevValue = gen(LdLoc, Type::Gen, LocalId(id), m_fpValue);
  } else {
    prevValue = getLocalValue(id);
    assert(prevValue == nullptr || prevValue->type() == trackedType);
    if (prevValue == newValue) {
      // Silent store: local already contains value being stored
      // NewValue needs to be decref'ed
      if (!trackedType.notCounted() && doRefCount) {
        gen(DecRef, prevValue);
    // The type is tracked but the value is not: load it with the tracked type
    // so it can be DecRef'ed after the store.
    if (trackedType.maybeCounted() && !prevValue && doRefCount) {
      prevValue = gen(LdLoc, trackedType, LocalId(id), m_fpValue);
  // Choose between StLoc and StLocNT: the NT form is used when both old and
  // new types are boxed, or when the types are equal and not a string type.
  bool genStoreType = true;
  if ((trackedType.isBoxed() && newValue->type().isBoxed()) ||
      (trackedType == newValue->type() && !trackedType.isString())) {
    // no need to store type with local value
    genStoreType = false;
  gen(genStoreType ? StLoc : StLocNT, LocalId(id), m_fpValue, newValue);
  // Release the value that used to live in the local.
  if (trackedType.maybeCounted() && doRefCount) {
    gen(DecRef, prevValue);
// Loads local `id` and returns it as a cell: a non-boxed value is returned
// directly, a boxed value is unboxed with LdRef (typed with the box's inner
// type, and given `exitTrace` as an operand).
// NOTE(review): closing braces are not visible in this excerpt.
SSATmp* TraceBuilder::genLdLocAsCell(uint32_t id, Trace* exitTrace) {
  SSATmp*    tmp = genLdLoc(id);
  Type type = tmp->type();
  // The type must be definitely boxed or definitely not boxed.
  assert(type.isBoxed() || type.notBoxed());
  if (!type.isBoxed()) {
    return tmp;
  // Unbox tmp into a cell via a LdRef
  return gen(LdRef, type.innerType(), exitTrace, tmp);
/*
 * Store a cell value to a local that might be boxed.
 */
// Stores `newValue` into local `id`, going through the box when the local's
// tracked type is boxed.
//
//   id           - index of the local being written.
//   newValue     - cell value to store.
//   doRefCount   - when true, IncRef the new value and DecRef the old one.
//   genStoreType - selects StRef (true) or StRefNT (false) for the boxed path.
//   exit         - passed to the LdRef that reads the old inner value.
//
// Returns newValue, or the result of IncRef(newValue) when doRefCount is set.
// NOTE(review): closing braces and the delimiters of the block comment just
// inside the function are missing from this excerpt; those lines are left
// exactly as found.
SSATmp* TraceBuilder::genStLoc(uint32_t id,
                               SSATmp* newValue,
                               bool doRefCount,
                               bool genStoreType,
                               Trace* exit) {
   * If prior value of local is a cell, then  re-use genBindLoc.
   * Otherwise, if prior value of local is a ref:
   * prevLocValue = LdLoc<T>{id} fp
   *    prevValue = LdRef [prevLocValue]
   *       newRef = StRef [prevLocValue], newValue
   * DecRef prevValue
   * -- track local value in newRef
  Type trackedType = getLocalType(id);
  assert(trackedType != Type::None);  // tracelet guards guarantee a type
  if (trackedType.notBoxed()) {
    // Unboxed local: delegate the store (and old-value DecRef) to genBindLoc.
    SSATmp* retVal = doRefCount ? gen(IncRef, newValue) : newValue;
    genBindLoc(id, newValue, doRefCount);
    return retVal;
  SSATmp* prevRef = getLocalValue(id);
  assert(prevRef == nullptr || prevRef->type() == trackedType);
  // prevRef is a ref
  if (prevRef == nullptr) {
    // prevRef = ldLoc
    prevRef = gen(LdLoc, trackedType, LocalId(id), m_fpValue);
  // The old inner value is only needed when it will be DecRef'ed below.
  SSATmp* prevValue = nullptr;
  if (doRefCount) {
    Type innerType = trackedType.innerType();
    prevValue = gen(LdRef, innerType, exit, prevRef);
  // stref [prevRef] = t1
  Opcode opc = genStoreType ? StRef : StRefNT;
  gen(opc, prevRef, newValue);

  SSATmp* retVal = newValue;
  if (doRefCount) {
    retVal = gen(IncRef, newValue);
    gen(DecRef, prevValue);
  return retVal;
// Ensures local `id` holds a boxed value and returns that value. If the local
// is already boxed it is returned unchanged; otherwise the value is wrapped
// with Box and stored back to the local with StLoc.
// NOTE(review): closing braces are not visible in this excerpt.
SSATmp* TraceBuilder::genBoxLoc(uint32_t id) {
  SSATmp* prevValue  = genLdLoc(id);
  Type prevType = prevValue->type();
  // Don't box if local's value already boxed
  if (prevType.isBoxed()) {
    return prevValue;
  // The Box helper requires us to incref the values its boxing, but in
  // this case we don't need to incref prevValue because we are simply
  // transfering its refcount from the local to the box.
  if (prevValue->isA(Type::Uninit)) {
    // No box can ever contain Uninit, so promote it to InitNull here.
    prevValue = genDefInitNull();
  SSATmp* newValue = gen(Box, prevValue);
  gen(StLoc, LocalId(id), m_fpValue, newValue);
  return newValue;
Exemple #7
// Emits code for an IncRef of the instruction's first source. Nothing is
// emitted when the source type is known not to be counted.
// NOTE(review): this excerpt is truncated — closing braces are missing and
// the body of the `type.isKnownDataType()` branch is not visible.
void CodeGenerator::cgIncRef(IRInstruction* inst) {
  SSATmp* src = inst->src(0);
  auto loc = srcLoc(0);
  Type type = src->type();
  if (type.notCounted()) return;

  // Emits the count update through loc.reg(0): the 32-bit count at
  // FAST_REFCOUNT_OFFSET is loaded, and either incremented unconditionally or
  // only when it is >= 0.
  auto increfMaybeStatic = [&](Vout& v) {
    auto base = loc.reg(0);
    auto rCount = v.makeReg();
    v << loadl{base[FAST_REFCOUNT_OFFSET], rCount};
    if (!type.needsStaticBitCheck()) {
      auto count1 = v.makeReg();
      v << addli{1, rCount, count1, v.makeReg()};
      v << storel{count1, base[FAST_REFCOUNT_OFFSET]};
    } else {
      auto const sf = v.makeReg();
      v << cmpli{0, rCount, sf};
      // Uncounted and static objects carry negative counts (asserted below),
      // so the CC_GE test skips the store for them.
      static_assert(UncountedValue < 0 && StaticValue < 0, "");
      ifThen(v, CC_GE, sf, [&](Vout& v) {
        auto count1 = v.makeReg();
        v << addli{1, rCount, count1, v.makeReg()};
        v << storel{count1, base[FAST_REFCOUNT_OFFSET]};

  auto& v = vmain();
  if (type.isKnownDataType()) {
  } else {
    // Data type not known statically: compare loc.reg(1) (presumably the
    // runtime type tag — confirm) with KindOfRefCountThreshold and only do
    // the incref when it is greater.
    auto const sf = v.makeReg();
    v << cmpli{KindOfRefCountThreshold, loc.reg(1), sf};
    ifThen(v, CC_G, sf, [&](Vout& v) { increfMaybeStatic(v); });
Exemple #8
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
// Second optimization pass over the current trace: each block's instructions
// are pulled out and fed back through optimizeWork(); instructions that could
// not be optimized are re-appended, and optimized ones are replaced (with a
// mov into the original destination when the result lives elsewhere).
// NOTE(review): this excerpt is truncated — closing braces and several
// statements (e.g. removing the processed instruction from `instructions`,
// the body after the final comment) are not visible.
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };

  // Nothing to do when both CSE and simplification are disabled.
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_state.enableCse() && !m_enableSimplification) return;
  m_inReoptimize = true;

  BlockList sortedBlocks = rpoSortCfg(m_unit);
  auto const idoms = findDominators(m_unit, sortedBlocks);

  // Iterate over a copy of the trace's block list.
  auto& traceBlocks = m_curTrace->blocks();
  BlockList blocks(traceBlocks.begin(), traceBlocks.end());
  for (auto* block : blocks) {
    assert(block->trace() == m_curTrace);
    FTRACE(5, "Block: {}\n", block->id());


    // Take ownership of the block's instructions; they are re-appended to the
    // block below as they are processed.
    auto instructions = std::move(block->instrs());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        assert(block->empty() || !block->back().isBlockEnd());
        IRInstruction* mov = m_unit.mov(dst, tmp, inst->marker());
        appendInstruction(mov, block);
      // Not re-adding inst; remove the inst->taken edge
      if (inst->taken()) inst->setTaken(nullptr);

    if (block->empty()) {
      // If all the instructions in the block were optimized away, remove it
      // from the trace.
      // NOTE(review): as shown, `*it` dereferences end(); a step between the
      // next two lines (e.g. moving `it` back one element) is presumably
      // missing from this excerpt — confirm against the full source.
      auto it = traceBlocks.end();
      assert(*it == block);
    } else {
      if (block->back().isTerminal()) {
        // Could have converted a conditional branch to Jmp; clear next.
// Allocates registers for the instruction at `it`: reloads any spilled source
// operands (inserting Reload instructions before `it`), then assigns
// registers to the instruction's destinations, with StkPtr and FramePtr
// destinations going to rVmSp / rVmFp except for the special cases named
// below.
// NOTE(review): this excerpt is truncated — closing braces, the tail of the
// m_unit.gen(Reload, ...) call, the end of the StkPtr assertion, and several
// bodies (e.g. after `nextNative() == inst`) are not visible.
void LinearScan::allocRegToInstruction(InstructionList::iterator it) {
  IRInstruction* inst = &*it;
  dumpIR<IRInstruction, kExtraLevel>(inst, "allocating to instruction");

  // Reload all source operands if necessary.
  // Mark registers as unpinned.
  for (int regNo = 0; regNo < kNumRegs; ++regNo) {
    m_regs[regNo].m_pinned = false;
  // First pass over the sources: decide which ones still need a reload, and
  // pin the registers of those that do not.
  smart::vector<bool> needsReloading(inst->numSrcs(), true);
  for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
    SSATmp* tmp = inst->src(i);
    int32_t slotId = m_spillSlots[tmp];
    if (slotId == -1) {
      // Slot id -1: this source has no spill slot, so nothing to reload.
      needsReloading[i] = false;
    } else if ((tmp = m_slots[slotId].latestReload)) {
      // The slot already has a reload tmp; use it as the source instead.
      needsReloading[i] = false;
      inst->setSrc(i, tmp);
    if (!needsReloading[i]) {
      // NOTE(review): this inner `i` shadows the outer loop index; only `tmp`
      // is used inside, so the outer index is not affected.
      for (int i = 0, n = m_allocInfo[tmp].numAllocatedRegs(); i < n; ++i) {
        m_regs[int(m_allocInfo[tmp].reg(i))].m_pinned = true;
  // Second pass: materialize a Reload for every source still marked.
  for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
    if (needsReloading[i]) {
      SSATmp* tmp = inst->src(i);
      int32_t slotId = m_spillSlots[tmp];
      // <tmp> is spilled, and not reloaded.
      // Therefore, We need to reload the value into a new SSATmp.

      // Insert the Reload instruction.
      SSATmp* spillTmp = m_slots[slotId].spillTmp;
      IRInstruction* reload = m_unit.gen(Reload, inst->marker(),
      inst->block()->insert(it, reload);

      // Create <reloadTmp> which inherits <tmp>'s slot ID and
      // <spillTmp>'s last use ID.
      // Replace <tmp> with <reloadTmp> in <inst>.
      SSATmp* reloadTmp = reload->dst();
      m_uses[reloadTmp].lastUse = m_uses[spillTmp].lastUse;
      m_spillSlots[reloadTmp] = slotId;
      inst->setSrc(i, reloadTmp);
      // reloadTmp and tmp share the same type.  Since it was spilled, it
      // must be using its entire needed-count of registers.
      assert(reloadTmp->type() == tmp->type());
      for (int locIndex = 0; locIndex < tmp->numNeededRegs();) {
        locIndex += allocRegToTmp(reloadTmp, locIndex);
      // Remember this reload tmp in case we can reuse it in later blocks.
      m_slots[slotId].latestReload = reloadTmp;
      dumpIR<IRInstruction, kExtraLevel>(reload, "created reload");

  // Update next native.
  if (nextNative() == inst) {

  // Instructions without destinations need no further allocation.
  Range<SSATmp*> dsts = inst->dsts();
  if (dsts.empty()) return;

  // DefMIStateBase's destination is assigned rsp.
  Opcode opc = inst->op();
  if (opc == DefMIStateBase) {
    assignRegToTmp(&m_regs[int(rsp)], &dsts[0], 0);

  for (SSATmp& dst : dsts) {
    for (int numAllocated = 0, n = dst.numNeededRegs(); numAllocated < n; ) {
      // LdRaw, loading a generator's embedded AR, is the only time we have a
      // pointer to an AR that is not in rVmFp.
      const bool abnormalFramePtr =
        (opc == LdRaw &&
          inst->src(1)->getValInt() == RawMemSlot::ContARPtr);

      // Note that the point of StashGeneratorSP is to save a StkPtr
      // somewhere other than rVmSp.  (TODO(#2288359): make rbx not
      // special.)
      const bool abnormalStkPtr = opc == StashGeneratorSP;

      // Ordinary StkPtr destinations: the defining opcode must be one of the
      // listed ones, and the value goes to rVmSp.
      if (!abnormalStkPtr && dst.isA(Type::StkPtr)) {
        assert(opc == DefSP ||
               opc == ReDefSP ||
               opc == ReDefGeneratorSP ||
               opc == PassSP ||
               opc == DefInlineSP ||
               opc == Call ||
               opc == CallArray ||
               opc == SpillStack ||
               opc == SpillFrame ||
               opc == CufIterSpillFrame ||
               opc == ExceptionBarrier ||
               opc == RetAdjustStack ||
               opc == InterpOne ||
               opc == InterpOneCF ||
               opc == GenericRetDecRefs ||
               opc == CheckStk ||
               opc == GuardStk ||
               opc == AssertStk ||
               opc == CastStk ||
               opc == CoerceStk ||
               opc == SideExitGuardStk  ||
        assignRegToTmp(&m_regs[int(rVmSp)], &dst, 0);
      // Ordinary FramePtr destinations go to rVmFp.
      if (!abnormalFramePtr && dst.isA(Type::FramePtr)) {
        assignRegToTmp(&m_regs[int(rVmFp)], &dst, 0);

      // Generally speaking, StkPtrs are pretty special due to
      // tracelet ABI registers. Keep track here of the allowed uses
      // that don't use the above allocation.
      assert(!dst.isA(Type::FramePtr) || abnormalFramePtr);
      assert(!dst.isA(Type::StkPtr) || abnormalStkPtr);

      // Allocate a register unless dead-code elimination is enabled and the
      // destination has no recorded last use.
      if (!RuntimeOption::EvalHHIRDeadCodeElim || m_uses[dst].lastUse != 0) {
        numAllocated += allocRegToTmp(&dst, numAllocated);
      } else {
  if (!RuntimeOption::EvalHHIRDeadCodeElim) {
    // if any outputs were unused, free regs now.
Exemple #10
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
// Second optimization pass over the main trace: blocks are moved out of the
// trace, and each block's instructions are fed back through optimizeWork().
// Unoptimized instructions are re-appended; optimized ones are replaced (with
// a mov into the original destination when the result lives elsewhere).
// NOTE(review): this excerpt is truncated — closing braces and the bodies of
// several branches (block-count limit, snapshot handling, InlineFPAnchor,
// terminal/fall-through handling) are not visible.
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };
  assert(m_curTrace == m_mainTrace.get());

  // Nothing to do when both CSE and simplification are disabled.
  m_enableCse = RuntimeOption::EvalHHIRCse;
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_enableCse && !m_enableSimplification) return;
  // Traces with more blocks than EvalHHIRSimplificationMaxBlocks get special
  // handling (body not visible here).
  if (m_mainTrace->blocks().size() >
      RuntimeOption::EvalHHIRSimplificationMaxBlocks) {
    // TODO CSEHash::filter is very slow for large block sizes
    // t2135219 should address that

  BlockList sortedBlocks = rpoSortCfg(m_mainTrace.get(), m_irFactory);
  auto const idoms = findDominators(sortedBlocks);

  // Move the block list out of the trace and work through it front to back.
  auto blocks = std::move(m_mainTrace->blocks());
  while (!blocks.empty()) {
    Block* block = blocks.front();
    assert(block->trace() == m_mainTrace.get());
    FTRACE(5, "Block: {}\n", block->id());

    if (m_snapshots[block]) {

    // Take ownership of the block's instructions; they are re-appended to the
    // block below as they are processed.
    auto instructions = std::move(block->instrs());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();

      // last attempt to elide ActRecs, if we still need the InlineFPAnchor
      // it will be added back to the trace when we re-add instructions that
      // rely on it
      if (inst->op() == InlineFPAnchor) {

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        assert(block->empty() || !block->back()->isBlockEnd());
        IRInstruction* mov = m_irFactory.mov(dst, tmp, inst->marker());
        appendInstruction(mov, block);
      // Not re-adding inst; remove the inst->taken edge
      if (inst->taken()) inst->setTaken(nullptr);
    if (block->back()->isTerminal()) {
      // Could have converted a conditional branch to Jmp; clear next.
    } else {
      // if the last instruction was a branch, we already saved state
      // for the target in updateTrackedState().  Now save state for
      // the fall-through path.
Exemple #11
// Emits ARM code for GuardRefs. The instruction's five sources are the
// function pointer, the parameter count, the first bit number, a 64-bit mask
// and the 64-bit expected values. The emitted code loads a 64-bit word
// reached through the function pointer, masks it with mask64, compares it
// with vals64, and emits a fallback jump to the SrcRec for the current
// bytecode offset on mismatch.
// NOTE(review): this excerpt is truncated — closing braces and the body of
// the `firstBitNum == 0` branch at the bottom are not visible.
void CodeGenerator::cgGuardRefs(IRInstruction* inst) {
  assert(inst->numSrcs() == 5);

  SSATmp* funcPtrTmp = inst->src(0);
  SSATmp* nParamsTmp = inst->src(1);
  SSATmp* firstBitNumTmp = inst->src(2);
  SSATmp* mask64Tmp  = inst->src(3);
  SSATmp* vals64Tmp  = inst->src(4);

  // Get values in place
  assert(funcPtrTmp->type() == Type::Func);
  auto funcPtrReg = x2a(curOpd(funcPtrTmp).reg());

  assert(nParamsTmp->type() == Type::Int);
  auto nParamsReg = x2a(curOpd(nParamsTmp).reg());
  assert(nParamsReg.IsValid() || nParamsTmp->isConst());

  // firstBitNum must be a compile-time constant.
  assert(firstBitNumTmp->isConst() && firstBitNumTmp->type() == Type::Int);
  uint32_t firstBitNum = (uint32_t)(firstBitNumTmp->getValInt());

  assert(mask64Tmp->type() == Type::Int);
  auto mask64Reg = x2a(curOpd(mask64Tmp).reg());
  assert(mask64Reg.IsValid() || mask64Tmp->inst()->op() != LdConst);
  uint64_t mask64 = mask64Tmp->getValInt();

  assert(vals64Tmp->type() == Type::Int);
  auto vals64Reg = x2a(curOpd(vals64Tmp).reg());
  assert(vals64Reg.IsValid() || vals64Tmp->inst()->op() != LdConst);
  uint64_t vals64 = vals64Tmp->getValInt();
  // Every expected-value bit must be covered by the mask.
  assert((vals64 & mask64) == vals64);

  // Target of the fallback jumps emitted below.
  auto const destSK = SrcKey(curFunc(), m_unit.bcOff());
  auto const destSR = m_tx64->getSrcRec(destSK);

  // Emits the load / mask / compare sequence followed by the fallback jump.
  auto thenBody = [&] {
    auto bitsOff = sizeof(uint64_t) * (firstBitNum / 64);
    auto cond = CC_NE;
    auto bitsPtrReg = rAsm;

    if (firstBitNum == 0) {
      // The first word is read directly from the Func at refBitValOff().
      bitsOff = Func::refBitValOff();
      bitsPtrReg = funcPtrReg;
    } else {
      // Later words are read through the pointer at Func::sharedOff(), with
      // the offset reduced by one word.
      m_as.    Ldr  (bitsPtrReg, funcPtrReg[Func::sharedOff()]);
      bitsOff -= sizeof(uint64_t);

    // Don't need the bits pointer after this point
    auto bitsReg = rAsm;
    // Load the bits
    m_as.    Ldr  (bitsReg, bitsPtrReg[bitsOff]);

    // Mask the bits. There are restrictions on what can be encoded as an
    // immediate in ARM's logical instructions, and if they're not met, we'll
    // have to use a register.
    if (vixl::Assembler::IsImmLogical(mask64, vixl::kXRegSize)) {
      m_as.  And  (bitsReg, bitsReg, mask64);
    } else {
      if (mask64Reg.IsValid()) {
        m_as.And  (bitsReg, bitsReg, mask64Reg);
      } else {
        m_as.Mov  (rAsm2, mask64);
        m_as.And  (bitsReg, bitsReg, rAsm2);

    // Now do the compare. There are also restrictions on immediates in
    // arithmetic instructions (of which Cmp is one; it's just a subtract that
    // sets flags), so same deal as with the mask immediate above.
    if (vixl::Assembler::IsImmArithmetic(vals64)) {
      m_as.  Cmp  (bitsReg, vals64);
    } else {
      if (vals64Reg.IsValid()) {
        m_as.Cmp  (bitsReg, vals64Reg);
      } else {
        m_as.Mov  (rAsm2, vals64);
        m_as.Cmp  (bitsReg, rAsm2);
    destSR->emitFallbackJump(m_mainCode, cond);

  if (firstBitNum == 0) {
    // This is the first 64 bits. No need to check
    // nParams.
  } else {
    // Check number of args...
    m_as.    Cmp   (nParamsReg, firstBitNum);

    if (vals64 != 0 && vals64 != mask64) {
      // If we're beyond nParams, then either all params
      // are refs, or all params are non-refs, so if vals64
      // isn't 0 and isnt mask64, there's no possibility of
      // a match
      destSR->emitFallbackJump(m_mainCode, CC_LE);
    } else {
      // nParams > firstBitNum: run the bit check; otherwise test the
      // AttrVariadicByRef attribute bit on the function.
      ifThenElse(m_as, vixl::gt, thenBody, /* else */ [&] {
          //   If not special builtin...
          m_as.  Ldr  (rAsm, funcPtrReg[Func::attrsOff()]);
          m_as.  Tst  (rAsm, AttrVariadicByRef);
          destSR->emitFallbackJump(m_mainCode, vals64 ? CC_Z : CC_NZ);
Exemple #12
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
// Second optimization pass over m_trace: blocks are moved out of the trace,
// and each block's instructions are fed back through optimizeWork().
// Unoptimized instructions are re-appended; optimized ones are replaced (with
// a mov into the original destination when the result lives elsewhere).
// NOTE(review): this excerpt is truncated — closing braces and the bodies of
// several branches (block-count limit, terminal/fall-through handling) are
// not visible.
void TraceBuilder::reoptimize() {
  // Nothing to do when both CSE and simplification are disabled.
  m_enableCse = RuntimeOption::EvalHHIRCse;
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_enableCse && !m_enableSimplification) return;
  // Traces with more blocks than EvalHHIRSimplificationMaxBlocks get special
  // handling (body not visible here).
  if (m_trace->getBlocks().size() >
      RuntimeOption::EvalHHIRSimplificationMaxBlocks) {
    // TODO CSEHash::filter is very slow for large block sizes
    // t2135219 should address that
  BlockList sortedBlocks = sortCfg(m_trace.get(), m_irFactory);
  IdomVector idoms = findDominators(sortedBlocks);
  // Move the block list out of the trace and work through it front to back.
  auto blocks = std::move(m_trace->getBlocks());
  while (!blocks.empty()) {
    Block* block = blocks.front();
    assert(block->getTrace() == m_trace.get());
    // Blocks with a snapshot have the CSE table filtered using the dominator
    // information before their instructions are processed.
    if (m_snapshots[block]) {
      m_cseHash.filter(block, idoms);
    // Take ownership of the block's instructions; they are re-appended to the
    // block below as they are processed.
    auto instructions = std::move(block->getInstrs());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();
      SSATmp* tmp = optimizeWork(inst); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
      SSATmp* dst = inst->getDst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        IRInstruction* mov = m_irFactory.mov(dst, tmp);
        appendInstruction(mov, block);
      // Not re-adding inst; remove the inst->taken edge
      if (inst->getTaken()) inst->setTaken(nullptr);
    if (block->back()->isTerminal()) {
      // Could have converted a conditional branch to Jmp; clear next.
    } else {
      // if the last instruction was a branch, we already saved state
      // for the target in updateTrackedState().  Now save state for
      // the fall-through path.
Exemple #13
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
// Second optimization pass over the unit: blocks are visited in the order
// returned by rpoSortCfg(), and each block's instructions are fed back through
// optimizeWork(). When an optimized-away instruction was a block end, a Jmp to
// its next block is appended in its place.
// NOTE(review): this excerpt is truncated — closing braces and several
// statements (e.g. re-appending the unoptimized instruction, appending the
// mov) are not visible.
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };

  // Nothing to do when both CSE and simplification are disabled.
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_state.enableCse() && !m_enableSimplification) return;

  BlockList sortedBlocks = rpoSortCfg(m_unit);
  auto const idoms = findDominators(m_unit, sortedBlocks);

  // NOTE(review): rpoSortCfg(m_unit) is evaluated again here although
  // sortedBlocks above already holds that result — consider iterating
  // sortedBlocks instead; confirm the second call is not intentional.
  for (auto* block : rpoSortCfg(m_unit)) {
    FTRACE(5, "Block: {}\n", block->id());

    m_curBlock = block;

    // Take ownership of the block's instructions for reprocessing.
    auto instructions = std::move(block->instrs());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction

      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        assert(block->empty() || !block->back().isBlockEnd());
        IRInstruction* mov = m_unit.mov(dst, tmp, inst->marker());

      if (inst->isBlockEnd()) {
        // Not re-adding inst; replace it with a jump to the next block.
        auto next = inst->next();
        appendInstruction(m_unit.gen(Jmp, inst->marker(), next));

Exemple #14
// Emits ARM code for GuardRefs. The instruction's five sources are the
// function pointer, the parameter count, the first bit number, a 64-bit mask
// and the 64-bit expected values. The emitted code checks a 64-bit word
// reached through the function pointer against mask64 / vals64 (with a single
// Tst when possible, otherwise And + Cmp) and emits a fallback jump to the
// SrcRec for the current bytecode offset on mismatch.
// NOTE(review): this excerpt is truncated — closing braces and the body of
// the `firstBitNum == 0` branch at the bottom are not visible.
void CodeGenerator::cgGuardRefs(IRInstruction* inst) {
  assert(inst->numSrcs() == 5);

  SSATmp* funcPtrTmp = inst->src(0);
  SSATmp* nParamsTmp = inst->src(1);
  SSATmp* firstBitNumTmp = inst->src(2);
  SSATmp* mask64Tmp  = inst->src(3);
  SSATmp* vals64Tmp  = inst->src(4);

  // Get values in place
  assert(funcPtrTmp->type() == Type::Func);
  auto funcPtrReg = x2a(m_regs[funcPtrTmp].reg());

  assert(nParamsTmp->type() == Type::Int);
  auto nParamsReg = x2a(m_regs[nParamsTmp].reg());
  assert(nParamsReg.IsValid() || nParamsTmp->isConst());

  // firstBitNum must be a compile-time constant.
  assert(firstBitNumTmp->isConst() && firstBitNumTmp->type() == Type::Int);
  uint32_t firstBitNum = (uint32_t)(firstBitNumTmp->getValInt());

  assert(mask64Tmp->type() == Type::Int);
  auto mask64Reg = x2a(m_regs[mask64Tmp].reg());
  assert(mask64Reg.IsValid() || mask64Tmp->inst()->op() != LdConst);
  uint64_t mask64 = mask64Tmp->getValInt();

  assert(vals64Tmp->type() == Type::Int);
  auto vals64Reg = x2a(m_regs[vals64Tmp].reg());
  assert(vals64Reg.IsValid() || vals64Tmp->inst()->op() != LdConst);
  uint64_t vals64 = vals64Tmp->getValInt();
  // Every expected-value bit must be covered by the mask.
  assert((vals64 & mask64) == vals64);

  // Target of the fallback jumps emitted below.
  auto const destSK = SrcKey(curFunc(), m_unit.bcOff());
  auto const destSR = m_tx64->getSrcRec(destSK);

  // Emits the bit check followed by the fallback jump.
  auto thenBody = [&] {
    auto bitsOff = sizeof(uint64_t) * (firstBitNum / 64);
    auto cond = CC_NE;
    auto bitsPtrReg = rAsm;

    if (firstBitNum == 0) {
      // The first word is read directly from the Func at refBitValOff().
      bitsOff = Func::refBitValOff();
      bitsPtrReg = funcPtrReg;
    } else {
      // Later words are read through the pointer at Func::sharedOff(), with
      // the offset reduced by one word.
      m_as.    Ldr  (bitsPtrReg, funcPtrReg[Func::sharedOff()]);
      bitsOff -= sizeof(uint64_t);

    // (mask64 & (mask64 - 1)) == 0 holds when at most one bit of mask64 is
    // set.
    if (vals64 == 0 || (mask64 & (mask64 - 1)) == 0) {
      // If vals64 is zero, or we're testing a single
      // bit, we can get away with a single test,
      // rather than mask-and-compare
      m_as.    Ldr  (rAsm2, bitsPtrReg[bitsOff]);
      if (mask64Reg.IsValid()) {
        m_as.  Tst  (rAsm2, mask64Reg);
      } else {
        assert(vixl::Assembler::IsImmLogical(mask64, vixl::kXRegSize));
        m_as.  Tst  (rAsm2, mask64);
      // For a non-zero expected value the fallback condition flips to CC_E.
      if (vals64) cond = CC_E;
    } else {
      auto bitsValReg = rAsm;
      m_as.    Ldr  (bitsValReg, bitsPtrReg[bitsOff]);
      // In debug builds, reset bitsPtrReg to a default-constructed Register
      // after the load (presumably to catch later uses — confirm).
      if (debug) bitsPtrReg = Register();

      //     bitsValReg <- bitsValReg & mask64
      // NB: these 'And' ops don't set flags. They don't need to.
      if (mask64Reg.IsValid()) {
        m_as.  And    (bitsValReg, bitsValReg, mask64Reg);
      } else {
        // There are restrictions on the immediates that can be encoded into
        // logical ops. If the mask doesn't meet those restrictions, we have to
        // load it into a register first.
        if (vixl::Assembler::IsImmLogical(mask64, vixl::kXRegSize)) {
          m_as.And    (bitsValReg, bitsValReg, mask64);
        } else {
          m_as.Mov    (rAsm2, mask64);
          m_as.And    (bitsValReg, bitsValReg, rAsm2);

      //   If bitsValReg != vals64, then goto Exit
      if (vals64Reg.IsValid()) {
        m_as.  Cmp    (bitsValReg, vals64Reg);
      } else {
        m_as.  Cmp    (bitsValReg, vals64);
    destSR->emitFallbackJump(m_mainCode, cond);

  if (firstBitNum == 0) {
    // This is the first 64 bits. No need to check
    // nParams.
  } else {
    // Check number of args...
    m_as.    Cmp   (nParamsReg, firstBitNum);

    if (vals64 != 0 && vals64 != mask64) {
      // If we're beyond nParams, then either all params
      // are refs, or all params are non-refs, so if vals64
      // isn't 0 and isnt mask64, there's no possibility of
      // a match
      destSR->emitFallbackJump(m_mainCode, CC_LE);
    } else {
      // nParams > firstBitNum: run the bit check; otherwise test the
      // AttrVariadicByRef attribute bit on the function.
      ifThenElse(m_as, vixl::gt, thenBody, /* else */ [&] {
          //   If not special builtin...
          m_as.  Ldr  (rAsm, funcPtrReg[Func::attrsOff()]);
          m_as.  Tst  (rAsm, AttrVariadicByRef);
          destSR->emitFallbackJump(m_mainCode, vals64 ? CC_Z : CC_NZ);