Example 1
PhysReg forceAlloc(const SSATmp& tmp) {
  if (tmp.type() <= TBottom) return InvalidReg;

  auto inst = tmp.inst();
  auto opc = inst->op();

  auto const forceStkPtrs = [&] {
    switch (arch()) {
    case Arch::X64: return false;
    case Arch::ARM: return true;
    case Arch::PPC64: not_implemented(); break;
    }
    not_reached();
  }();

  if (forceStkPtrs && tmp.isA(TStkPtr)) {
    assert_flog(
      opc == DefSP ||
      opc == Mov,
      "unexpected StkPtr dest from {}",
      opcodeName(opc)
    );
    return rvmsp();
  }

  // LdContActRec and LdAFWHActRec, loading a generator's AR, are the only times
  // we have a pointer to an AR that is not in rvmfp().
  if (opc != LdContActRec && opc != LdAFWHActRec && tmp.isA(TFramePtr)) {
    return rvmfp();
  }

  return InvalidReg;
}
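
A minimal sketch of how a register assigner might consume forceAlloc(): try the forced choice first and fall back to ordinary allocation when it returns InvalidReg. The RegAllocState type, its methods, and assignDef are assumptions made for illustration, not names from the source above.

// Hypothetical allocator facade; only here so the sketch reads end-to-end.
struct RegAllocState {
  void assign(const SSATmp&, PhysReg);   // record the final choice
  PhysReg pickFreeReg(const SSATmp&);    // ordinary allocation policy
};

// Try the forced assignment first; fall back to the general allocator when
// forceAlloc() declines with InvalidReg.
void assignDef(const SSATmp& tmp, RegAllocState& state) {
  auto const forced = forceAlloc(tmp);
  if (forced != InvalidReg) {
    state.assign(tmp, forced);                // pinned to rvmfp()/rvmsp(), etc.
    return;
  }
  state.assign(tmp, state.pickFreeReg(tmp));  // normal allocation path
}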
Example 2
SSATmp* IRBuilder::preOptimizeCheckType(IRInstruction* inst) {
  SSATmp* src  = inst->src(0);
  auto const oldType = src->type();
  auto const newType = inst->typeParam();

  if (oldType.isBoxed() && newType.isBoxed() &&
      (oldType.not(newType) || newType < oldType)) {
    /* This CheckType serves to update the inner type hint for a boxed value,
     * which requires no runtime work. This depends on the type being boxed,
     * and constraining it with DataTypeCountness will do it.  */
    constrainValue(src, DataTypeCountness);
    return gen(AssertType, newType, src);
  }

  if (oldType.not(newType)) {
    /* This check will always fail. It's probably due to an incorrect
     * prediction. Generate a Jmp, and return src because
     * following instructions may depend on the output of CheckType
     * (they'll be DCEd later). Note that we can't use convertToJmp
     * because the return value isn't nullptr, so the original
     * instruction won't be inserted into the stream. */
    gen(Jmp, inst->taken());
    return src;
  }

  if (newType >= oldType) {
    /* The type of the src is the same or more refined than type, so the guard
     * is unnecessary. */
    return src;
  }

  return nullptr;
}
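
The three early returns above hinge on lattice queries: not (the types are disjoint, so the check must fail), < (the new type strictly refines the old one, so a runtime check is needed), and >= (the check is redundant). A toy, self-contained model of those relations, assuming a simple bitset lattice; ToyType is an illustration, not the real HHIR Type class.

#include <cstdint>
#include <cassert>

// Minimal stand-in for the type lattice: subtyping is subset inclusion.
struct ToyType {
  uint32_t bits;
  bool operator<=(ToyType o) const { return (bits & ~o.bits) == 0; }  // subtype
  bool operator>=(ToyType o) const { return o <= *this; }
  bool operator<(ToyType o)  const { return *this <= o && bits != o.bits; }
  bool not_(ToyType o)       const { return (bits & o.bits) == 0; }   // disjoint
};

int main() {
  ToyType Int{1}, Dbl{2}, Str{4};
  ToyType IntOrDbl{Int.bits | Dbl.bits};
  assert(IntOrDbl.not_(Str));   // CheckType<Str> on an Int|Dbl value can never pass
  assert(Int < IntOrDbl);       // CheckType<Int> genuinely refines Int|Dbl
  assert(IntOrDbl >= Int);      // CheckType<Int|Dbl> on an Int value is redundant
}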
/*
 * Stores a ref (boxed value) to a local. Also handles unsetting a local.
 */
void TraceBuilder::genBindLoc(uint32_t id,
                              SSATmp* newValue,
                              bool doRefCount /* = true */) {
  Type trackedType = getLocalType(id);
  SSATmp* prevValue = nullptr;
  if (trackedType == Type::None) {
    if (doRefCount) {
      prevValue = gen(LdLoc, Type::Gen, LocalId(id), m_fpValue);
    }
  } else {
    prevValue = getLocalValue(id);
    assert(prevValue == nullptr || prevValue->type() == trackedType);
    if (prevValue == newValue) {
      // Silent store: the local already contains the value being stored;
      // newValue still needs to be decref'd.
      if (!trackedType.notCounted() && doRefCount) {
        gen(DecRef, prevValue);
      }
      return;
    }
    if (trackedType.maybeCounted() && !prevValue && doRefCount) {
      prevValue = gen(LdLoc, trackedType, LocalId(id), m_fpValue);
    }
  }
  bool genStoreType = true;
  if ((trackedType.isBoxed() && newValue->type().isBoxed()) ||
      (trackedType == newValue->type() && !trackedType.isString())) {
    // no need to store type with local value
    genStoreType = false;
  }
  gen(genStoreType ? StLoc : StLocNT, LocalId(id), m_fpValue, newValue);
  if (trackedType.maybeCounted() && doRefCount) {
    gen(DecRef, prevValue);
  }
}
SSATmp* TraceBuilder::genLdLocAsCell(uint32_t id, Trace* exitTrace) {
  SSATmp*    tmp = genLdLoc(id);
  Type type = tmp->type();
  assert(type.isBoxed() || type.notBoxed());
  if (!type.isBoxed()) {
    return tmp;
  }
  // Unbox tmp into a cell via a LdRef
  return gen(LdRef, type.innerType(), exitTrace, tmp);
}
/*
 * Store a cell value to a local that might be boxed.
 */
SSATmp* TraceBuilder::genStLoc(uint32_t id,
                               SSATmp* newValue,
                               bool doRefCount,
                               bool genStoreType,
                               Trace* exit) {
  assert(!newValue->type().isBoxed());
  /*
   * If the prior value of the local is a cell, re-use genBindLoc.
   * Otherwise, if the prior value of the local is a ref:
   *
   * prevLocValue = LdLoc<T>{id} fp
   *    prevValue = LdRef [prevLocValue]
   *       newRef = StRef [prevLocValue], newValue
   * DecRef prevValue
   * -- track local value in newRef
   */
  Type trackedType = getLocalType(id);
  assert(trackedType != Type::None);  // tracelet guards guarantee a type
  if (trackedType.notBoxed()) {
    SSATmp* retVal = doRefCount ? gen(IncRef, newValue) : newValue;
    genBindLoc(id, newValue, doRefCount);
    return retVal;
  }
  assert(trackedType.isBoxed());
  SSATmp* prevRef = getLocalValue(id);
  assert(prevRef == nullptr || prevRef->type() == trackedType);
  // prevRef is a ref
  if (prevRef == nullptr) {
    // prevRef = ldLoc
    prevRef = gen(LdLoc, trackedType, LocalId(id), m_fpValue);
  }
  SSATmp* prevValue = nullptr;
  if (doRefCount) {
    assert(exit);
    Type innerType = trackedType.innerType();
    prevValue = gen(LdRef, innerType, exit, prevRef);
  }
  // StRef [prevRef] = newValue
  Opcode opc = genStoreType ? StRef : StRefNT;
  gen(opc, prevRef, newValue);

  SSATmp* retVal = newValue;
  if (doRefCount) {
    retVal = gen(IncRef, newValue);
    gen(DecRef, prevValue);
  }
  return retVal;
}
SSATmp* TraceBuilder::genBoxLoc(uint32_t id) {
  SSATmp* prevValue  = genLdLoc(id);
  Type prevType = prevValue->type();
  // Don't box if local's value already boxed
  if (prevType.isBoxed()) {
    return prevValue;
  }
  assert(prevType.notBoxed());
  // The Box helper requires us to incref the value it's boxing, but in
  // this case we don't need to incref prevValue because we are simply
  // transferring its refcount from the local to the box.
  if (prevValue->isA(Type::Uninit)) {
    // No box can ever contain Uninit, so promote it to InitNull here.
    prevValue = genDefInitNull();
  }
  SSATmp* newValue = gen(Box, prevValue);
  gen(StLoc, LocalId(id), m_fpValue, newValue);
  return newValue;
}
Example 7
void CodeGenerator::cgIncRef(IRInstruction* inst) {
  SSATmp* src = inst->src(0);
  auto loc = srcLoc(0);
  Type type = src->type();
  if (type.notCounted()) return;

  auto increfMaybeStatic = [&](Vout& v) {
    auto base = loc.reg(0);
    auto rCount = v.makeReg();
    v << loadl{base[FAST_REFCOUNT_OFFSET], rCount};
    if (!type.needsStaticBitCheck()) {
      auto count1 = v.makeReg();
      v << addli{1, rCount, count1, v.makeReg()};
      v << storel{count1, base[FAST_REFCOUNT_OFFSET]};
    } else {
      auto const sf = v.makeReg();
      v << cmpli{0, rCount, sf};
      static_assert(UncountedValue < 0 && StaticValue < 0, "");
      ifThen(v, CC_GE, sf, [&](Vout& v) {
        auto count1 = v.makeReg();
        v << addli{1, rCount, count1, v.makeReg()};
        v << storel{count1, base[FAST_REFCOUNT_OFFSET]};
      });
    }
  };

  auto& v = vmain();
  if (type.isKnownDataType()) {
    assert(IS_REFCOUNTED_TYPE(type.toDataType()));
    increfMaybeStatic(v);
  } else {
    auto const sf = v.makeReg();
    v << cmpli{KindOfRefCountThreshold, loc.reg(1), sf};
    ifThen(v, CC_G, sf, [&](Vout& v) { increfMaybeStatic(v); });
  }
}
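
A plain C++ sketch of the refcount policy the vasm above emits: increment unconditionally when the type can never be static, otherwise only increment counts that are non-negative, since uncounted/static values carry negative sentinel counts. The struct layout and constant values are assumptions for illustration; only their sign matters, matching the static_assert in the code above.

#include <cstdint>

constexpr int32_t kUncountedValue = -128;   // assumed sentinel
constexpr int32_t kStaticValue    = -127;   // assumed sentinel

struct CountedHeader { int32_t count; };    // assumed layout for the sketch

// Mirrors increfMaybeStatic(): bump the count, skipping values whose count is
// negative (e.g. kStaticValue or kUncountedValue) when a static check is needed.
inline void incRefMaybeStatic(CountedHeader* h, bool needsStaticBitCheck) {
  if (!needsStaticBitCheck || h->count >= 0) {
    ++h->count;
  }
}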
Example 8
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };
  assert(m_curTrace->isMain());
  assert(m_savedTraces.empty());

  m_state.setEnableCse(RuntimeOption::EvalHHIRCse);
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_state.enableCse() && !m_enableSimplification) return;
  always_assert(!m_inReoptimize);
  m_inReoptimize = true;

  BlockList sortedBlocks = rpoSortCfg(m_unit);
  auto const idoms = findDominators(m_unit, sortedBlocks);
  m_state.clear();

  auto& traceBlocks = m_curTrace->blocks();
  BlockList blocks(traceBlocks.begin(), traceBlocks.end());
  traceBlocks.clear();
  for (auto* block : blocks) {
    assert(block->trace() == m_curTrace);
    FTRACE(5, "Block: {}\n", block->id());

    assert(m_curTrace->isMain());
    m_state.startBlock(block);
    m_curTrace->push_back(block);

    auto instructions = std::move(block->instrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();
      instructions.pop_front();
      m_state.setMarker(inst->marker());

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.
      assert(inst->marker().valid());
      setMarker(inst->marker());

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
        m_state.update(inst);
        continue;
      }
      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        assert(block->empty() || !block->back().isBlockEnd());
        IRInstruction* mov = m_unit.mov(dst, tmp, inst->marker());
        appendInstruction(mov, block);
        m_state.update(mov);
      }
      // Not re-adding inst; remove the inst->taken edge
      if (inst->taken()) inst->setTaken(nullptr);
    }

    if (block->empty()) {
      // If all the instructions in the block were optimized away, remove it
      // from the trace.
      auto it = traceBlocks.end();
      --it;
      assert(*it == block);
      m_curTrace->unlink(it);
    } else {
      if (block->back().isTerminal()) {
        // Could have converted a conditional branch to Jmp; clear next.
        block->setNext(nullptr);
      }
      m_state.finishBlock(block);
    }
  }
}
Example 9
void LinearScan::allocRegToInstruction(InstructionList::iterator it) {
  IRInstruction* inst = &*it;
  dumpIR<IRInstruction, kExtraLevel>(inst, "allocating to instruction");

  // Reload all source operands if necessary.
  // Mark registers as unpinned.
  for (int regNo = 0; regNo < kNumRegs; ++regNo) {
    m_regs[regNo].m_pinned = false;
  }
  smart::vector<bool> needsReloading(inst->numSrcs(), true);
  for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
    SSATmp* tmp = inst->src(i);
    int32_t slotId = m_spillSlots[tmp];
    if (slotId == -1) {
      needsReloading[i] = false;
    } else if ((tmp = m_slots[slotId].latestReload)) {
      needsReloading[i] = false;
      inst->setSrc(i, tmp);
    }
    if (!needsReloading[i]) {
      for (int i = 0, n = m_allocInfo[tmp].numAllocatedRegs(); i < n; ++i) {
        m_regs[int(m_allocInfo[tmp].reg(i))].m_pinned = true;
      }
    }
  }
  for (uint32_t i = 0; i < inst->numSrcs(); ++i) {
    if (needsReloading[i]) {
      SSATmp* tmp = inst->src(i);
      int32_t slotId = m_spillSlots[tmp];
      // <tmp> is spilled and not yet reloaded, so we need to reload the
      // value into a new SSATmp.

      // Insert the Reload instruction.
      SSATmp* spillTmp = m_slots[slotId].spillTmp;
      IRInstruction* reload = m_unit.gen(Reload, inst->marker(),
                                              spillTmp);
      inst->block()->insert(it, reload);

      // Create <reloadTmp> which inherits <tmp>'s slot ID and
      // <spillTmp>'s last use ID.
      // Replace <tmp> with <reloadTmp> in <inst>.
      SSATmp* reloadTmp = reload->dst();
      m_uses[reloadTmp].lastUse = m_uses[spillTmp].lastUse;
      m_spillSlots[reloadTmp] = slotId;
      inst->setSrc(i, reloadTmp);
      // reloadTmp and tmp share the same type.  Since it was spilled, it
      // must be using its entire needed-count of registers.
      assert(reloadTmp->type() == tmp->type());
      for (int locIndex = 0; locIndex < tmp->numNeededRegs();) {
        locIndex += allocRegToTmp(reloadTmp, locIndex);
      }
      // Remember this reload tmp in case we can reuse it in later blocks.
      m_slots[slotId].latestReload = reloadTmp;
      dumpIR<IRInstruction, kExtraLevel>(reload, "created reload");
    }
  }

  freeRegsAtId(m_linear[inst]);
  // Update next native.
  if (nextNative() == inst) {
    assert(!m_natives.empty());
    m_natives.pop_front();
    computePreColoringHint();
  }

  Range<SSATmp*> dsts = inst->dsts();
  if (dsts.empty()) return;

  Opcode opc = inst->op();
  if (opc == DefMIStateBase) {
    assert(dsts[0].isA(Type::PtrToCell));
    assignRegToTmp(&m_regs[int(rsp)], &dsts[0], 0);
    return;
  }

  for (SSATmp& dst : dsts) {
    for (int numAllocated = 0, n = dst.numNeededRegs(); numAllocated < n; ) {
      // LdRaw, loading a generator's embedded AR, is the only time we have a
      // pointer to an AR that is not in rVmFp.
      const bool abnormalFramePtr =
        (opc == LdRaw &&
          inst->src(1)->getValInt() == RawMemSlot::ContARPtr);

      // Note that the point of StashGeneratorSP is to save a StkPtr
      // somewhere other than rVmSp.  (TODO(#2288359): make rbx not
      // special.)
      const bool abnormalStkPtr = opc == StashGeneratorSP;

      if (!abnormalStkPtr && dst.isA(Type::StkPtr)) {
        assert(opc == DefSP ||
               opc == ReDefSP ||
               opc == ReDefGeneratorSP ||
               opc == PassSP ||
               opc == DefInlineSP ||
               opc == Call ||
               opc == CallArray ||
               opc == SpillStack ||
               opc == SpillFrame ||
               opc == CufIterSpillFrame ||
               opc == ExceptionBarrier ||
               opc == RetAdjustStack ||
               opc == InterpOne ||
               opc == InterpOneCF ||
               opc == GenericRetDecRefs ||
               opc == CheckStk ||
               opc == GuardStk ||
               opc == AssertStk ||
               opc == CastStk ||
               opc == CoerceStk ||
               opc == SideExitGuardStk  ||
               MInstrEffects::supported(opc));
        assignRegToTmp(&m_regs[int(rVmSp)], &dst, 0);
        numAllocated++;
        continue;
      }
      if (!abnormalFramePtr && dst.isA(Type::FramePtr)) {
        assignRegToTmp(&m_regs[int(rVmFp)], &dst, 0);
        numAllocated++;
        continue;
      }

      // Generally speaking, StkPtrs are pretty special due to
      // tracelet ABI registers. Keep track here of the allowed uses
      // that don't use the above allocation.
      assert(!dst.isA(Type::FramePtr) || abnormalFramePtr);
      assert(!dst.isA(Type::StkPtr) || abnormalStkPtr);

      if (!RuntimeOption::EvalHHIRDeadCodeElim || m_uses[dst].lastUse != 0) {
        numAllocated += allocRegToTmp(&dst, numAllocated);
      } else {
        numAllocated++;
      }
    }
  }
  if (!RuntimeOption::EvalHHIRDeadCodeElim) {
    // if any outputs were unused, free regs now.
    freeRegsAtId(m_linear[inst]);
  }
}
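
A toy model of the reload-reuse bookkeeping above: each spill slot remembers its most recent reload so later uses can share it instead of emitting a second Reload. All names here (ToyTmp, ReloadCache, resolve) are assumptions for illustration, not the allocator's real fields.

#include <deque>
#include <unordered_map>

struct ToyTmp { int slotId = -1; };            // -1 means "never spilled"

struct ReloadCache {
  std::deque<ToyTmp> reloads;                  // owns the reload tmps we create
  std::unordered_map<int, ToyTmp*> latestReload;

  // Returns the tmp an instruction should actually read: the original if it
  // was never spilled, a cached reload if one exists, or a fresh reload.
  ToyTmp* resolve(ToyTmp* src) {
    if (src->slotId == -1) return src;
    auto it = latestReload.find(src->slotId);
    if (it != latestReload.end()) return it->second;
    reloads.push_back(ToyTmp{src->slotId});    // stands in for gen(Reload, ...)
    ToyTmp* fresh = &reloads.back();
    latestReload[src->slotId] = fresh;
    return fresh;
  }
};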
Example 10
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };
  assert(m_curTrace == m_mainTrace.get());
  assert(m_savedTraces.empty());
  assert(m_inlineSavedStates.empty());

  m_enableCse = RuntimeOption::EvalHHIRCse;
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_enableCse && !m_enableSimplification) return;
  if (m_mainTrace->blocks().size() >
      RuntimeOption::EvalHHIRSimplificationMaxBlocks) {
    // TODO CSEHash::filter is very slow for large block sizes
    // t2135219 should address that
    return;
  }

  BlockList sortedBlocks = rpoSortCfg(m_mainTrace.get(), m_irFactory);
  auto const idoms = findDominators(sortedBlocks);
  clearTrackedState();

  auto blocks = std::move(m_mainTrace->blocks());
  assert(m_mainTrace->blocks().empty());
  while (!blocks.empty()) {
    Block* block = blocks.front();
    blocks.pop_front();
    assert(block->trace() == m_mainTrace.get());
    FTRACE(5, "Block: {}\n", block->id());

    m_mainTrace->push_back(block);
    if (m_snapshots[block]) {
      useState(block);
    }

    auto instructions = std::move(block->instrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();
      instructions.pop_front();

      // Last attempt to elide ActRecs: if we still need the InlineFPAnchor,
      // it will be added back to the trace when we re-add the instructions
      // that rely on it.
      if (inst->op() == InlineFPAnchor) {
        continue;
      }

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.
      assert(inst->marker().valid());
      setMarker(inst->marker());

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
        updateTrackedState(inst);
        continue;
      }
      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        assert(block->empty() || !block->back()->isBlockEnd());
        IRInstruction* mov = m_irFactory.mov(dst, tmp, inst->marker());
        appendInstruction(mov, block);
        updateTrackedState(mov);
      }
      // Not re-adding inst; remove the inst->taken edge
      if (inst->taken()) inst->setTaken(nullptr);
    }
    if (block->back()->isTerminal()) {
      // Could have converted a conditional branch to Jmp; clear next.
      block->setNext(nullptr);
    } else {
      // if the last instruction was a branch, we already saved state
      // for the target in updateTrackedState().  Now save state for
      // the fall-through path.
      saveState(block->next());
    }
  }
}
Example 11
void CodeGenerator::cgGuardRefs(IRInstruction* inst) {
  assert(inst->numSrcs() == 5);

  SSATmp* funcPtrTmp = inst->src(0);
  SSATmp* nParamsTmp = inst->src(1);
  SSATmp* firstBitNumTmp = inst->src(2);
  SSATmp* mask64Tmp  = inst->src(3);
  SSATmp* vals64Tmp  = inst->src(4);

  // Get values in place
  assert(funcPtrTmp->type() == Type::Func);
  auto funcPtrReg = x2a(curOpd(funcPtrTmp).reg());
  assert(funcPtrReg.IsValid());

  assert(nParamsTmp->type() == Type::Int);
  auto nParamsReg = x2a(curOpd(nParamsTmp).reg());
  assert(nParamsReg.IsValid() || nParamsTmp->isConst());

  assert(firstBitNumTmp->isConst() && firstBitNumTmp->type() == Type::Int);
  uint32_t firstBitNum = (uint32_t)(firstBitNumTmp->getValInt());

  assert(mask64Tmp->type() == Type::Int);
  assert(mask64Tmp->isConst());
  auto mask64Reg = x2a(curOpd(mask64Tmp).reg());
  assert(mask64Reg.IsValid() || mask64Tmp->inst()->op() != LdConst);
  uint64_t mask64 = mask64Tmp->getValInt();
  assert(mask64);

  assert(vals64Tmp->type() == Type::Int);
  assert(vals64Tmp->isConst());
  auto vals64Reg = x2a(curOpd(vals64Tmp).reg());
  assert(vals64Reg.IsValid() || vals64Tmp->inst()->op() != LdConst);
  uint64_t vals64 = vals64Tmp->getValInt();
  assert((vals64 & mask64) == vals64);

  auto const destSK = SrcKey(curFunc(), m_unit.bcOff());
  auto const destSR = m_tx64->getSrcRec(destSK);

  auto thenBody = [&] {
    auto bitsOff = sizeof(uint64_t) * (firstBitNum / 64);
    auto cond = CC_NE;
    auto bitsPtrReg = rAsm;

    if (firstBitNum == 0) {
      bitsOff = Func::refBitValOff();
      bitsPtrReg = funcPtrReg;
    } else {
      m_as.    Ldr  (bitsPtrReg, funcPtrReg[Func::sharedOff()]);
      bitsOff -= sizeof(uint64_t);
    }

    // Don't need the bits pointer after this point
    auto bitsReg = rAsm;
    // Load the bits
    m_as.    Ldr  (bitsReg, bitsPtrReg[bitsOff]);

    // Mask the bits. There are restrictions on what can be encoded as an
    // immediate in ARM's logical instructions, and if they're not met, we'll
    // have to use a register.
    if (vixl::Assembler::IsImmLogical(mask64, vixl::kXRegSize)) {
      m_as.  And  (bitsReg, bitsReg, mask64);
    } else {
      if (mask64Reg.IsValid()) {
        m_as.And  (bitsReg, bitsReg, mask64Reg);
      } else {
        m_as.Mov  (rAsm2, mask64);
        m_as.And  (bitsReg, bitsReg, rAsm2);
      }
    }

    // Now do the compare. There are also restrictions on immediates in
    // arithmetic instructions (of which Cmp is one; it's just a subtract that
    // sets flags), so same deal as with the mask immediate above.
    if (vixl::Assembler::IsImmArithmetic(vals64)) {
      m_as.  Cmp  (bitsReg, vals64);
    } else {
      if (vals64Reg.IsValid()) {
        m_as.Cmp  (bitsReg, vals64Reg);
      } else {
        m_as.Mov  (rAsm2, vals64);
        m_as.Cmp  (bitsReg, rAsm2);
      }
    }
    destSR->emitFallbackJump(m_mainCode, cond);
  };

  if (firstBitNum == 0) {
    assert(!nParamsReg.IsValid());
    // This is the first 64 bits. No need to check
    // nParams.
    thenBody();
  } else {
    assert(nParamsReg.IsValid());
    // Check number of args...
    m_as.    Cmp   (nParamsReg, firstBitNum);

    if (vals64 != 0 && vals64 != mask64) {
      // If we're beyond nParams, then either all params are refs or all
      // params are non-refs, so if vals64 isn't 0 and isn't mask64 there's
      // no possibility of a match.
      destSR->emitFallbackJump(m_mainCode, CC_LE);
      thenBody();
    } else {
      ifThenElse(m_as, vixl::gt, thenBody, /* else */ [&] {
          //   If not special builtin...
          m_as.  Ldr  (rAsm, funcPtrReg[Func::attrsOff()]);
          m_as.  Tst  (rAsm, AttrVariadicByRef);
          destSR->emitFallbackJump(m_mainCode, vals64 ? CC_Z : CC_NZ);
        });
    }
  }
}
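
Stripped of the ARM encoding details, the guard above tests one 64-bit group of the function's ref bits: the bits selected by mask64 must equal vals64 exactly. A standalone sketch of that predicate; the helper name and the example bit assignment are assumptions for illustration.

#include <cstdint>

// This is what the And/Cmp (or a single Tst, when vals64 is zero or the mask
// is a single bit) sequences above compute.
inline bool refBitsMatch(uint64_t bits, uint64_t mask64, uint64_t vals64) {
  return (bits & mask64) == vals64;
}

// Example: mask64 = 0b1100, vals64 = 0b0100 requires one masked parameter to
// be by-ref and the other by-value (the bit numbering here is illustrative).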
Example 12
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  m_enableCse = RuntimeOption::EvalHHIRCse;
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_enableCse && !m_enableSimplification) return;
  if (m_trace->getBlocks().size() >
      RuntimeOption::EvalHHIRSimplificationMaxBlocks) {
    // TODO CSEHash::filter is very slow for large block sizes
    // t2135219 should address that
    return;
  }
  BlockList sortedBlocks = sortCfg(m_trace.get(), m_irFactory);
  IdomVector idoms = findDominators(sortedBlocks);
  clearTrackedState();
  auto blocks = std::move(m_trace->getBlocks());
  assert(m_trace->getBlocks().empty());
  while (!blocks.empty()) {
    Block* block = blocks.front();
    blocks.pop_front();
    assert(block->getTrace() == m_trace.get());
    m_trace->push_back(block);
    if (m_snapshots[block]) {
      useState(block);
      m_cseHash.filter(block, idoms);
    }
    auto instructions = std::move(block->getInstrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();
      instructions.pop_front();
      SSATmp* tmp = optimizeWork(inst); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst, block);
        updateTrackedState(inst);
        continue;
      }
      SSATmp* dst = inst->getDst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        assert(!block->back()->isBlockEnd());
        IRInstruction* mov = m_irFactory.mov(dst, tmp);
        appendInstruction(mov, block);
        updateTrackedState(mov);
      }
      // Not re-adding inst; remove the inst->taken edge
      if (inst->getTaken()) inst->setTaken(nullptr);
    }
    if (block->back()->isTerminal()) {
      // Could have converted a conditional branch to Jmp; clear next.
      block->setNext(nullptr);
    } else {
      // if the last instruction was a branch, we already saved state
      // for the target in updateTrackedState().  Now save state for
      // the fall-through path.
      saveState(block->getNext());
    }
  }
}
Example 13
/*
 * reoptimize() runs a trace through a second pass of TraceBuilder
 * optimizations, like this:
 *
 *   reset state.
 *   move all blocks to a temporary list.
 *   compute immediate dominators.
 *   for each block in trace order:
 *     if we have a snapshot state for this block:
 *       clear cse entries that don't dominate this block.
 *       use snapshot state.
 *     move all instructions to a temporary list.
 *     for each instruction:
 *       optimizeWork - do CSE and simplify again
 *       if not simplified:
 *         append existing instruction and update state.
 *       else:
 *         if the instruction has a result, insert a mov from the
 *         simplified tmp to the original tmp and discard the instruction.
 *     if the last conditional branch was turned into a jump, remove the
 *     fall-through edge to the next block.
 */
void TraceBuilder::reoptimize() {
  FTRACE(5, "ReOptimize:vvvvvvvvvvvvvvvvvvvv\n");
  SCOPE_EXIT { FTRACE(5, "ReOptimize:^^^^^^^^^^^^^^^^^^^^\n"); };
  assert(m_savedBlocks.empty());
  assert(!m_curWhere);

  m_state.setEnableCse(RuntimeOption::EvalHHIRCse);
  m_enableSimplification = RuntimeOption::EvalHHIRSimplification;
  if (!m_state.enableCse() && !m_enableSimplification) return;
  setConstrainGuards(false);

  BlockList sortedBlocks = rpoSortCfg(m_unit);
  auto const idoms = findDominators(m_unit, sortedBlocks);
  m_state.clear();

  for (auto* block : rpoSortCfg(m_unit)) {
    FTRACE(5, "Block: {}\n", block->id());

    m_state.startBlock(block);
    m_curBlock = block;

    auto instructions = std::move(block->instrs());
    assert(block->empty());
    while (!instructions.empty()) {
      auto *inst = &instructions.front();
      instructions.pop_front();

      // merging state looks at the current marker, and optimizeWork
      // below may create new instructions. Use the marker from this
      // instruction.
      assert(inst->marker().valid());
      setMarker(inst->marker());

      auto const tmp = optimizeWork(inst, idoms); // Can generate new instrs!
      if (!tmp) {
        // Could not optimize; keep the old instruction
        appendInstruction(inst);
        continue;
      }

      SSATmp* dst = inst->dst();
      if (dst->type() != Type::None && dst != tmp) {
        // The result of optimization has a different destination than the inst.
        // Generate a mov(tmp->dst) to get result into dst. If we get here then
        // assume the last instruction in the block isn't a guard. If it was,
        // we would have to insert the mov on the fall-through edge.
        assert(block->empty() || !block->back().isBlockEnd());
        IRInstruction* mov = m_unit.mov(dst, tmp, inst->marker());
        appendInstruction(mov);
      }

      if (inst->isBlockEnd()) {
        // Not re-adding inst; replace it with a jump to the next block.
        auto next = inst->next();
        appendInstruction(m_unit.gen(Jmp, inst->marker(), next));
        inst->setTaken(nullptr);
        inst->setNext(nullptr);
      }
    }

    assert(!block->empty());
    m_state.finishBlock(block);
  }
}
Example 14
void CodeGenerator::cgGuardRefs(IRInstruction* inst) {
  assert(inst->numSrcs() == 5);

  SSATmp* funcPtrTmp = inst->src(0);
  SSATmp* nParamsTmp = inst->src(1);
  SSATmp* firstBitNumTmp = inst->src(2);
  SSATmp* mask64Tmp  = inst->src(3);
  SSATmp* vals64Tmp  = inst->src(4);

  // Get values in place
  assert(funcPtrTmp->type() == Type::Func);
  auto funcPtrReg = x2a(m_regs[funcPtrTmp].reg());
  assert(funcPtrReg.IsValid());

  assert(nParamsTmp->type() == Type::Int);
  auto nParamsReg = x2a(m_regs[nParamsTmp].reg());
  assert(nParamsReg.IsValid() || nParamsTmp->isConst());

  assert(firstBitNumTmp->isConst() && firstBitNumTmp->type() == Type::Int);
  uint32_t firstBitNum = (uint32_t)(firstBitNumTmp->getValInt());

  assert(mask64Tmp->type() == Type::Int);
  assert(mask64Tmp->isConst());
  auto mask64Reg = x2a(m_regs[mask64Tmp].reg());
  assert(mask64Reg.IsValid() || mask64Tmp->inst()->op() != LdConst);
  uint64_t mask64 = mask64Tmp->getValInt();
  assert(mask64);

  assert(vals64Tmp->type() == Type::Int);
  assert(vals64Tmp->isConst());
  auto vals64Reg = x2a(m_regs[vals64Tmp].reg());
  assert(vals64Reg.IsValid() || vals64Tmp->inst()->op() != LdConst);
  uint64_t vals64 = vals64Tmp->getValInt();
  assert((vals64 & mask64) == vals64);

  auto const destSK = SrcKey(curFunc(), m_unit.bcOff());
  auto const destSR = m_tx64->getSrcRec(destSK);

  auto thenBody = [&] {
    auto bitsOff = sizeof(uint64_t) * (firstBitNum / 64);
    auto cond = CC_NE;
    auto bitsPtrReg = rAsm;

    if (firstBitNum == 0) {
      bitsOff = Func::refBitValOff();
      bitsPtrReg = funcPtrReg;
    } else {
      m_as.    Ldr  (bitsPtrReg, funcPtrReg[Func::sharedOff()]);
      bitsOff -= sizeof(uint64_t);
    }

    if (vals64 == 0 || (mask64 & (mask64 - 1)) == 0) {
      // If vals64 is zero, or we're testing a single
      // bit, we can get away with a single test,
      // rather than mask-and-compare
      m_as.    Ldr  (rAsm2, bitsPtrReg[bitsOff]);
      if (mask64Reg.IsValid()) {
        m_as.  Tst  (rAsm2, mask64Reg);
      } else {
        assert(vixl::Assembler::IsImmLogical(mask64, vixl::kXRegSize));
        m_as.  Tst  (rAsm2, mask64);
      }
      if (vals64) cond = CC_E;
    } else {
      auto bitsValReg = rAsm;
      m_as.    Ldr  (bitsValReg, bitsPtrReg[bitsOff]);
      if (debug) bitsPtrReg = Register();

      //     bitsValReg <- bitsValReg & mask64
      // NB: these 'And' ops don't set flags. They don't need to.
      if (mask64Reg.IsValid()) {
        m_as.  And    (bitsValReg, bitsValReg, mask64Reg);
      } else {
        // There are restrictions on the immediates that can be encoded into
        // logical ops. If the mask doesn't meet those restrictions, we have to
        // load it into a register first.
        if (vixl::Assembler::IsImmLogical(mask64, vixl::kXRegSize)) {
          m_as.And    (bitsValReg, bitsValReg, mask64);
        } else {
          m_as.Mov    (rAsm2, mask64);
          m_as.And    (bitsValReg, bitsValReg, rAsm2);
        }
      }

      //   If bitsValReg != vals64, then goto Exit
      if (vals64Reg.IsValid()) {
        m_as.  Cmp    (bitsValReg, vals64Reg);
      } else {
        m_as.  Cmp    (bitsValReg, vals64);
      }
    }
    destSR->emitFallbackJump(m_mainCode, cond);
  };

  if (firstBitNum == 0) {
    assert(!nParamsReg.IsValid());
    // This is the first 64 bits. No need to check
    // nParams.
    thenBody();
  } else {
    assert(nParamsReg.IsValid());
    // Check number of args...
    m_as.    Cmp   (nParamsReg, firstBitNum);

    if (vals64 != 0 && vals64 != mask64) {
      // If we're beyond nParams, then either all params are refs or all
      // params are non-refs, so if vals64 isn't 0 and isn't mask64 there's
      // no possibility of a match.
      destSR->emitFallbackJump(m_mainCode, CC_LE);
      thenBody();
    } else {
      ifThenElse(m_as, vixl::gt, thenBody, /* else */ [&] {
          //   If not special builtin...
          m_as.  Ldr  (rAsm, funcPtrReg[Func::attrsOff()]);
          m_as.  Tst  (rAsm, AttrVariadicByRef);
          destSR->emitFallbackJump(m_mainCode, vals64 ? CC_Z : CC_NZ);
        });
    }
  }
}