/// Apply cost model and heuristics to the if-conversion in IfConv.
/// Return true if the conversion is a good idea.
///
bool AArch64ConditionalCompares::shouldConvert() {
  // Stress testing mode disables all cost considerations.
  if (Stress)
    return true;
  if (!MinInstr)
    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);

  // Head dominates CmpBB, so it is always included in its trace.
  MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);

  // If code size is the main concern
  if (MinSize) {
    int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
    DEBUG(dbgs() << "Code size delta:  " << CodeSizeDelta << '\n');
    // If we are minimizing the code size, do the conversion whatever
    // the cost is.
    if (CodeSizeDelta < 0)
      return true;
    if (CodeSizeDelta > 0) {
      DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
      return false;
    }
    // CodeSizeDelta == 0, continue with the regular heuristics
  }

  // Heuristic: The compare conversion delays the execution of the branch
  // instruction because we must wait for the inputs to the second compare as
  // well. The branch has no dependent instructions, but delaying it increases
  // the cost of a misprediction.
  //
  // Set a limit on the delay we will accept.
  unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;

  // Instruction depths can be computed for all trace instructions above CmpBB.
  unsigned HeadDepth =
      Trace.getInstrCycles(CmpConv.Head->getFirstTerminator()).Depth;
  unsigned CmpBBDepth =
      Trace.getInstrCycles(CmpConv.CmpBB->getFirstTerminator()).Depth;
  DEBUG(dbgs() << "Head depth:  " << HeadDepth
               << "\nCmpBB depth: " << CmpBBDepth << '\n');
  if (CmpBBDepth > HeadDepth + DelayLimit) {
    DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
                 << " cycles.\n");
    return false;
  }

  // Check the resource depth at the bottom of CmpBB - these instructions will
  // be speculated.
  unsigned ResDepth = Trace.getResourceDepth(true);
  DEBUG(dbgs() << "Resources:   " << ResDepth << '\n');

  // Heuristic: The speculatively executed instructions must all be able to
  // merge into the Head block. The Head critical path should dominate the
  // resource cost of the speculated instructions.
  if (ResDepth > HeadDepth) {
    DEBUG(dbgs() << "Too many instructions to speculate.\n");
    return false;
  }
  return true;
}
Example #2
0
/// Return true if an STP can be added to this block without increasing the
/// critical resource height. STP is good to form in Ld/St limited blocks and
/// bad to form in float-point limited blocks. This is true independent of the
/// critical path. If the critical path is longer than the resource height, the
/// extra vector ops can limit physreg renaming. Otherwise, it could simply
/// oversaturate the vector units.
bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
  if (!MinInstr)
    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);

  MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
  unsigned ResLength = BBTrace.getResourceLength();

  // Get the machine model's scheduling class for STPQi.
  // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
  unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass();
  const MCSchedClassDesc *SCDesc =
      SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);

  // If a subtarget does not define resources for STPQi, bail here.
  if (SCDesc->isValid() && !SCDesc->isVariant()) {
    unsigned ResLenWithSTP = BBTrace.getResourceLength(
        ArrayRef<const MachineBasicBlock *>(), SCDesc);
    if (ResLenWithSTP > ResLength) {
      DEBUG(dbgs() << "  Suppress STP in BB: " << BB->getNumber()
                   << " resources " << ResLength << " -> " << ResLenWithSTP
                   << "\n");
      return false;
    }
  }
  return true;
}
Example #3
0
/// getDepth - Computes depth of instructions in vector \InsInstr.
///
/// \param InsInstrs is a vector of machine instructions
/// \param InstrIdxForVirtReg is a dense map of virtual register to index
/// of defining machine instruction in \p InsInstrs
/// \param BlockTrace is a trace of machine instructions
///
/// \returns Depth of last instruction in \InsInstrs ("NewRoot")
unsigned
MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
                          DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
                          MachineTraceMetrics::Trace BlockTrace) {

  SmallVector<unsigned, 16> InstrDepth;
  assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");

  // Foreach instruction in in the new sequence compute the depth based on the
  // operands. Use the trace information when possible. For new operands which
  // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
  for (auto *InstrPtr : InsInstrs) { // for each Use
    unsigned IDepth = 0;
    DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
    for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = InstrPtr->getOperand(i);
      // Check for virtual register operand.
      if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
        continue;
      if (!MO.isUse())
        continue;
      unsigned DepthOp = 0;
      unsigned LatencyOp = 0;
      DenseMap<unsigned, unsigned>::iterator II =
          InstrIdxForVirtReg.find(MO.getReg());
      if (II != InstrIdxForVirtReg.end()) {
        // Operand is new virtual register not in trace
        assert(II->second < InstrDepth.size() && "Bad Index");
        MachineInstr *DefInstr = InsInstrs[II->second];
        assert(DefInstr &&
               "There must be a definition for a new virtual register");
        DepthOp = InstrDepth[II->second];
        LatencyOp = TSchedModel.computeOperandLatency(
            DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
            InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
      } else {
        MachineInstr *DefInstr = getOperandDef(MO);
        if (DefInstr) {
          DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
          LatencyOp = TSchedModel.computeOperandLatency(
              DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
              InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
        }
      }
      IDepth = std::max(IDepth, DepthOp + LatencyOp);
    }
    InstrDepth.push_back(IDepth);
  }