/// Apply cost model and heuristics to the if-conversion in IfConv.
/// Return true if the conversion is a good idea.
///
bool AArch64ConditionalCompares::shouldConvert() {
  // Stress testing mode disables all cost considerations.
  if (Stress)
    return true;
  if (!MinInstr)
    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);

  // Head dominates CmpBB, so it is always included in its trace.
  MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);

  // If code size is the main concern
  if (MinSize) {
    int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
    DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
    // If we are minimizing the code size, do the conversion whatever
    // the cost is.
    if (CodeSizeDelta < 0)
      return true;
    if (CodeSizeDelta > 0) {
      DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
      return false;
    }
    // CodeSizeDelta == 0, continue with the regular heuristics
  }

  // Heuristic: The compare conversion delays the execution of the branch
  // instruction because we must wait for the inputs to the second compare as
  // well. The branch has no dependent instructions, but delaying it increases
  // the cost of a misprediction.
  //
  // Set a limit on the delay we will accept.
  unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;

  // Instruction depths can be computed for all trace instructions above CmpBB.
  unsigned HeadDepth =
      Trace.getInstrCycles(CmpConv.Head->getFirstTerminator()).Depth;
  unsigned CmpBBDepth =
      Trace.getInstrCycles(CmpConv.CmpBB->getFirstTerminator()).Depth;
  DEBUG(dbgs() << "Head depth: " << HeadDepth
               << "\nCmpBB depth: " << CmpBBDepth << '\n');
  if (CmpBBDepth > HeadDepth + DelayLimit) {
    DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
                 << " cycles.\n");
    return false;
  }

  // Check the resource depth at the bottom of CmpBB - these instructions will
  // be speculated.
  unsigned ResDepth = Trace.getResourceDepth(true);
  DEBUG(dbgs() << "Resources: " << ResDepth << '\n');

  // Heuristic: The speculatively executed instructions must all be able to
  // merge into the Head block. The Head critical path should dominate the
  // resource cost of the speculated instructions.
  if (ResDepth > HeadDepth) {
    DEBUG(dbgs() << "Too many instructions to speculate.\n");
    return false;
  }
  return true;
}
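A small worked sketch of the delay-limit heuristic above; the helper name and every number are hypothetical and are not taken from any real subtarget model:

// Illustration only: accept the conversion only if the extra depth added to
// the branch stays within three quarters of the misprediction penalty.
static bool wouldAcceptBranchDelay(unsigned MispredictPenalty,
                                   unsigned HeadDepth, unsigned CmpBBDepth) {
  unsigned DelayLimit = MispredictPenalty * 3 / 4;
  return CmpBBDepth <= HeadDepth + DelayLimit;
}
// wouldAcceptBranchDelay(16, 5, 17) -> true  (17 <= 5 + 12)
// wouldAcceptBranchDelay(16, 5, 20) -> false (20 >  5 + 12)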
/// Return true if an STP can be added to this block without increasing the
/// critical resource height. STP is good to form in Ld/St limited blocks and
/// bad to form in floating-point limited blocks. This is true independent of
/// the critical path. If the critical path is longer than the resource height,
/// the extra vector ops can limit physreg renaming. Otherwise, it could simply
/// oversaturate the vector units.
bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
  if (!MinInstr)
    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
  MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
  unsigned ResLength = BBTrace.getResourceLength();

  // Get the machine model's scheduling class for STPDi.
  // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
  unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass();
  const MCSchedClassDesc *SCDesc =
      SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);

  // If a subtarget does not define resources for STPDi, bail here.
  if (SCDesc->isValid() && !SCDesc->isVariant()) {
    unsigned ResLenWithSTP = BBTrace.getResourceLength(
        ArrayRef<const MachineBasicBlock *>(), SCDesc);
    if (ResLenWithSTP > ResLength) {
      DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
                   << " resources " << ResLength << " -> " << ResLenWithSTP
                   << "\n");
      return false;
    }
  }
  return true;
}
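A rough sketch of how such a per-block decision might be used by a caller; the loop structure, isNarrowFPStore, and suppressLdStPair are assumed for illustration and their exact signatures may differ from the real pass:

// Hypothetical call site (names and signatures assumed, not verbatim):
// decide once per block, then tag each narrow FP store so the load/store
// optimizer will not turn it into an STP in vector-limited blocks.
for (MachineBasicBlock &MBB : MF) {
  if (shouldAddSTPToBlock(&MBB))
    continue;
  for (MachineInstr &MI : MBB)
    if (isNarrowFPStore(MI))      // assumed helper from the same pass
      TII->suppressLdStPair(&MI); // assumed target hook
}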
/// getDepth - Computes depth of instructions in vector \p InsInstrs.
///
/// \param InsInstrs is a vector of machine instructions
/// \param InstrIdxForVirtReg is a dense map of virtual register to index
/// of defining machine instruction in \p InsInstrs
/// \param BlockTrace is a trace of machine instructions
///
/// \returns Depth of last instruction in \p InsInstrs ("NewRoot")
unsigned
MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
                          DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
                          MachineTraceMetrics::Trace BlockTrace) {
  SmallVector<unsigned, 16> InstrDepth;
  assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");

  // For each instruction in the new sequence, compute the depth based on its
  // operands. Use the trace information when possible. For new operands that
  // are tracked in the InstrIdxForVirtReg map, the depth is looked up in
  // InstrDepth.
  for (auto *InstrPtr : InsInstrs) { // for each Use
    unsigned IDepth = 0;
    DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
    for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = InstrPtr->getOperand(i);
      // Check for virtual register operand.
      if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
        continue;
      if (!MO.isUse())
        continue;
      unsigned DepthOp = 0;
      unsigned LatencyOp = 0;
      DenseMap<unsigned, unsigned>::iterator II =
          InstrIdxForVirtReg.find(MO.getReg());
      if (II != InstrIdxForVirtReg.end()) {
        // Operand is a new virtual register not in the trace.
        assert(II->second < InstrDepth.size() && "Bad Index");
        MachineInstr *DefInstr = InsInstrs[II->second];
        assert(DefInstr &&
               "There must be a definition for a new virtual register");
        DepthOp = InstrDepth[II->second];
        LatencyOp = TSchedModel.computeOperandLatency(
            DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
            InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
      } else {
        MachineInstr *DefInstr = getOperandDef(MO);
        if (DefInstr) {
          DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
          LatencyOp = TSchedModel.computeOperandLatency(
              DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
              InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
        }
      }
      IDepth = std::max(IDepth, DepthOp + LatencyOp);
    }
    InstrDepth.push_back(IDepth);
  }
  // By construction, the last instruction in InsInstrs is the new root
  // ("NewRoot"); its depth is the result.
  unsigned NewRootIdx = InsInstrs.size() - 1;
  return InstrDepth[NewRootIdx];
}
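A tiny worked example of the recurrence this loop implements; the register names, depths, and latencies below are invented for illustration:

// depth(new) = max over register uses of (depth(def) + latency(def -> use))
//   use of %a: def depth 3, def->use latency 2  -> 3 + 2 = 5
//   use of %b: def depth 4, def->use latency 1  -> 4 + 1 = 5
// The new instruction's entry in InstrDepth is therefore 5, and getDepth()
// returns the value computed for the last ("NewRoot") instruction.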