/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzeroupper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getTarget().getSubtarget<X86Subtarget>().hasAVX512())
    return false;
  TII = MF.getTarget().getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool EverMadeChange = false;

  // Fast check: if the function doesn't use any ymm registers, we don't need
  // to insert any VZEROUPPER instructions. This is constant-time, so it is
  // cheap in the common case of no ymm use.
  bool YMMUsed = false;
  const TargetRegisterClass *RC = &X86::VR256RegClass;
  for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
       i != e; i++) {
    if (!MRI.reg_nodbg_empty(*i)) {
      YMMUsed = true;
      break;
    }
  }
  if (!YMMUsed)
    return EverMadeChange;

  // Pre-compute the existence of any live-in YMM registers to this function
  FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);

  assert(BBState.empty());
  BBState.resize(MF.getNumBlockIDs(), 0);
  BBSolved.resize(MF.getNumBlockIDs(), 0);

  // Each BB state depends on all predecessors, loop over until everything
  // converges. (Once we converge, we can implicitly mark everything that is
  // still ST_UNKNOWN as ST_CLEAN.)
  while (1) {
    bool MadeChange = false;

    // Process all basic blocks.
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
      MadeChange |= processBasicBlock(MF, *I);

    // If this iteration over the code changed anything, keep iterating.
    if (!MadeChange) break;
    EverMadeChange = true;
  }

  BBState.clear();
  BBSolved.clear();
  return EverMadeChange;
}
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
    return false;

  STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
  if (STI->isThumb1Only() || STI->prefers32BitThumb())
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());

  // Optimizing / minimizing size?
  OptimizeSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
  MinimizeSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);

  BlockInfo.clear();
  BlockInfo.resize(MF.getNumBlockIDs());

  // Visit blocks in reverse post-order so LastCPSRDef is known for all
  // predecessors.
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
  bool Modified = false;
  for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
       I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
    Modified |= ReduceMBB(**I);
  return Modified;
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
///
void PEI::replaceFrameIndices(MachineFunction &Fn) {
  if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?

  // Store SPAdj at exit of a basic block.
  SmallVector<int, 8> SPState;
  SPState.resize(Fn.getNumBlockIDs());
  SmallPtrSet<MachineBasicBlock*, 8> Reachable;

  // Iterate over the reachable blocks in DFS order.
  for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
       DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
       DFI != DFE; ++DFI) {
    int SPAdj = 0;
    // Check the exit state of the DFS stack predecessor.
    if (DFI.getPathLength() >= 2) {
      MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
      assert(Reachable.count(StackPred) &&
             "DFS stack predecessor is already visited.\n");
      SPAdj = SPState[StackPred->getNumber()];
    }
    MachineBasicBlock *BB = *DFI;
    replaceFrameIndices(BB, Fn, SPAdj);
    SPState[BB->getNumber()] = SPAdj;
  }

  // Handle the unreachable blocks.
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
    if (Reachable.count(BB))
      // Already handled in DFS traversal.
      continue;
    int SPAdj = 0;
    replaceFrameIndices(BB, Fn, SPAdj);
  }
}
/// ColorSlots - Color all spill stack slots and rewrite all frame index
/// machine operands in the function.
bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
  unsigned NumObjs = MFI->getObjectIndexEnd();
  SmallVector<int, 16> SlotMapping(NumObjs, -1);
  SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
  SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
  BitVector UsedColors(NumObjs);

  DEBUG(dbgs() << "Color spill slot intervals:\n");
  bool Changed = false;
  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
    LiveInterval *li = SSIntervals[i];
    int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
    int NewSS = ColorSlot(li);
    assert(NewSS >= 0 && "Stack coloring failed?");
    SlotMapping[SS] = NewSS;
    RevMap[NewSS].push_back(SS);
    SlotWeights[NewSS] += li->weight;
    UsedColors.set(NewSS);
    Changed |= (SS != NewSS);
  }

  DEBUG(dbgs() << "\nSpill slots after coloring:\n");
  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
    LiveInterval *li = SSIntervals[i];
    int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
    li->weight = SlotWeights[SS];
  }
  // Sort them by new weight.
  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());

#ifndef NDEBUG
  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
    DEBUG(SSIntervals[i]->dump());
  DEBUG(dbgs() << '\n');
#endif

  if (!Changed)
    return false;

  // Rewrite all MO_FrameIndex operands.
  SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
  for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
    int NewFI = SlotMapping[SS];
    if (NewFI == -1 || (NewFI == (int)SS))
      continue;

    SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
    for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
      RewriteInstruction(RefMIs[i], SS, NewFI, MF);
  }

  // Delete unused stack slots.
  while (NextColor != -1) {
    DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
    MFI->RemoveStackObject(NextColor);
    NextColor = AllColors.find_next(NextColor);
  }

  return true;
}
bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  bundles = &getAnalysis<EdgeBundles>();
  loops = &getAnalysis<MachineLoopInfo>();

  assert(!nodes && "Leaking node array");
  nodes = new Node[bundles->getNumBundles()];

  // Compute total ingoing and outgoing block frequencies for all bundles.
  BlockFrequency.resize(mf.getNumBlockIDs());
  for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
    float Freq = LiveIntervals::getSpillWeight(true, false,
                                               loops->getLoopDepth(I));
    unsigned Num = I->getNumber();
    BlockFrequency[Num] = Freq;
    nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq;
    nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq;
  }

  // Scales are reciprocal frequencies.
  for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i)
    for (unsigned d = 0; d != 2; ++d)
      if (nodes[i].Scale[d] > 0)
        nodes[i].Scale[d] = 1 / nodes[i].Scale[d];

  // We never change the function.
  return false;
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
void PEI::replaceFrameIndices(MachineFunction &MF) {
  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
  if (!TFI.needsFrameIndexResolution(MF))
    return;

  // Store SPAdj at exit of a basic block.
  SmallVector<int, 8> SPState;
  SPState.resize(MF.getNumBlockIDs());
  df_iterator_default_set<MachineBasicBlock*> Reachable;

  // Iterate over the reachable blocks in DFS order.
  for (auto DFI = df_ext_begin(&MF, Reachable),
            DFE = df_ext_end(&MF, Reachable);
       DFI != DFE; ++DFI) {
    int SPAdj = 0;
    // Check the exit state of the DFS stack predecessor.
    if (DFI.getPathLength() >= 2) {
      MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
      assert(Reachable.count(StackPred) &&
             "DFS stack predecessor is already visited.\n");
      SPAdj = SPState[StackPred->getNumber()];
    }
    MachineBasicBlock *BB = *DFI;
    replaceFrameIndices(BB, MF, SPAdj);
    SPState[BB->getNumber()] = SPAdj;
  }

  // Handle the unreachable blocks.
  for (auto &BB : MF) {
    if (Reachable.count(&BB))
      // Already handled in DFS traversal.
      continue;
    int SPAdj = 0;
    replaceFrameIndices(&BB, MF, SPAdj);
  }
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
///
void PEI::replaceFrameIndices(MachineFunction &Fn) {
  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
  if (!TFI.needsFrameIndexResolution(Fn)) return;

  MachineModuleInfo &MMI = Fn.getMMI();
  const Function *F = Fn.getFunction();
  const Function *ParentF = MMI.getWinEHParent(F);
  unsigned FrameReg;

  if (F == ParentF) {
    WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction());
    // FIXME: This should be unconditional but we have bugs in the preparation
    // pass.
    if (FuncInfo.UnwindHelpFrameIdx != INT_MAX)
      FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP(
          Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg);
    for (WinEHTryBlockMapEntry &TBME : FuncInfo.TryBlockMap) {
      for (WinEHHandlerType &H : TBME.HandlerArray) {
        unsigned UnusedReg;
        if (H.CatchObj.FrameIndex == INT_MAX)
          H.CatchObj.FrameOffset = INT_MAX;
        else
          H.CatchObj.FrameOffset =
              TFI.getFrameIndexReference(Fn, H.CatchObj.FrameIndex, UnusedReg);
      }
    }
  }

  // Store SPAdj at exit of a basic block.
  SmallVector<int, 8> SPState;
  SPState.resize(Fn.getNumBlockIDs());
  SmallPtrSet<MachineBasicBlock*, 8> Reachable;

  // Iterate over the reachable blocks in DFS order.
  for (auto DFI = df_ext_begin(&Fn, Reachable),
            DFE = df_ext_end(&Fn, Reachable);
       DFI != DFE; ++DFI) {
    int SPAdj = 0;
    // Check the exit state of the DFS stack predecessor.
    if (DFI.getPathLength() >= 2) {
      MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
      assert(Reachable.count(StackPred) &&
             "DFS stack predecessor is already visited.\n");
      SPAdj = SPState[StackPred->getNumber()];
    }
    MachineBasicBlock *BB = *DFI;
    replaceFrameIndices(BB, Fn, SPAdj);
    SPState[BB->getNumber()] = SPAdj;
  }

  // Handle the unreachable blocks.
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
    if (Reachable.count(BB))
      // Already handled in DFS traversal.
      continue;
    int SPAdj = 0;
    replaceFrameIndices(BB, Fn, SPAdj);
  }
}
/// Insert LOOP and BLOCK markers at appropriate places.
static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
                         const WebAssemblyInstrInfo &TII,
                         MachineDominatorTree &MDT) {
  // For each block whose label represents the end of a scope, record the block
  // which holds the beginning of the scope. This will allow us to quickly skip
  // over scoped regions when walking blocks. We allocate one more than the
  // number of blocks in the function to accommodate the possible fake block we
  // may insert at the end.
  SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);

  for (auto &MBB : MF) {
    // Place the LOOP for MBB if MBB is the header of a loop.
    PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI);

    // Place the BLOCK for MBB if MBB is branched to from above.
    PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT);
  }
}
bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  bundles = &getAnalysis<EdgeBundles>();
  loops = &getAnalysis<MachineLoopInfo>();

  assert(!nodes && "Leaking node array");
  nodes = new Node[bundles->getNumBundles()];

  // Compute total ingoing and outgoing block frequencies for all bundles.
  BlockFrequencies.resize(mf.getNumBlockIDs());
  MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>();
  for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
    unsigned Num = I->getNumber();
    BlockFrequencies[Num] = MBFI.getBlockFreq(I);
  }

  // We never change the function.
  return false;
}
/// Check if the CFG of \p MF is irreducible.
static bool isIrreducibleCFG(const MachineFunction &MF,
                             const MachineLoopInfo &MLI) {
  const MachineBasicBlock *Entry = &*MF.begin();
  ReversePostOrderTraversal<const MachineBasicBlock *> RPOT(Entry);
  BitVector VisitedBB(MF.getNumBlockIDs());
  for (const MachineBasicBlock *MBB : RPOT) {
    VisitedBB.set(MBB->getNumber());
    for (const MachineBasicBlock *SuccBB : MBB->successors()) {
      if (!VisitedBB.test(SuccBB->getNumber()))
        continue;
      // We already visited SuccBB, thus MBB->SuccBB must be a backedge.
      // Check that the head matches what we have in the loop information.
      // Otherwise, we have an irreducible graph.
      if (!isProperBackedge(MLI, MBB, SuccBB))
        return true;
    }
  }
  return false;
}
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  const TargetMachine &TM = MF.getTarget();
  TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
  STI = &TM.getSubtarget<ARMSubtarget>();

  // Optimizing / minimizing size?
  AttributeSet FnAttrs = MF.getFunction()->getAttributes();
  OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
                                      Attribute::OptimizeForSize);
  MinimizeSize = STI->isMinSize();

  BlockInfo.clear();
  BlockInfo.resize(MF.getNumBlockIDs());

  // Visit blocks in reverse post-order so LastCPSRDef is known for all
  // predecessors.
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
  bool Modified = false;
  for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
       I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
    Modified |= ReduceMBB(**I);
  return Modified;
}
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzeroupper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (!ST.hasAVX() || ST.hasAVX512() || ST.hasFastPartialYMMWrite())
    return false;
  TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  EverMadeChange = false;

  bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);

  // Fast check: if the function doesn't use any ymm registers, we don't need
  // to insert any VZEROUPPER instructions. This is constant-time, so it is
  // cheap in the common case of no ymm use.
  bool YMMUsed = FnHasLiveInYmm;
  if (!YMMUsed) {
    const TargetRegisterClass *RC = &X86::VR256RegClass;
    for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
         i++) {
      if (!MRI.reg_nodbg_empty(*i)) {
        YMMUsed = true;
        break;
      }
    }
  }
  if (!YMMUsed) {
    return false;
  }

  assert(BlockStates.empty() && DirtySuccessors.empty() &&
         "X86VZeroUpper state should be clear");
  BlockStates.resize(MF.getNumBlockIDs());

  // Process all blocks. This will compute block exit states, record the first
  // unguarded call in each block, and add successors of dirty blocks to the
  // DirtySuccessors list.
  for (MachineBasicBlock &MBB : MF)
    processBasicBlock(MBB);

  // If any YMM regs are live-in to this function, add the entry block to the
  // DirtySuccessors list.
  if (FnHasLiveInYmm)
    addDirtySuccessor(MF.front());

  // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
  // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
  // through PASS_THROUGH blocks.
  while (!DirtySuccessors.empty()) {
    MachineBasicBlock &MBB = *DirtySuccessors.back();
    DirtySuccessors.pop_back();
    BlockState &BBState = BlockStates[MBB.getNumber()];

    // MBB is a successor of a dirty block, so its first call needs to be
    // guarded.
    if (BBState.FirstUnguardedCall != MBB.end())
      insertVZeroUpper(BBState.FirstUnguardedCall, MBB);

    // If this successor was a pass-through block, then it is now dirty, and
    // its successors need to be added to the worklist (if they haven't been
    // already).
    if (BBState.ExitState == PASS_THROUGH) {
      DEBUG(dbgs() << "MBB #" << MBB.getNumber()
                   << " was Pass-through, is now Dirty-out.\n");
      for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
           SE = MBB.succ_end(); SI != SE; ++SI)
        addDirtySuccessor(**SI);
    }
  }

  BlockStates.clear();
  return EverMadeChange;
}
/// Insert LOOP and BLOCK markers at appropriate places.
static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
                         const WebAssemblyInstrInfo &TII,
                         MachineDominatorTree &MDT,
                         WebAssemblyFunctionInfo &MFI) {
  // For each block whose label represents the end of a scope, record the block
  // which holds the beginning of the scope. This will allow us to quickly skip
  // over scoped regions when walking blocks. We allocate one more than the
  // number of blocks in the function to accommodate the possible fake block we
  // may insert at the end.
  SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);

  // For each LOOP_END, the corresponding LOOP.
  DenseMap<const MachineInstr *, const MachineBasicBlock *> LoopTops;

  for (auto &MBB : MF) {
    // Place the LOOP for MBB if MBB is the header of a loop.
    PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI);

    // Place the BLOCK for MBB if MBB is branched to from above.
    PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT, MFI);
  }

  // Now rewrite references to basic blocks to be depth immediates.
  SmallVector<const MachineBasicBlock *, 8> Stack;
  for (auto &MBB : reverse(MF)) {
    for (auto &MI : reverse(MBB)) {
      switch (MI.getOpcode()) {
      case WebAssembly::BLOCK:
        assert(ScopeTops[Stack.back()->getNumber()] == &MBB &&
               "Block should be balanced");
        Stack.pop_back();
        break;
      case WebAssembly::LOOP:
        assert(Stack.back() == &MBB && "Loop top should be balanced");
        Stack.pop_back();
        Stack.pop_back();
        break;
      case WebAssembly::END_BLOCK:
        Stack.push_back(&MBB);
        break;
      case WebAssembly::END_LOOP:
        Stack.push_back(&MBB);
        Stack.push_back(LoopTops[&MI]);
        break;
      default:
        if (MI.isTerminator()) {
          // Rewrite MBB operands to be depth immediates.
          SmallVector<MachineOperand, 4> Ops(MI.operands());
          while (MI.getNumOperands() > 0)
            MI.RemoveOperand(MI.getNumOperands() - 1);
          for (auto MO : Ops) {
            if (MO.isMBB())
              MO = MachineOperand::CreateImm(GetDepth(Stack, MO.getMBB()));
            MI.addOperand(MF, MO);
          }
        }
        break;
      }
    }
  }
  assert(Stack.empty() && "Control flow should be balanced");
}
/// Sort the blocks, taking special care to make sure that loops are not
/// interrupted by blocks not dominated by their header.
/// TODO: There are many opportunities for improving the heuristics here.
/// Explore them.
static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
                       const MachineDominatorTree &MDT) {
  // Prepare for a topological sort: Record the number of predecessors each
  // block has, ignoring loop backedges.
  MF.RenumberBlocks();
  SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0);
  for (MachineBasicBlock &MBB : MF) {
    unsigned N = MBB.pred_size();
    if (MachineLoop *L = MLI.getLoopFor(&MBB))
      if (L->getHeader() == &MBB)
        for (const MachineBasicBlock *Pred : MBB.predecessors())
          if (L->contains(Pred))
            --N;
    NumPredsLeft[MBB.getNumber()] = N;
  }

  // Topological sort the CFG, with additional constraints:
  //  - Between a loop header and the last block in the loop, there can be
  //    no blocks not dominated by the loop header.
  //  - It's desirable to preserve the original block order when possible.
  // We use two ready lists; Preferred and Ready. Preferred has recently
  // processed successors, to help preserve block sequences from the original
  // order. Ready has the remaining ready blocks.
  PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
                CompareBlockNumbers>
      Preferred;
  PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
                CompareBlockNumbersBackwards>
      Ready;
  SmallVector<Entry, 4> Loops;
  for (MachineBasicBlock *MBB = &MF.front();;) {
    const MachineLoop *L = MLI.getLoopFor(MBB);
    if (L) {
      // If MBB is a loop header, add it to the active loop list. We can't put
      // any blocks that it doesn't dominate until we see the end of the loop.
      if (L->getHeader() == MBB)
        Loops.push_back(Entry(L));
      // For each active loop the block is in, decrement the count. If MBB is
      // the last block in an active loop, take it off the list and pick up any
      // blocks deferred because the header didn't dominate them.
      for (Entry &E : Loops)
        if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0)
          for (auto DeferredBlock : E.Deferred)
            Ready.push(DeferredBlock);
      while (!Loops.empty() && Loops.back().NumBlocksLeft == 0)
        Loops.pop_back();
    }
    // The main topological sort logic.
    for (MachineBasicBlock *Succ : MBB->successors()) {
      // Ignore backedges.
      if (MachineLoop *SuccL = MLI.getLoopFor(Succ))
        if (SuccL->getHeader() == Succ && SuccL->contains(MBB))
          continue;
      // Decrement the predecessor count. If it's now zero, it's ready.
      if (--NumPredsLeft[Succ->getNumber()] == 0)
        Preferred.push(Succ);
    }
    // Determine the block to follow MBB. First try to find a preferred block,
    // to preserve the original block order when possible.
    MachineBasicBlock *Next = nullptr;
    while (!Preferred.empty()) {
      Next = Preferred.top();
      Preferred.pop();
      // If X isn't dominated by the top active loop header, defer it until
      // that loop is done.
      if (!Loops.empty() &&
          !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
        Loops.back().Deferred.push_back(Next);
        Next = nullptr;
        continue;
      }
      // If Next was originally ordered before MBB, and it isn't because it was
      // loop-rotated above the header, it's not preferred.
      if (Next->getNumber() < MBB->getNumber() &&
          (!L || !L->contains(Next) ||
           L->getHeader()->getNumber() < Next->getNumber())) {
        Ready.push(Next);
        Next = nullptr;
        continue;
      }
      break;
    }
    // If we didn't find a suitable block in the Preferred list, check the
    // general Ready list.
    if (!Next) {
      // If there are no more blocks to process, we're done.
      if (Ready.empty()) {
        MaybeUpdateTerminator(MBB);
        break;
      }
      for (;;) {
        Next = Ready.top();
        Ready.pop();
        // If Next isn't dominated by the top active loop header, defer it
        // until that loop is done.
        if (!Loops.empty() &&
            !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
          Loops.back().Deferred.push_back(Next);
          continue;
        }
        break;
      }
    }
    // Move the next block into place and iterate.
    Next->moveAfter(MBB);
    MaybeUpdateTerminator(MBB);
    MBB = Next;
  }
  assert(Loops.empty() && "Active loop list not finished");
  MF.RenumberBlocks();

#ifndef NDEBUG
  SmallSetVector<MachineLoop *, 8> OnStack;

  // Insert a sentinel representing the degenerate loop that starts at the
  // function entry block and includes the entire function as a "loop" that
  // executes once.
  OnStack.insert(nullptr);

  for (auto &MBB : MF) {
    assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");

    MachineLoop *Loop = MLI.getLoopFor(&MBB);
    if (Loop && &MBB == Loop->getHeader()) {
      // Loop header. The loop predecessor should be sorted above, and the
      // other predecessors should be backedges below.
      for (auto Pred : MBB.predecessors())
        assert(
            (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
            "Loop header predecessors must be loop predecessors or backedges");
      assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
    } else {
      // Not a loop header. All predecessors should be sorted above.
      for (auto Pred : MBB.predecessors())
        assert(Pred->getNumber() < MBB.getNumber() &&
               "Non-loop-header predecessors should be topologically sorted");
      assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
             "Blocks must be nested in their loops");
    }
    while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back()))
      OnStack.pop_back();
  }
  assert(OnStack.pop_back_val() == nullptr &&
         "The function entry block shouldn't actually be a loop header");
  assert(OnStack.empty() &&
         "Control flow stack pushes and pops should be balanced.");
#endif
}
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
  TLI = MF.getTarget().getTargetLowering();
  TII = MF.getTarget().getInstrInfo();
  if (!TII) return false;

  DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
               << MF.getFunction()->getName() << "\'");

  if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
    DEBUG(dbgs() << " skipped\n");
    return false;
  }
  DEBUG(dbgs() << "\n");

  MF.RenumberBlocks();
  BBAnalysis.resize(MF.getNumBlockIDs());

  // Look for root nodes, i.e. blocks without successors.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
    if (I->succ_empty())
      Roots.push_back(I);

  std::vector<IfcvtToken*> Tokens;
  MadeChange = false;
  unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
    NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
  while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
    // Do an initial analysis for each basic block and find all the potential
    // candidates to perform if-conversion.
    bool Change = AnalyzeBlocks(MF, Tokens);
    while (!Tokens.empty()) {
      IfcvtToken *Token = Tokens.back();
      Tokens.pop_back();
      BBInfo &BBI = Token->BBI;
      IfcvtKind Kind = Token->Kind;
      unsigned NumDups = Token->NumDups;
      unsigned NumDups2 = Token->NumDups2;

      delete Token;

      // If the block has been evicted out of the queue or it has already been
      // marked dead (due to it being predicated), then skip it.
      if (BBI.IsDone)
        BBI.IsEnqueued = false;
      if (!BBI.IsEnqueued)
        continue;

      BBI.IsEnqueued = false;

      bool RetVal = false;
      switch (Kind) {
      default: assert(false && "Unexpected!");
        break;
      case ICSimple:
      case ICSimpleFalse: {
        bool isFalse = Kind == ICSimpleFalse;
        if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
        DEBUG(dbgs() << "Ifcvt (Simple"
                     << (Kind == ICSimpleFalse ? " false" : "")
                     << "): BB#" << BBI.BB->getNumber() << " ("
                     << ((Kind == ICSimpleFalse)
                             ? BBI.FalseBB->getNumber()
                             : BBI.TrueBB->getNumber()) << ") ");
        RetVal = IfConvertSimple(BBI, Kind);
        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
        if (RetVal) {
          if (isFalse) NumSimpleFalse++;
          else         NumSimple++;
        }
        break;
      }
      case ICTriangle:
      case ICTriangleRev:
      case ICTriangleFalse:
      case ICTriangleFRev: {
        bool isFalse = Kind == ICTriangleFalse;
        bool isRev   = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
        if (DisableTriangle && !isFalse && !isRev) break;
        if (DisableTriangleR && !isFalse && isRev) break;
        if (DisableTriangleF && isFalse && !isRev) break;
        if (DisableTriangleFR && isFalse && isRev) break;
        DEBUG(dbgs() << "Ifcvt (Triangle");
        if (isFalse)
          DEBUG(dbgs() << " false");
        if (isRev)
          DEBUG(dbgs() << " rev");
        DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
                     << BBI.TrueBB->getNumber() << ",F:"
                     << BBI.FalseBB->getNumber() << ") ");
        RetVal = IfConvertTriangle(BBI, Kind);
        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
        if (RetVal) {
          if (isFalse) {
            if (isRev) NumTriangleFRev++;
            else       NumTriangleFalse++;
          } else {
            if (isRev) NumTriangleRev++;
            else       NumTriangle++;
          }
        }
        break;
      }
      case ICDiamond: {
        if (DisableDiamond) break;
        DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
                     << BBI.TrueBB->getNumber() << ",F:"
                     << BBI.FalseBB->getNumber() << ") ");
        RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
        if (RetVal) NumDiamonds++;
        break;
      }
      }

      Change |= RetVal;

      NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
        NumTriangleFalse + NumTriangleFRev + NumDiamonds;
      if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
        break;
    }

    if (!Change)
      break;
    MadeChange |= Change;
  }

  // Delete tokens in case of early exit.
  while (!Tokens.empty()) {
    IfcvtToken *Token = Tokens.back();
    Tokens.pop_back();
    delete Token;
  }

  Tokens.clear();
  Roots.clear();
  BBAnalysis.clear();

  if (MadeChange) {
    BranchFolder BF(false);
    BF.OptimizeFunction(MF, TII,
                        MF.getTarget().getRegisterInfo(),
                        getAnalysisIfAvailable<MachineModuleInfo>());
  }

  return MadeChange;
}
bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
  const MSP430InstrInfo *TII =
      static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
  // Give the blocks of the function a dense, in-order, numbering.
  Fn.RenumberBlocks();
  BlockSizes.resize(Fn.getNumBlockIDs());

  // Measure each MBB and compute a size for the entire function.
  unsigned FuncSize = 0;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock *MBB = MFI;

    unsigned BlockSize = 0;
    for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
         MBBI != EE; ++MBBI)
      BlockSize += TII->GetInstSizeInBytes(MBBI);

    BlockSizes[MBB->getNumber()] = BlockSize;
    FuncSize += BlockSize;
  }

  // If the entire function is smaller than the displacement of a branch field,
  // we know we don't need to shrink any branches in this function. This is a
  // common case.
  if (FuncSize < (1 << 9)) {
    BlockSizes.clear();
    return false;
  }

  // For each conditional branch, if the offset to its destination is larger
  // than the offset field allows, transform it into a long branch sequence
  // like this:
  //   short branch:
  //     bCC MBB
  //   long branch:
  //     b!CC $PC+6
  //     b MBB
  //
  bool MadeChange = true;
  bool EverMadeChange = false;
  while (MadeChange) {
    // Iteratively expand branches until we reach a fixed point.
    MadeChange = false;

    for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
         ++MFI) {
      MachineBasicBlock &MBB = *MFI;
      unsigned MBBStartOffset = 0;
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I) {
        if ((I->getOpcode() != MSP430::JCC || I->getOperand(0).isImm()) &&
            I->getOpcode() != MSP430::JMP) {
          MBBStartOffset += TII->GetInstSizeInBytes(I);
          continue;
        }

        // Determine the offset from the current branch to the destination
        // block.
        MachineBasicBlock *Dest = I->getOperand(0).getMBB();

        int BranchSize;
        if (Dest->getNumber() <= MBB.getNumber()) {
          // If this is a backwards branch, the delta is the offset from the
          // start of this block to this branch, plus the sizes of all blocks
          // from this block to the dest.
          BranchSize = MBBStartOffset;

          for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        } else {
          // Otherwise, add the size of the blocks between this block and the
          // dest to the number of bytes left in this block.
          BranchSize = -MBBStartOffset;

          for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        }

        // If this branch is in range, ignore it.
        if (isInt<10>(BranchSize)) {
          MBBStartOffset += 2;
          continue;
        }

        // Otherwise, we have to expand it to a long branch.
        unsigned NewSize;
        MachineInstr *OldBranch = I;
        DebugLoc dl = OldBranch->getDebugLoc();

        if (I->getOpcode() == MSP430::JMP) {
          NewSize = 4;
        } else {
          // The BCC operands are:
          // 0. MSP430 branch predicate
          // 1. Target MBB
          SmallVector<MachineOperand, 1> Cond;
          Cond.push_back(I->getOperand(1));

          // Jump over the uncond branch inst (i.e. $+6) on opposite condition.
          TII->ReverseBranchCondition(Cond);
          BuildMI(MBB, I, dl, TII->get(MSP430::JCC))
            .addImm(4).addOperand(Cond[0]);

          NewSize = 6;
        }
        // Uncond branch to the real destination.
        I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest);

        // Remove the old branch from the function.
        OldBranch->eraseFromParent();

        // Remember that this instruction is NewSize bytes, increase the size
        // of the block by NewSize-2, remember to iterate.
        BlockSizes[MBB.getNumber()] += NewSize-2;
        MBBStartOffset += NewSize;

        ++NumExpanded;
        MadeChange = true;
      }
    }
    EverMadeChange |= MadeChange;
  }

  BlockSizes.clear();
  return true;
}
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
  const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
  // Give the blocks of the function a dense, in-order, numbering.
  Fn.RenumberBlocks();
  BlockSizes.resize(Fn.getNumBlockIDs());

  // Measure each MBB and compute a size for the entire function.
  unsigned FuncSize = 0;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock *MBB = MFI;

    unsigned BlockSize = 0;
    for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
         MBBI != EE; ++MBBI)
      BlockSize += TII->GetInstSizeInBytes(MBBI);

    BlockSizes[MBB->getNumber()] = BlockSize;
    FuncSize += BlockSize;
  }

  // If the entire function is smaller than the displacement of a branch field,
  // we know we don't need to shrink any branches in this function. This is a
  // common case.
  if (FuncSize < (1 << 15)) {
    BlockSizes.clear();
    return false;
  }

  // For each conditional branch, if the offset to its destination is larger
  // than the offset field allows, transform it into a long branch sequence
  // like this:
  //   short branch:
  //     bCC MBB
  //   long branch:
  //     b!CC $PC+8
  //     b MBB
  //
  bool MadeChange = true;
  bool EverMadeChange = false;
  while (MadeChange) {
    // Iteratively expand branches until we reach a fixed point.
    MadeChange = false;

    for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
         ++MFI) {
      MachineBasicBlock &MBB = *MFI;
      unsigned MBBStartOffset = 0;
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I) {
        if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImmediate()) {
          MBBStartOffset += TII->GetInstSizeInBytes(I);
          continue;
        }

        // Determine the offset from the current branch to the destination
        // block.
        MachineBasicBlock *Dest = I->getOperand(2).getMBB();

        int BranchSize;
        if (Dest->getNumber() <= MBB.getNumber()) {
          // If this is a backwards branch, the delta is the offset from the
          // start of this block to this branch, plus the sizes of all blocks
          // from this block to the dest.
          BranchSize = MBBStartOffset;

          for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        } else {
          // Otherwise, add the size of the blocks between this block and the
          // dest to the number of bytes left in this block.
          BranchSize = -MBBStartOffset;

          for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        }

        // If this branch is in range, ignore it.
        if (isInt16(BranchSize)) {
          MBBStartOffset += 4;
          continue;
        }

        // Otherwise, we have to expand it to a long branch.
        // The BCC operands are:
        // 0. PPC branch predicate
        // 1. CR register
        // 2. Target MBB
        PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
        unsigned CRReg = I->getOperand(1).getReg();
        MachineInstr *OldBranch = I;

        // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
        BuildMI(MBB, I, TII->get(PPC::BCC))
          .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);

        // Uncond branch to the real destination.
        I = BuildMI(MBB, I, TII->get(PPC::B)).addMBB(Dest);

        // Remove the old branch from the function.
        OldBranch->eraseFromParent();

        // Remember that this instruction is 8-bytes, increase the size of the
        // block by 4, remember to iterate.
        BlockSizes[MBB.getNumber()] += 4;
        MBBStartOffset += 8;
        ++NumExpanded;
        MadeChange = true;
      }
    }
    EverMadeChange |= MadeChange;
  }

  BlockSizes.clear();
  return true;
}