void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
                                     std::vector<WorkItem> &Worklist) {
  BlockInfo BI = Blocks[&MBB]; // Make a copy to prevent dangling references.

  // Propagate through instructions
  if (!MBB.empty()) {
    MachineInstr *LastMI = &*MBB.rbegin();
    InstrInfo &LastII = Instructions[LastMI];
    if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
      LastII.OutNeeds |= BI.OutNeeds;
      Worklist.push_back(LastMI);
    }
  }

  // Predecessor blocks must provide for our WQM/Exact needs.
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
    BlockInfo &PredBI = Blocks[Pred];
    if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
      continue;

    PredBI.OutNeeds |= BI.InNeeds;
    PredBI.InNeeds |= BI.InNeeds;
    Worklist.push_back(Pred);
  }

  // All successors must be prepared to accept the same set of WQM/Exact data.
  for (MachineBasicBlock *Succ : MBB.successors()) {
    BlockInfo &SuccBI = Blocks[Succ];
    if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
      continue;

    SuccBI.InNeeds |= BI.OutNeeds;
    Worklist.push_back(Succ);
  }
}
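// A minimal, self-contained sketch of the fixed-point worklist pattern the
// function above participates in. Block, InNeeds, and OutNeeds are simplified
// stand-ins for the pass's BlockInfo bookkeeping (the per-instruction
// propagation is omitted), not the pass's real types.
#include <cstdint>
#include <vector>

struct Block {
  uint8_t InNeeds = 0, OutNeeds = 0;
  std::vector<int> Preds, Succs;
};

// Re-queue a block whenever its In/Out sets grow; stop at a fixed point.
void propagateToFixedPoint(std::vector<Block> &Blocks,
                           std::vector<int> Worklist) {
  while (!Worklist.empty()) {
    int N = Worklist.back();
    Worklist.pop_back();
    Block BI = Blocks[N]; // Copy, as above, to avoid dangling references.

    // Predecessors must provide everything this block needs on entry.
    for (int P : BI.Preds) {
      Block &Pred = Blocks[P];
      if ((Pred.OutNeeds | BI.InNeeds) == Pred.OutNeeds)
        continue; // Already satisfied; no need to re-queue.
      Pred.OutNeeds |= BI.InNeeds;
      Pred.InNeeds |= BI.InNeeds;
      Worklist.push_back(P);
    }
    // Successors must accept everything this block provides on exit.
    for (int S : BI.Succs) {
      Block &Succ = Blocks[S];
      if ((Succ.InNeeds | BI.OutNeeds) == Succ.InNeeds)
        continue;
      Succ.InNeeds |= BI.OutNeeds;
      Worklist.push_back(S);
    }
  }
}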
bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
  for (MachineBasicBlock *PredBB : BB.predecessors()) {
    if (PredBB->succ_size() > 1)
      return false;

    MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
    SmallVector<MachineOperand, 4> PredCond;
    if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
      return false;

    if (!PredCond.empty())
      return false;
  }
  return true;
}
/// Insert a BLOCK marker for branches to MBB (if needed).
static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
                             SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
                             const WebAssemblyInstrInfo &TII,
                             const MachineLoopInfo &MLI,
                             MachineDominatorTree &MDT,
                             WebAssemblyFunctionInfo &MFI) {
  // First compute the nearest common dominator of all forward non-fallthrough
  // predecessors so that we minimize the time that the BLOCK is on the stack,
  // which reduces overall stack height.
  MachineBasicBlock *Header = nullptr;
  bool IsBranchedTo = false;
  int MBBNumber = MBB.getNumber();
  for (MachineBasicBlock *Pred : MBB.predecessors())
    if (Pred->getNumber() < MBBNumber) {
      Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
      if (ExplicitlyBranchesTo(Pred, &MBB))
        IsBranchedTo = true;
    }
  if (!Header)
    return;
  if (!IsBranchedTo)
    return;

  assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors");
  MachineBasicBlock *LayoutPred = &*std::prev(MachineFunction::iterator(&MBB));

  // If the nearest common dominator is inside a more deeply nested context,
  // walk out to the nearest scope which isn't more deeply nested.
  for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) {
    if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
      if (ScopeTop->getNumber() > Header->getNumber()) {
        // Skip over an intervening scope.
        I = std::next(MachineFunction::iterator(ScopeTop));
      } else {
        // We found a scope level at an appropriate depth.
        Header = ScopeTop;
        break;
      }
    }
  }

  // If there's a loop which ends just before MBB which contains Header, we can
  // reuse its label instead of inserting a new BLOCK.
  for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred);
       Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop())
    if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header))
      return;

  // Decide where in Header to put the BLOCK.
  MachineBasicBlock::iterator InsertPos;
  MachineLoop *HeaderLoop = MLI.getLoopFor(Header);
  if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) {
    // Header is the header of a loop that does not lexically contain MBB, so
    // the BLOCK needs to be above the LOOP, after any END constructs.
    InsertPos = Header->begin();
    while (InsertPos->getOpcode() != WebAssembly::LOOP)
      ++InsertPos;
  } else {
    // Otherwise, insert the BLOCK as late in Header as we can, but before the
    // beginning of the local expression tree and any nested BLOCKs.
    InsertPos = Header->getFirstTerminator();
    while (InsertPos != Header->begin() &&
           IsChild(*std::prev(InsertPos), MFI) &&
           std::prev(InsertPos)->getOpcode() != WebAssembly::LOOP &&
           std::prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK &&
           std::prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP)
      --InsertPos;
  }

  // Add the BLOCK.
  BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK));

  // Mark the end of the block.
  InsertPos = MBB.begin();
  while (InsertPos != MBB.end() &&
         InsertPos->getOpcode() == WebAssembly::END_LOOP)
    ++InsertPos;
  BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK));

  // Track the farthest-spanning scope that ends at this point.
  int Number = MBB.getNumber();
  if (!ScopeTops[Number] ||
      ScopeTops[Number]->getNumber() > Header->getNumber())
    ScopeTops[Number] = Header;
}
bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
                                                     MachineLoopInfo &MLI,
                                                     MachineLoop *Loop) {
  MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin();
  SetVector<MachineBasicBlock *> RewriteSuccs;

  // DFS through Loop's body, looking for irreducible control flow. Loop is
  // natural, and we stay in its body, and we treat any nested loops
  // monolithically, so any cycles we encounter indicate irreducibility.
  SmallPtrSet<MachineBasicBlock *, 8> OnStack;
  SmallPtrSet<MachineBasicBlock *, 8> Visited;
  SmallVector<SuccessorList, 4> LoopWorklist;
  LoopWorklist.push_back(SuccessorList(Header));
  OnStack.insert(Header);
  Visited.insert(Header);
  while (!LoopWorklist.empty()) {
    SuccessorList &Top = LoopWorklist.back();
    if (Top.HasNext()) {
      MachineBasicBlock *Next = Top.Next();
      if (Next == Header || (Loop && !Loop->contains(Next)))
        continue;
      if (LLVM_LIKELY(OnStack.insert(Next).second)) {
        if (!Visited.insert(Next).second) {
          OnStack.erase(Next);
          continue;
        }
        MachineLoop *InnerLoop = MLI.getLoopFor(Next);
        if (InnerLoop != Loop)
          LoopWorklist.push_back(SuccessorList(InnerLoop));
        else
          LoopWorklist.push_back(SuccessorList(Next));
      } else {
        RewriteSuccs.insert(Top.getBlock());
      }
      continue;
    }
    OnStack.erase(Top.getBlock());
    LoopWorklist.pop_back();
  }

  // Most likely, we didn't find any irreducible control flow.
  if (LLVM_LIKELY(RewriteSuccs.empty()))
    return false;

  DEBUG(dbgs() << "Irreducible control flow detected!\n");

  // Ok. We have irreducible control flow! Create a dispatch block which will
  // contain a jump table to any block in the problematic set of blocks.
  MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
  MF.insert(MF.end(), Dispatch);
  MLI.changeLoopFor(Dispatch, Loop);

  // Add the jump table.
  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(),
                                    TII.get(WebAssembly::BR_TABLE_I32));

  // Add the register which will be used to tell the jump table which block to
  // jump to.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  MIB.addReg(Reg);

  // Collect all the blocks which need to have their successors rewritten,
  // add the successors to the jump table, and remember their index.
  DenseMap<MachineBasicBlock *, unsigned> Indices;
  SmallVector<MachineBasicBlock *, 4> SuccWorklist(RewriteSuccs.begin(),
                                                   RewriteSuccs.end());
  while (!SuccWorklist.empty()) {
    MachineBasicBlock *MBB = SuccWorklist.pop_back_val();
    auto Pair = Indices.insert(std::make_pair(MBB, 0));
    if (!Pair.second)
      continue;

    unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
    DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index << "\n");

    Pair.first->second = Index;
    for (auto Pred : MBB->predecessors())
      RewriteSuccs.insert(Pred);

    MIB.addMBB(MBB);
    Dispatch->addSuccessor(MBB);

    MetaBlock Meta(MBB);
    for (auto *Succ : Meta.successors())
      if (Succ != Header && (!Loop || Loop->contains(Succ)))
        SuccWorklist.push_back(Succ);
  }

  // Rewrite the problematic successors for every block in RewriteSuccs.
  // For simplicity, we just introduce a new block for every edge we need to
  // rewrite. Fancier things are possible.
  for (MachineBasicBlock *MBB : RewriteSuccs) {
    DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
    for (auto *Succ : MBB->successors()) {
      if (!Indices.count(Succ))
        continue;

      MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
      MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
                                             : MF.end(),
                Split);
      MLI.changeLoopFor(Split, Loop);

      // Set the register used by the jump table to the index of the block we
      // wish to jump to, and jump to the jump table.
      BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32),
              Reg)
          .addImm(Indices[Succ]);
      BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR))
          .addMBB(Dispatch);
      Split->addSuccessor(Dispatch);
      Map[Succ] = Split;
    }
    // Remap the terminator operands and the successor list.
    for (MachineInstr &Term : MBB->terminators())
      for (auto &Op : Term.explicit_uses())
        if (Op.isMBB() && Indices.count(Op.getMBB()))
          Op.setMBB(Map[Op.getMBB()]);
    for (auto Rewrite : Map)
      MBB->replaceSuccessor(Rewrite.first, Rewrite.second);
  }

  // Create a fake default label, because br_table requires one.
  MIB.addMBB(MIB.getInstr()
                 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
                 .getMBB());

  return true;
}
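// The detection half of VisitLoop reduces to an iterative DFS with an
// on-stack set: any edge closing a cycle that does not pass through the
// natural header marks its source block for rewriting. A toy sketch over a
// plain adjacency list, with block 0 standing in for the header (assumed
// simplified types, not the pass's SuccessorList machinery):
#include <cstddef>
#include <set>
#include <utility>
#include <vector>

// Succs[B] lists the successors of block B. A non-empty result signals
// irreducible control flow under this toy model.
std::set<int> findIrreducibleEdges(const std::vector<std::vector<int>> &Succs) {
  std::set<int> OnStack{0}, Visited{0}, RewriteSuccs;
  // Each frame is (block, index of the next successor to visit).
  std::vector<std::pair<int, size_t>> Stack{{0, 0}};
  while (!Stack.empty()) {
    auto &[B, NextIdx] = Stack.back();
    if (NextIdx < Succs[B].size()) {
      int Next = Succs[B][NextIdx++];
      if (Next == 0)
        continue; // An edge back to the header is the natural back edge.
      if (OnStack.count(Next)) {
        RewriteSuccs.insert(B); // A cycle not through the header: irreducible.
        continue;
      }
      if (Visited.insert(Next).second) {
        OnStack.insert(Next);
        Stack.push_back({Next, 0});
      }
      continue;
    }
    OnStack.erase(B);
    Stack.pop_back();
  }
  return RewriteSuccs;
}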
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = nullptr;

  CPSRDef = nullptr;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef.
  for (auto *Pred : MBB.predecessors()) {
    const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
                                    E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = std::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugValue())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
      Modified = true;
      MachineBasicBlock::instr_iterator I = std::prev(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (!NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
      MO = BundleMI->findRegisterUseOperand(ARM::CPSR);
      if (MO && !MO->isKill())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = nullptr;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}
// This is essentially the same iterative algorithm that SSAUpdater uses,
// except we already have a dominator tree, so we don't have to recompute it.
void LiveRangeCalc::updateSSA() {
  assert(Indexes && "Missing SlotIndexes");
  assert(DomTree && "Missing dominator tree");

  // Iterate until convergence.
  bool Changed;
  do {
    Changed = false;
    // Propagate live-out values down the dominator tree, inserting phi-defs
    // when necessary.
    for (LiveInBlock &I : LiveIn) {
      MachineDomTreeNode *Node = I.DomNode;
      // Skip block if the live-in value has already been determined.
      if (!Node)
        continue;
      MachineBasicBlock *MBB = Node->getBlock();
      MachineDomTreeNode *IDom = Node->getIDom();
      LiveOutPair IDomValue;

      // We need a live-in value to a block with no immediate dominator?
      // This is probably an unreachable block that has survived somehow.
      bool needPHI = !IDom || !Seen.test(IDom->getBlock()->getNumber());

      // IDom dominates all of our predecessors, but it may not be their
      // immediate dominator. Check if any of them have live-out values that
      // are properly dominated by IDom. If so, we need a phi-def here.
      if (!needPHI) {
        IDomValue = Map[IDom->getBlock()];

        // Cache the DomTree node that defined the value.
        if (IDomValue.first && IDomValue.first != &UndefVNI &&
            !IDomValue.second) {
          Map[IDom->getBlock()].second = IDomValue.second =
              DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
        }

        for (MachineBasicBlock *Pred : MBB->predecessors()) {
          LiveOutPair &Value = Map[Pred];
          if (!Value.first || Value.first == IDomValue.first)
            continue;
          if (Value.first == &UndefVNI) {
            needPHI = true;
            break;
          }

          // Cache the DomTree node that defined the value.
          if (!Value.second)
            Value.second =
                DomTree->getNode(Indexes->getMBBFromIndex(Value.first->def));

          // This predecessor is carrying something other than IDomValue.
          // It could be because IDomValue hasn't propagated yet, or it could
          // be because MBB is in the dominance frontier of that value.
          if (DomTree->dominates(IDom, Value.second)) {
            needPHI = true;
            break;
          }
        }
      }

      // The value may be live-through even if Kill is set, as can happen when
      // we are called from extendRange. In that case LiveOutSeen is true, and
      // LiveOut indicates a foreign or missing value.
      LiveOutPair &LOP = Map[MBB];

      // Create a phi-def if required.
      if (needPHI) {
        Changed = true;
        assert(Alloc && "Need VNInfo allocator to create PHI-defs");
        SlotIndex Start, End;
        std::tie(Start, End) = Indexes->getMBBRange(MBB);
        LiveRange &LR = I.LR;
        VNInfo *VNI = LR.getNextValue(Start, *Alloc);
        I.Value = VNI;
        // This block is done, we know the final value.
        I.DomNode = nullptr;

        // Add liveness since updateFromLiveIns now skips this node.
        if (I.Kill.isValid()) {
          if (VNI)
            LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI));
        } else {
          if (VNI)
            LR.addSegment(LiveInterval::Segment(Start, End, VNI));
          LOP = LiveOutPair(VNI, Node);
        }
      } else if (IDomValue.first && IDomValue.first != &UndefVNI) {
        // No phi-def here. Remember incoming value.
        I.Value = IDomValue.first;

        // If the IDomValue is killed in the block, don't propagate through.
        if (I.Kill.isValid())
          continue;

        // Propagate IDomValue if it isn't killed:
        // MBB is live-out and doesn't define its own value.
        if (LOP.first == IDomValue.first)
          continue;
        Changed = true;
        LOP = IDomValue;
      }
    }
  } while (Changed);
}
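// The phi-placement test at the heart of updateSSA(), reduced to a sketch
// with toy value ids: a block needs a phi-def exactly when some predecessor
// carries a value that differs from the IDom's value and whose definition is
// dominated by the IDom, i.e. the block sits on that value's dominance
// frontier. IDomDominatesDefOf is a hypothetical stand-in for the DomTree
// query; negative ids mean "not yet known".
#include <functional>
#include <vector>

bool needsPhi(const std::vector<int> &PredLiveOut, int IDomValue,
              const std::function<bool(int)> &IDomDominatesDefOf) {
  for (int V : PredLiveOut) {
    if (V < 0 || V == IDomValue)
      continue; // Unknown so far, or agrees with the dominating value.
    // A different value whose def is dominated by our IDom means a phi-def
    // is required here; a value from elsewhere may simply not have
    // propagated yet, which the enclosing fixed-point loop will resolve.
    if (IDomDominatesDefOf(V))
      return true;
  }
  return false;
}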
bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
                                     SlotIndex Use, unsigned PhysReg,
                                     ArrayRef<SlotIndex> Undefs) {
  unsigned UseMBBNum = UseMBB.getNumber();

  // Block numbers where LR should be live-in.
  SmallVector<unsigned, 16> WorkList(1, UseMBBNum);

  // Remember if we have seen more than one value.
  bool UniqueVNI = true;
  VNInfo *TheVNI = nullptr;

  bool FoundUndef = false;

  // Using Seen as a visited set, perform a BFS for all reaching defs.
  for (unsigned i = 0; i != WorkList.size(); ++i) {
    MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]);

#ifndef NDEBUG
    if (MBB->pred_empty()) {
      MBB->getParent()->verify();
      errs() << "Use of " << printReg(PhysReg)
             << " does not have a corresponding definition on every path:\n";
      const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
      if (MI != nullptr)
        errs() << Use << " " << *MI;
      report_fatal_error("Use not jointly dominated by defs.");
    }

    if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
        !MBB->isLiveIn(PhysReg)) {
      MBB->getParent()->verify();
      const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
      errs() << "The register " << printReg(PhysReg, TRI)
             << " needs to be live in to " << printMBBReference(*MBB)
             << ", but is missing from the live-in list.\n";
      report_fatal_error("Invalid global physical register");
    }
#endif
    FoundUndef |= MBB->pred_empty();

    for (MachineBasicBlock *Pred : MBB->predecessors()) {
      // Is this a known live-out block?
      if (Seen.test(Pred->getNumber())) {
        if (VNInfo *VNI = Map[Pred].first) {
          if (TheVNI && TheVNI != VNI)
            UniqueVNI = false;
          TheVNI = VNI;
        }
        continue;
      }

      SlotIndex Start, End;
      std::tie(Start, End) = Indexes->getMBBRange(Pred);

      // First time we see Pred. Try to determine the live-out value, but set
      // it as null if Pred is live-through with an unknown value.
      auto EP = LR.extendInBlock(Undefs, Start, End);
      VNInfo *VNI = EP.first;
      FoundUndef |= EP.second;
      setLiveOutValue(Pred, EP.second ? &UndefVNI : VNI);
      if (VNI) {
        if (TheVNI && TheVNI != VNI)
          UniqueVNI = false;
        TheVNI = VNI;
      }
      if (VNI || EP.second)
        continue;

      // No, we need a live-in value for Pred as well.
      if (Pred != &UseMBB)
        WorkList.push_back(Pred->getNumber());
      else
        // Loopback to UseMBB, so value is really live through.
        Use = SlotIndex();
    }
  }

  LiveIn.clear();
  FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI);
  if (!Undefs.empty() && FoundUndef)
    UniqueVNI = false;

  // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
  // neither requires it. Skip the sorting overhead for small updates.
  if (WorkList.size() > 4)
    array_pod_sort(WorkList.begin(), WorkList.end());

  // If a unique reaching def was found, blit in the live ranges immediately.
  if (UniqueVNI) {
    assert(TheVNI != nullptr && TheVNI != &UndefVNI);
    LiveRangeUpdater Updater(&LR);
    for (unsigned BN : WorkList) {
      SlotIndex Start, End;
      std::tie(Start, End) = Indexes->getMBBRange(BN);
      // Trim the live range in UseMBB.
      if (BN == UseMBBNum && Use.isValid())
        End = Use;
      else
        Map[MF->getBlockNumbered(BN)] = LiveOutPair(TheVNI, nullptr);
      Updater.add(Start, End, TheVNI);
    }
    return true;
  }

  // Prepare the defined/undefined bit vectors.
  EntryInfoMap::iterator Entry;
  bool DidInsert;
  std::tie(Entry, DidInsert) = EntryInfos.insert(
      std::make_pair(&LR, std::make_pair(BitVector(), BitVector())));
  if (DidInsert) {
    // Initialize newly inserted entries.
    unsigned N = MF->getNumBlockIDs();
    Entry->second.first.resize(N);
    Entry->second.second.resize(N);
  }
  BitVector &DefOnEntry = Entry->second.first;
  BitVector &UndefOnEntry = Entry->second.second;

  // Multiple values were found, so transfer the work list to the LiveIn array
  // where UpdateSSA will use it as a work list.
  LiveIn.reserve(WorkList.size());
  for (unsigned BN : WorkList) {
    MachineBasicBlock *MBB = MF->getBlockNumbered(BN);
    if (!Undefs.empty() &&
        !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
      continue;
    addLiveInBlock(LR, DomTree->getNode(MBB));
    if (MBB == &UseMBB)
      LiveIn.back().Kill = Use;
  }

  return false;
}
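// The unique-value fast path of findReachingDefs, as a toy sketch: walk
// backwards from the use block through predecessors, collecting known
// live-out values, and bail out to the slow (SSA-update) path as soon as two
// distinct values reach the use. LiveOut[B] >= 0 stands in for "block B has a
// known live-out value id"; the real code also tries to extend a segment
// inside each predecessor first.
#include <cstddef>
#include <optional>
#include <vector>

std::optional<int> uniqueReachingDef(const std::vector<std::vector<int>> &Preds,
                                     const std::vector<int> &LiveOut,
                                     int UseBB) {
  std::vector<int> WorkList{UseBB};
  std::vector<bool> Seen(Preds.size(), false);
  Seen[UseBB] = true;
  std::optional<int> TheVal;
  // BFS upward until every path hits a block with a known live-out value.
  for (size_t i = 0; i != WorkList.size(); ++i) {
    for (int P : Preds[WorkList[i]]) {
      if (LiveOut[P] >= 0) {
        if (TheVal && *TheVal != LiveOut[P])
          return std::nullopt; // Two different defs reach the use.
        TheVal = LiveOut[P];
        continue;
      }
      if (!Seen[P]) {
        Seen[P] = true;
        WorkList.push_back(P); // P also needs a live-in value.
      }
    }
  }
  return TheVal;
}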
bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
                                 MachineBasicBlock &MBB, BitVector &DefOnEntry,
                                 BitVector &UndefOnEntry) {
  unsigned BN = MBB.getNumber();
  if (DefOnEntry[BN])
    return true;
  if (UndefOnEntry[BN])
    return false;

  auto MarkDefined = [BN, &DefOnEntry](MachineBasicBlock &B) -> bool {
    for (MachineBasicBlock *S : B.successors())
      DefOnEntry[S->getNumber()] = true;
    DefOnEntry[BN] = true;
    return true;
  };

  SetVector<unsigned> WorkList;
  // Checking if the entry of MBB is reached by some def: add all predecessors
  // that are potentially defined-on-exit to the work list.
  for (MachineBasicBlock *P : MBB.predecessors())
    WorkList.insert(P->getNumber());

  for (unsigned i = 0; i != WorkList.size(); ++i) {
    // Determine if the exit from the block is reached by some def.
    unsigned N = WorkList[i];
    MachineBasicBlock &B = *MF->getBlockNumbered(N);
    if (Seen[N]) {
      const LiveOutPair &LOB = Map[&B];
      if (LOB.first != nullptr && LOB.first != &UndefVNI)
        return MarkDefined(B);
    }
    SlotIndex Begin, End;
    std::tie(Begin, End) = Indexes->getMBBRange(&B);
    // Treat End as not belonging to B.
    // If LR has a segment S that starts at the next block, i.e. [End, ...),
    // std::upper_bound will return the segment following S. Instead,
    // S should be treated as the first segment that does not overlap B.
    LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(),
                                              End.getPrevSlot());
    if (UB != LR.begin()) {
      LiveRange::Segment &Seg = *std::prev(UB);
      if (Seg.end > Begin) {
        // There is a segment that overlaps B. If the range is not explicitly
        // undefined between the end of the segment and the end of the block,
        // treat the block as defined on exit. If it is, go to the next block
        // on the work list.
        if (LR.isUndefIn(Undefs, Seg.end, End))
          continue;
        return MarkDefined(B);
      }
    }

    // No segment overlaps with this block. If this block is not defined on
    // entry, or it undefines the range, do not process its predecessors.
    if (UndefOnEntry[N] || LR.isUndefIn(Undefs, Begin, End)) {
      UndefOnEntry[N] = true;
      continue;
    }
    if (DefOnEntry[N])
      return MarkDefined(B);

    // Still don't know: add all predecessors to the work list.
    for (MachineBasicBlock *P : B.predecessors())
      WorkList.insert(P->getNumber());
  }

  UndefOnEntry[BN] = true;
  return false;
}
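// The memoization pattern behind isDefOnEntry, as a heavily simplified
// sketch: a backward BFS that caches a positive answer in DefOnEntry and a
// negative one in UndefOnEntry so repeated queries stay cheap. KnownDefOut is
// a hypothetical oracle for "a def is live out of this block"; the real code
// also inspects live segments and marks intermediate blocks along the way.
#include <cstddef>
#include <vector>

bool defReachesEntry(int BB, const std::vector<std::vector<int>> &Preds,
                     std::vector<char> &DefOnEntry,
                     std::vector<char> &UndefOnEntry,
                     bool (*KnownDefOut)(int)) {
  if (DefOnEntry[BB])
    return true;
  if (UndefOnEntry[BB])
    return false;
  std::vector<int> Work(Preds[BB].begin(), Preds[BB].end());
  std::vector<char> InWork(Preds.size(), 0);
  for (int P : Work)
    InWork[P] = 1;
  for (size_t i = 0; i != Work.size(); ++i) {
    int B = Work[i];
    if (KnownDefOut(B)) {
      DefOnEntry[BB] = 1; // Cache the positive answer.
      return true;
    }
    for (int P : Preds[B])
      if (!InWork[P]) {
        InWork[P] = 1;
        Work.push_back(P);
      }
  }
  UndefOnEntry[BB] = 1; // Nothing reached: cache the negative answer.
  return false;
}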
void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
                                               LiveRange &Range) {
  assert(TargetRegisterInfo::isVirtualRegister(Reg));
  if (Range.empty())
    return;

  // Return two booleans: { def-modifies-reg, def-covers-reg }.
  auto IsRegDef = [this, Reg, LM](MachineOperand &Op) -> std::pair<bool, bool> {
    if (!Op.isReg() || !Op.isDef())
      return {false, false};
    unsigned DR = Op.getReg(), DSR = Op.getSubReg();
    if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg)
      return {false, false};
    LaneBitmask SLM = getLaneMask(DR, DSR);
    LaneBitmask A = SLM & LM;
    return {A.any(), A == SLM};
  };

  // The splitting step will create pairs of predicated definitions without
  // any implicit uses (since implicit uses would interfere with predication).
  // This can cause the reaching defs to become dead after live range
  // recomputation, even though they are not really dead.
  // We need to identify predicated defs that need implicit uses, and
  // dead defs that are not really dead, and correct both problems.

  auto Dominate = [this](SetVector<MachineBasicBlock *> &Defs,
                         MachineBasicBlock *Dest) -> bool {
    for (MachineBasicBlock *D : Defs)
      if (D != Dest && MDT->dominates(D, Dest))
        return true;

    MachineBasicBlock *Entry = &Dest->getParent()->front();
    SetVector<MachineBasicBlock *> Work(Dest->pred_begin(), Dest->pred_end());
    for (unsigned i = 0; i < Work.size(); ++i) {
      MachineBasicBlock *B = Work[i];
      if (Defs.count(B))
        continue;
      if (B == Entry)
        return false;
      for (auto *P : B->predecessors())
        Work.insert(P);
    }
    return true;
  };

  // First, try to extend live range within individual basic blocks. This
  // will leave us only with dead defs that do not reach any predicated
  // defs in the same block.
  SetVector<MachineBasicBlock *> Defs;
  SmallVector<SlotIndex, 4> PredDefs;
  for (auto &Seg : Range) {
    if (!Seg.start.isRegister())
      continue;
    MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
    Defs.insert(DefI->getParent());
    if (HII->isPredicated(*DefI))
      PredDefs.push_back(Seg.start);
  }

  SmallVector<SlotIndex, 8> Undefs;
  LiveInterval &LI = LIS->getInterval(Reg);
  LI.computeSubRangeUndefs(Undefs, LM, *MRI, *LIS->getSlotIndexes());

  for (auto &SI : PredDefs) {
    MachineBasicBlock *BB = LIS->getMBBFromIndex(SI);
    auto P = Range.extendInBlock(Undefs, LIS->getMBBStartIdx(BB), SI);
    if (P.first != nullptr || P.second)
      SI = SlotIndex();
  }

  // Calculate reachability for those predicated defs that were not handled
  // by the in-block extension.
  SmallVector<SlotIndex, 4> ExtTo;
  for (auto &SI : PredDefs) {
    if (!SI.isValid())
      continue;
    MachineBasicBlock *BB = LIS->getMBBFromIndex(SI);
    if (BB->pred_empty())
      continue;
    // If the defs from this range reach SI via all predecessors, it is live.
    // It can happen that SI is reached by the defs through some paths, but
    // not all. In the IR coming into this optimization, SI would not be
    // considered live, since the defs would then not jointly dominate SI.
    // That means that SI is an overwriting def, and no implicit use is
    // needed at this point. Do not add SI to the extension points, since
    // extendToIndices will abort if there is no joint dominance.
    // If the abort was avoided by adding extra undefs to Undefs,
    // extendToIndices could actually indicate that SI is live, contrary
    // to the original IR.
    if (Dominate(Defs, BB))
      ExtTo.push_back(SI);
  }

  if (!ExtTo.empty())
    LIS->extendToIndices(Range, ExtTo, Undefs);

  // Remove <dead> flags from all defs that are not dead after live range
  // extension, and collect all def operands. They will be used to generate
  // the necessary implicit uses.
  // At the same time, add <dead> flag to all defs that are actually dead.
  // This can happen, for example, when a mux with identical inputs is
  // replaced with a COPY: the use of the predicate register disappears and
  // the def can become dead.
  std::set<RegisterRef> DefRegs;
  for (auto &Seg : Range) {
    if (!Seg.start.isRegister())
      continue;
    MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
    for (auto &Op : DefI->operands()) {
      auto P = IsRegDef(Op);
      if (P.second && Seg.end.isDead()) {
        Op.setIsDead(true);
      } else if (P.first) {
        DefRegs.insert(Op);
        Op.setIsDead(false);
      }
    }
  }

  // Now, add implicit uses to each predicated def that is reached
  // by other defs.
  for (auto &Seg : Range) {
    if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot()))
      continue;
    MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
    if (!HII->isPredicated(*DefI))
      continue;
    // Construct the set of all necessary implicit uses, based on the def
    // operands in the instruction.
    std::set<RegisterRef> ImpUses;
    for (auto &Op : DefI->operands())
      if (Op.isReg() && Op.isDef() && DefRegs.count(Op))
        ImpUses.insert(Op);
    if (ImpUses.empty())
      continue;
    MachineFunction &MF = *DefI->getParent()->getParent();
    for (RegisterRef R : ImpUses)
      MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub);
  }
}
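// The backward-reachability half of the Dominate lambda above, as a toy
// sketch: the def blocks jointly dominate Dest exactly when no backward walk
// from Dest reaches the function entry without passing through a def block.
// Types are simplified stand-ins, and the MDT fast path (any single def block
// dominating Dest) is omitted.
#include <cstddef>
#include <set>
#include <vector>

bool jointlyDominate(const std::set<int> &Defs,
                     const std::vector<std::vector<int>> &Preds, int Dest,
                     int Entry) {
  std::vector<int> Work(Preds[Dest].begin(), Preds[Dest].end());
  std::set<int> InWork(Work.begin(), Work.end());
  for (size_t i = 0; i != Work.size(); ++i) {
    int B = Work[i];
    if (Defs.count(B))
      continue; // This path is cut off by a def; stop walking it.
    if (B == Entry)
      return false; // Reached the entry without meeting a def.
    for (int P : Preds[B])
      if (InWork.insert(P).second)
        Work.push_back(P);
  }
  return true;
}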
void MachineDominatorTree::applySplitCriticalEdges() const {
  // Bail out early if there is nothing to do.
  if (CriticalEdgesToSplit.empty())
    return;

  // For each element in CriticalEdgesToSplit, remember whether or not that
  // element is the new immediate dominator of its successor. The mapping is
  // done by index, i.e., the information for the ith element of
  // CriticalEdgesToSplit is the ith element of IsNewIDom.
  SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true);
  size_t Idx = 0;

  // Collect all the dominance properties info, before invalidating
  // the underlying DT.
  for (CriticalEdge &Edge : CriticalEdgesToSplit) {
    // Update dominator information.
    MachineBasicBlock *Succ = Edge.ToBB;
    MachineDomTreeNode *SuccDTNode = DT->getNode(Succ);

    for (MachineBasicBlock *PredBB : Succ->predecessors()) {
      if (PredBB == Edge.NewBB)
        continue;
      // If we are in this situation:
      //   FromBB1        FromBB2
      //      +              +
      //      + +          + +
      //      +  +        +  +
      //     ...  Split1 Split2 ...
      //           +         +
      //            +       +
      //             +     +
      //              Succ
      // Instead of checking the dominance property with Split2, we check it
      // with FromBB2, since Split2 is not yet known to the underlying DT
      // structure.
      if (NewBBs.count(PredBB)) {
        assert(PredBB->pred_size() == 1 && "A basic block resulting from a "
                                           "critical edge split has more "
                                           "than one predecessor!");
        PredBB = *PredBB->pred_begin();
      }
      if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) {
        IsNewIDom[Idx] = false;
        break;
      }
    }
    ++Idx;
  }

  // Now, update DT with the collected dominance properties info.
  Idx = 0;
  for (CriticalEdge &Edge : CriticalEdgesToSplit) {
    // We know FromBB dominates NewBB.
    MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);

    // If all the other predecessors of "Succ" are dominated by "Succ" itself
    // then the new block is the new immediate dominator of "Succ". Otherwise,
    // the new block doesn't dominate anything.
    if (IsNewIDom[Idx])
      DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode);
    ++Idx;
  }
  NewBBs.clear();
  CriticalEdgesToSplit.clear();
}
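// The shape of the two-phase update above, as a sketch: answer every
// dominance query against the still-valid tree first, then apply all
// mutations, so no query ever observes a half-updated structure. DomTreeT and
// NewIDomDecision are hypothetical stand-ins; addNewBlock and
// changeImmediateDominator mirror the MachineDominatorTree calls.
#include <cstddef>
#include <vector>

struct CriticalEdgeT { int FromBB, NewBB, ToBB; };

template <typename DomTreeT, typename DecideFn>
void applyDeferredSplits(DomTreeT &DT, std::vector<CriticalEdgeT> &Edges,
                         DecideFn NewIDomDecision) {
  // Phase 1: read-only queries against the pre-split tree.
  std::vector<bool> IsNewIDom(Edges.size(), true);
  for (size_t I = 0; I != Edges.size(); ++I)
    IsNewIDom[I] = NewIDomDecision(DT, Edges[I]);
  // Phase 2: mutations only.
  for (size_t I = 0; I != Edges.size(); ++I) {
    auto *NewNode = DT.addNewBlock(Edges[I].NewBB, Edges[I].FromBB);
    if (IsNewIDom[I])
      DT.changeImmediateDominator(Edges[I].ToBB, NewNode);
  }
  Edges.clear();
}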