/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); DenseSet<unsigned> UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); if (IsSimple) return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); // EH edges are ignored by AnalyzeBranch. if (PredBB->succ_size() > 1) continue; MachineBasicBlock *PredTBB, *PredFBB; SmallVector<MachineOperand, 4> PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) continue; if (!PredCond.empty()) continue; // Don't duplicate into a fall-through predecessor (at least for now). if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); // Remove PredBB's unconditional branch. TII->RemoveBranch(*PredBB); if (RS && !TailBB->livein_empty()) { // Update PredBB livein. RS->enterBasicBlock(PredBB); if (!PredBB->empty()) RS->forward(prior(PredBB->end())); BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), E = TailBB->livein_end(); I != E; ++I) { if (!RegsLiveAtExit[*I]) // If a register is previously livein to the tail but it's not live // at the end of predecessor BB, then it should be added to its // livein list. PredBB->addLiveIn(*I); } } // Clone the contents of TailBB into PredBB. DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; // Use instr_iterator here to properly handle bundles, e.g. // ARM Thumb2 IT block. MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); while (I != TailBB->instr_end()) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); } } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } // Simplify TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) PredBB->addSuccessor(*I); Changed = true; ++NumTailDups; } // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. if (PrevBB->succ_size() == 1 && !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); if (PreRegAlloc) { DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); if (MI->getParent()) MI->eraseFromParent(); } // Now copy the non-PHI instructions. while (I != TailBB->end()) { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first) .addReg(CopyInfos[i].second)); } } else { // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } PrevBB->removeSuccessor(PrevBB->succ_begin()); assert(PrevBB->succ_empty()); PrevBB->transferSuccessors(TailBB); TDBBs.push_back(PrevBB); Changed = true; } // If this is after register allocation, there are no phis to fix. if (!PreRegAlloc) return Changed; // If we made no changes so far, we are safe. if (!Changed) return Changed; // Handle the nasty case in that we duplicated a block that is part of a loop // into some but not all of its predecessors. For example: // 1 -> 2 <-> 3 | // \ | // \---> rest | // if we duplicate 2 into 1 but not into 3, we end up with // 12 -> 3 <-> 2 -> rest | // \ / | // \----->-----/ | // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced // with a phi in 3 (which now dominates 2). // What we do here is introduce a copy in 3 of the register defined by the // phi, just like when we are duplicating 2 into 3, but we don't copy any // real instructions or remove the 3 -> 2 edge from the phi in 2. for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) continue; // EH edges if (PredBB->succ_size() != 1) continue; DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } } return Changed; }
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies) { // Set the limit on the number of instructions to duplicate, with a default // of one less than the tail-merge threshold. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; if (PreRegAlloc) { if (TailBB->empty()) return false; const TargetInstrDesc &TID = TailBB->back().getDesc(); // Pre-regalloc tail duplication hurts compile time and doesn't help // much except for indirect branches and returns. if (!TID.isIndirectBranch() && !TID.isReturn()) return false; // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there // are common paths through the code. The limit needs to be high enough // to allow undoing the effects of tail merging and other optimizations // that rearrange the predecessors of the indirect branch. MaxDuplicateCount = 20; } // Don't try to tail-duplicate single-block loops. if (TailBB->isSuccessor(TailBB)) return false; // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; bool HasCall = false; for (MachineBasicBlock::iterator I = TailBB->begin(); I != TailBB->end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. if (I->getDesc().isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. if (PreRegAlloc && I->getDesc().isReturn()) return false; // Don't duplicate more than the threshold. if (InstrCount == MaxDuplicateCount) return false; // Remember if we saw a call. if (I->getDesc().isCall()) HasCall = true; if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; } // Don't tail-duplicate calls before register allocation. Calls presents a // barrier to register allocation so duplicating them may end up increasing // spills. if (InstrCount > 1 && (PreRegAlloc && HasCall)) return false; DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); if (PredBB->succ_size() > 1) continue; MachineBasicBlock *PredTBB, *PredFBB; SmallVector<MachineOperand, 4> PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) continue; if (!PredCond.empty()) continue; // EH edges are ignored by AnalyzeBranch. if (PredBB->succ_size() != 1) continue; // Don't duplicate into a fall-through predecessor (at least for now). if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); // Remove PredBB's unconditional branch. TII->RemoveBranch(*PredBB); // Clone the contents of TailBB into PredBB. DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); while (I != TailBB->end()) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap); } } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) PredBB->addSuccessor(*I); Changed = true; ++NumTailDups; } // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; bool PriorUnAnalyzable = TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true); // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); if (PreRegAlloc) { DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos); if (MI->getParent()) MI->eraseFromParent(); } // Now copy the non-PHI instructions. while (I != TailBB->end()) { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap); MI->eraseFromParent(); } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), TII->get(TargetOpcode::COPY), CopyInfos[i].first) .addReg(CopyInfos[i].second)); } } else { // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } PrevBB->removeSuccessor(PrevBB->succ_begin()); assert(PrevBB->succ_empty()); PrevBB->transferSuccessors(TailBB); TDBBs.push_back(PrevBB); Changed = true; } return Changed; }
bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, MachineLoop *L, FlowPattern &FP) { DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n"); // Interested only in conditional branches, no .new, no new-value, etc. // Check the terminators directly, it's easier than handling all responses // from AnalyzeBranch. MachineBasicBlock *TB = 0, *FB = 0; MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); if (T1I == B->end()) return false; unsigned Opc = T1I->getOpcode(); if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf) return false; unsigned PredR = T1I->getOperand(0).getReg(); // Get the layout successor, or 0 if B does not have one. MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0; MachineBasicBlock *T1B = T1I->getOperand(1).getMBB(); MachineBasicBlock::const_iterator T2I = std::next(T1I); // The second terminator should be an unconditional branch. assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump); MachineBasicBlock *T2B = (T2I == B->end()) ? NextB : T2I->getOperand(0).getMBB(); if (T1B == T2B) { // XXX merge if T1B == NextB, or convert branch to unconditional. // mark as diamond with both sides equal? return false; } // Loop could be null for both. if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L) return false; // Record the true/false blocks in such a way that "true" means "if (PredR)", // and "false" means "if (!PredR)". if (Opc == Hexagon::J2_jumpt) TB = T1B, FB = T2B; else TB = T2B, FB = T1B; if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB)) return false; // Detect triangle first. In case of a triangle, one of the blocks TB/FB // can fall through into the other, in other words, it will be executed // in both cases. We only want to predicate the block that is executed // conditionally. unsigned TNP = TB->pred_size(), FNP = FB->pred_size(); unsigned TNS = TB->succ_size(), FNS = FB->succ_size(); // A block is predicable if it has one predecessor (it must be B), and // it has a single successor. In fact, the block has to end either with // an unconditional branch (which can be predicated), or with a fall- // through. bool TOk = (TNP == 1) && (TNS == 1); bool FOk = (FNP == 1) && (FNS == 1); // If neither is predicable, there is nothing interesting. if (!TOk && !FOk) return false; MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0; MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0; MachineBasicBlock *JB = 0; if (TOk) { if (FOk) { if (TSB == FSB) JB = TSB; // Diamond: "if (P) then TB; else FB;". } else { // TOk && !FOk if (TSB == FB) { JB = FB; FB = 0; } } } else { // !TOk && FOk (at least one must be true by now). if (FSB == TB) { JB = TB; TB = 0; } } // Don't try to predicate loop preheaders. if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) { DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB) << " is a loop preheader. Skipping.\n"); return false; } FP = FlowPattern(B, PredR, TB, FB, JB); DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n"); return true; }
bool TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, SmallVector<MachineBasicBlock*, 8> &TDBBs, const DenseSet<unsigned> &UsedByPhi, SmallVector<MachineInstr*, 16> &Copies) { SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), TailBB->succ_end()); SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); bool Changed = false; for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; if (PredBB->getLandingPadSuccessor()) continue; if (bothUsedInPHI(*PredBB, Succs)) continue; MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; SmallVector<MachineOperand, 4> PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) continue; Changed = true; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From simple Succ: " << *TailBB); MachineBasicBlock *NewTarget = *TailBB->succ_begin(); MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB)); // Make PredFBB explicit. if (PredCond.empty()) PredFBB = PredTBB; // Make fall through explicit. if (!PredTBB) PredTBB = NextBB; if (!PredFBB) PredFBB = NextBB; // Redirect if (PredFBB == TailBB) PredFBB = NewTarget; if (PredTBB == TailBB) PredTBB = NewTarget; // Make the branch unconditional if possible if (PredTBB == PredFBB) { PredCond.clear(); PredFBB = NULL; } // Avoid adding fall through branches. if (PredFBB == NextBB) PredFBB = NULL; if (PredTBB == NextBB && PredFBB == NULL) PredTBB = NULL; TII->RemoveBranch(*PredBB); if (PredTBB) TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); PredBB->removeSuccessor(TailBB); unsigned NumSuccessors = PredBB->succ_size(); assert(NumSuccessors <= 1); if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) PredBB->addSuccessor(NewTarget); TDBBs.push_back(PredBB); } return Changed; }
/// Walk the specified loop in the CFG (defined by all blocks dominated by the /// specified header block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. /// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) return; SmallVector<MachineDomTreeNode*, 32> Scopes; SmallVector<MachineDomTreeNode*, 8> WorkList; DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; // Perform a DFS walk to determine the order of visit. WorkList.push_back(HeaderN); while (!WorkList.empty()) { MachineDomTreeNode *Node = WorkList.pop_back_val(); assert(Node && "Null dominator tree node?"); MachineBasicBlock *BB = Node->getBlock(); // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop. const MachineLoop *ML = MLI->getLoopFor(BB); if (ML && ML->getHeader()->isEHPad()) continue; // If this subregion is not in the top level loop at all, exit. if (!CurLoop->contains(BB)) continue; Scopes.push_back(Node); const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); unsigned NumChildren = Children.size(); // Don't hoist things out of a large switch statement. This often causes // code to be hoisted that wasn't going to be executed, and increases // register pressure in a situation where it's likely to matter. if (BB->succ_size() >= 25) NumChildren = 0; OpenChildren[Node] = NumChildren; // Add children in reverse order as then the next popped worklist node is // the first child of this node. This means we ultimately traverse the // DOM tree in exactly the same order as if we'd recursed. for (int i = (int)NumChildren-1; i >= 0; --i) { MachineDomTreeNode *Child = Children[i]; ParentMap[Child] = Node; WorkList.push_back(Child); } } if (Scopes.size() == 0) return; // Compute registers which are livein into the loop headers. RegSeen.clear(); BackTrace.clear(); InitRegPressure(Preheader); // Now perform LICM. for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { MachineDomTreeNode *Node = Scopes[i]; MachineBasicBlock *MBB = Node->getBlock(); EnterScope(MBB); // Process the block SpeculationState = SpeculateUnknown; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ) { MachineBasicBlock::iterator NextMII = MII; ++NextMII; MachineInstr *MI = &*MII; if (!Hoist(MI, Preheader)) UpdateRegPressure(MI); MII = NextMII; } // If it's a leaf node, it's done. Traverse upwards to pop ancestors. ExitScopeIfDone(Node, OpenChildren, ParentMap); } }
/// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. /// \p IsSimple result of isSimpleBB /// \p TailBB Block to be duplicated. /// \p ForcedLayoutPred When non-null, use this block as the layout predecessor /// instead of the previous block in MF's order. /// \p TDBBs A vector to keep track of all blocks tail-duplicated /// into. /// \p Copies A vector of copy instructions inserted. Used later to /// walk all the inserted copies and remove redundant ones. bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallVectorImpl<MachineInstr *> &Copies) { LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) << '\n'); DenseSet<unsigned> UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); if (IsSimple) return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); for (MachineBasicBlock *PredBB : Preds) { assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); if (!canTailDuplicate(TailBB, PredBB)) continue; // Don't duplicate into a fall-through predecessor (at least for now). bool IsLayoutSuccessor = false; if (ForcedLayoutPred) IsLayoutSuccessor = (ForcedLayoutPred == PredBB); else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) IsLayoutSuccessor = true; if (IsLayoutSuccessor) continue; LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); // Remove PredBB's unconditional branch. TII->removeBranch(*PredBB); // Clone the contents of TailBB into PredBB. DenseMap<unsigned, RegSubRegPair> LocalVRMap; SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); I != E; /* empty */) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); } } appendCopies(PredBB, CopyInfos, Copies); // Simplify MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock *Succ : TailBB->successors()) PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); Changed = true; ++NumTailDups; } // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. MachineBasicBlock *PrevBB = ForcedLayoutPred; if (!PrevBB) PrevBB = &*std::prev(TailBB->getIterator()); MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // analyzeBranch. if (PrevBB->succ_size() == 1 && // Layout preds are not always CFG preds. Check. *PrevBB->succ_begin() == TailBB && !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) && PriorCond.empty() && (!PriorTBB || PriorTBB == TailBB) && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); // There may be a branch to the layout successor. This is unlikely but it // happens. The correct thing to do is to remove the branch before // duplicating the instructions in all cases. TII->removeBranch(*PrevBB); if (PreRegAlloc) { DenseMap<unsigned, RegSubRegPair> LocalVRMap; SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); } // Now copy the non-PHI instructions. while (I != TailBB->end()) { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } appendCopies(PrevBB, CopyInfos, Copies); } else { TII->removeBranch(*PrevBB); // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } PrevBB->removeSuccessor(PrevBB->succ_begin()); assert(PrevBB->succ_empty()); PrevBB->transferSuccessors(TailBB); TDBBs.push_back(PrevBB); Changed = true; } // If this is after register allocation, there are no phis to fix. if (!PreRegAlloc) return Changed; // If we made no changes so far, we are safe. if (!Changed) return Changed; // Handle the nasty case in that we duplicated a block that is part of a loop // into some but not all of its predecessors. For example: // 1 -> 2 <-> 3 | // \ | // \---> rest | // if we duplicate 2 into 1 but not into 3, we end up with // 12 -> 3 <-> 2 -> rest | // \ / | // \----->-----/ | // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced // with a phi in 3 (which now dominates 2). // What we do here is introduce a copy in 3 of the register defined by the // phi, just like when we are duplicating 2 into 3, but we don't copy any // real instructions or remove the 3 -> 2 edge from the phi in 2. for (MachineBasicBlock *PredBB : Preds) { if (is_contained(TDBBs, PredBB)) continue; // EH edges if (PredBB->succ_size() != 1) continue; DenseMap<unsigned, RegSubRegPair> LocalVRMap; SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); } appendCopies(PredBB, CopyInfos, Copies); } return Changed; }
bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); const TargetInstrInfo &TII = *STI.getInstrInfo(); const ThumbRegisterInfo *RegInfo = static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); // If MBBI is a return instruction, or is a tPOP followed by a return // instruction in the successor BB, we may be able to directly restore // LR in the PC. // This is only possible with v5T ops (v4T can't change the Thumb bit via // a POP PC instruction), and only if we do not need to emit any SP update. // Otherwise, we need a temporary register to pop the value // and copy that value into LR. auto MBBI = MBB.getFirstTerminator(); bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; if (CanRestoreDirectly) { if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::tPOP_RET); else { auto MBBI_prev = MBBI; MBBI_prev--; assert(MBBI_prev->getOpcode() == ARM::tPOP); assert(MBB.succ_size() == 1); if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. else CanRestoreDirectly = false; } } if (CanRestoreDirectly) { if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) return true; MachineInstrBuilder MIB = AddDefaultPred( BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))); // Copy implicit ops and popped registers, if any. for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef())) MIB.addOperand(MO); MIB.addReg(ARM::PC, RegState::Define); // Erase the old instruction (tBX_RET or tPOP). MBB.erase(MBBI); return true; } // Look for a temporary register to use. // First, compute the liveness information. LivePhysRegs UsedRegs(STI.getRegisterInfo()); UsedRegs.addLiveOuts(MBB); // The semantic of pristines changed recently and now, // the callee-saved registers that are touched in the function // are not part of the pristines set anymore. // Add those callee-saved now. const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) UsedRegs.addReg(CSRegs[i]); DebugLoc dl = DebugLoc(); if (MBBI != MBB.end()) { dl = MBBI->getDebugLoc(); auto InstUpToMBBI = MBB.end(); while (InstUpToMBBI != MBBI) // The pre-decrement is on purpose here. // We want to have the liveness right before MBBI. UsedRegs.stepBackward(*--InstUpToMBBI); } // Look for a register that can be directly use in the POP. unsigned PopReg = 0; // And some temporary register, just in case. unsigned TemporaryReg = 0; BitVector PopFriendly = TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); // Rebuild the GPRs from the high registers because they are removed // form the GPR reg class for thumb1. BitVector GPRsNoLRSP = TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); GPRsNoLRSP |= PopFriendly; GPRsNoLRSP.reset(ARM::LR); GPRsNoLRSP.reset(ARM::SP); GPRsNoLRSP.reset(ARM::PC); for (int Register = GPRsNoLRSP.find_first(); Register != -1; Register = GPRsNoLRSP.find_next(Register)) { if (!UsedRegs.contains(Register)) { // Remember the first pop-friendly register and exit. if (PopFriendly.test(Register)) { PopReg = Register; TemporaryReg = 0; break; } // Otherwise, remember that the register will be available to // save a pop-friendly register. TemporaryReg = Register; } } if (!DoIt && !PopReg && !TemporaryReg) return false; assert((PopReg || TemporaryReg) && "Cannot get LR"); if (TemporaryReg) { assert(!PopReg && "Unnecessary MOV is about to be inserted"); PopReg = PopFriendly.find_first(); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(TemporaryReg, RegState::Define) .addReg(PopReg, RegState::Kill)); } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { // We couldn't use the direct restoration above, so // perform the opposite conversion: tPOP_RET to tPOP. MachineInstrBuilder MIB = AddDefaultPred( BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))); bool Popped = false; for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && MO.getReg() != ARM::PC) { MIB.addOperand(MO); if (!MO.isImplicit()) Popped = true; } // Is there anything left to pop? if (!Popped) MBB.erase(MIB.getInstr()); // Erase the old instruction. MBB.erase(MBBI); MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))); } assert(PopReg && "Do not know how to get LR"); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(PopReg, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(ARM::LR, RegState::Define) .addReg(PopReg, RegState::Kill)); if (TemporaryReg) AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(PopReg, RegState::Define) .addReg(TemporaryReg, RegState::Kill)); return true; }
bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) { // Check if the current basic block has a single predecessor. if (MBB->pred_size() != 1) return false; MachineBasicBlock *PredMBB = *MBB->pred_begin(); MachineBasicBlock::iterator CompBr = PredMBB->getLastNonDebugInstr(); if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2) return false; ++CompBr; do { --CompBr; if (guaranteesZeroRegInBlock(*CompBr, MBB)) break; } while (CompBr != PredMBB->begin() && CompBr->isTerminator()); // We've not found a CBZ/CBNZ, time to bail out. if (!guaranteesZeroRegInBlock(*CompBr, MBB)) return false; unsigned TargetReg = CompBr->getOperand(0).getReg(); if (!TargetReg) return false; assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) && "Expect physical register"); // Remember all registers aliasing with TargetReg. SmallSetVector<unsigned, 8> TargetRegs; for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI) TargetRegs.insert(*AI); bool Changed = false; MachineBasicBlock::iterator LastChange = MBB->begin(); unsigned SmallestDef = TargetReg; // Remove redundant Copy instructions unless TargetReg is modified. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { MachineInstr *MI = &*I; ++I; if (MI->isCopy() && MI->getOperand(0).isReg() && MI->getOperand(1).isReg()) { unsigned DefReg = MI->getOperand(0).getReg(); unsigned SrcReg = MI->getOperand(1).getReg(); if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) && !MRI->isReserved(DefReg) && (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) { DEBUG(dbgs() << "Remove redundant Copy : "); DEBUG((MI)->print(dbgs())); MI->eraseFromParent(); Changed = true; LastChange = I; NumCopiesRemoved++; SmallestDef = TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef; continue; } } if (MI->modifiesRegister(TargetReg, TRI)) break; } if (!Changed) return false; // Otherwise, we have to fixup the use-def chain, starting with the // CBZ/CBNZ. Conservatively mark as much as we can live. CompBr->clearRegisterKills(SmallestDef, TRI); if (std::none_of(TargetRegs.begin(), TargetRegs.end(), [&](unsigned Reg) { return MBB->isLiveIn(Reg); })) MBB->addLiveIn(TargetReg); // Clear any kills of TargetReg between CompBr and the last removed COPY. for (MachineInstr &MMI : make_range(MBB->begin()->getIterator(), LastChange->getIterator())) MMI.clearRegisterKills(SmallestDef, TRI); return true; }
bool PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, MachineLoopInfo *MLI) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr; bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader(); bool Changed = false; for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); // Is there a critical edge from PreMBB to MBB? if (PreMBB->succ_size() == 1) continue; // Avoid splitting backedges of loops. It would introduce small // out-of-line blocks into the loop which is very bad for code placement. if (PreMBB == &MBB && !SplitAllCriticalEdges) continue; const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : nullptr; if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges) continue; // LV doesn't consider a phi use live-out, so isLiveOut only returns true // when the source register is live-out for some other reason than a phi // use. That means the copy we will insert in PreMBB won't be a kill, and // there is a risk it may not be coalesced away. // // If the copy would be a kill, there is no need to split the edge. if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges) continue; DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() << ": " << *BBI); // If Reg is not live-in to MBB, it means it must be live-in to some // other PreMBB successor, and we can avoid the interference by splitting // the edge. // // If Reg *is* live-in to MBB, the interference is inevitable and a copy // is likely to be left after coalescing. If we are looking at a loop // exiting edge, split it so we won't insert code in the loop, otherwise // don't bother. bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges; // Check for a loop exiting edge. if (!ShouldSplit && CurLoop != PreLoop) { DEBUG({ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n"; if (PreLoop) dbgs() << "PreLoop: " << *PreLoop; if (CurLoop) dbgs() << "CurLoop: " << *CurLoop; }); // This edge could be entering a loop, exiting a loop, or it could be // both: Jumping directly form one loop to the header of a sibling // loop. // Split unless this edge is entering CurLoop from an outer loop. ShouldSplit = PreLoop && !PreLoop->contains(CurLoop); } if (!ShouldSplit) continue; if (!PreMBB->SplitCriticalEdge(&MBB, this)) { DEBUG(dbgs() << "Failed to split critical edge.\n"); continue; } Changed = true; ++NumCriticalEdgesSplit; }
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>( MF.getSubtarget().getRegisterInfo()); LiveIntervals *LIS = &getAnalysis<LiveIntervals>(); MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>(); std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges; // First pass, collect all live intervals for SGPRs for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { for (const MachineOperand &MO : MI.defs()) { if (MO.isImplicit()) continue; unsigned Def = MO.getReg(); if (TargetRegisterInfo::isVirtualRegister(Def)) { if (TRI->isSGPRClass(MRI.getRegClass(Def))) SGPRLiveRanges.push_back( std::make_pair(Def, &LIS->getInterval(Def))); } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) { SGPRLiveRanges.push_back( std::make_pair(Def, &LIS->getRegUnit(Def))); } } } } // Second pass fix the intervals for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; if (MBB.succ_size() < 2) continue; // We have structured control flow, so number of succesors should be two. assert(MBB.succ_size() == 2); MachineBasicBlock *SuccA = *MBB.succ_begin(); MachineBasicBlock *SuccB = *(++MBB.succ_begin()); MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB); if (!NCD) continue; MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator(); if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) { assert(NCD->succ_size() == 2); // We want to make sure we insert the Use after the ENDIF, not after // the ELSE. NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(), *(++NCD->succ_begin())); } assert(SuccA && SuccB); for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) { unsigned Reg = RegLR.first; LiveRange *LR = RegLR.second; // FIXME: We could be smarter here. If the register is Live-In to // one block, but the other doesn't have any SGPR defs, then there // won't be a conflict. Also, if the branch decision is based on // a value in an SGPR, then there will be no conflict. bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA); bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB); if ((!LiveInToA && !LiveInToB) || (LiveInToA && LiveInToB)) continue; // This interval is live in to one successor, but not the other, so // we need to update its range so it is live in to both. DEBUG(dbgs() << "Possible SGPR conflict detected " << " in " << *LR << " BB#" << SuccA->getNumber() << ", BB#" << SuccB->getNumber() << " with NCD = " << NCD->getNumber() << '\n'); // FIXME: Need to figure out how to update LiveRange here so this pass // will be able to preserve LiveInterval analysis. BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::SGPR_USE)) .addReg(Reg, RegState::Implicit); DEBUG(NCD->getFirstNonPHI()->dump()); } } return false; }
/// Splits a MachineBasicBlock to branch before \p SplitBefore. The original /// branch is \p OrigBranch. The target of the new branch can either be the same /// as the target of the original branch or the fallthrough successor of the /// original block as determined by \p BranchToFallThrough. The branch /// conditions will be inverted according to \p InvertNewBranch and /// \p InvertOrigBranch. If an instruction that previously fed the branch is to /// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as /// the branch condition. The branch probabilities will be set if the /// MachineBranchProbabilityInfo isn't null. static bool splitMBB(BlockSplitInfo &BSI) { assert(BSI.allInstrsInSameMBB() && "All instructions must be in the same block."); MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent(); MachineFunction *MF = ThisMBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); assert(MRI->isSSA() && "Can only do this while the function is in SSA form."); if (ThisMBB->succ_size() != 2) { LLVM_DEBUG( dbgs() << "Don't know how to handle blocks that don't have exactly" << " two succesors.\n"); return false; } const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo(); unsigned OrigBROpcode = BSI.OrigBranch->getOpcode(); unsigned InvertedOpcode = OrigBROpcode == PPC::BC ? PPC::BCn : OrigBROpcode == PPC::BCn ? PPC::BC : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR; unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode; MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB(); MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin() ? *ThisMBB->succ_rbegin() : *ThisMBB->succ_begin(); MachineBasicBlock *NewBRTarget = BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget; BranchProbability ProbToNewTarget = !BSI.MBPI ? BranchProbability::getUnknown() : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget); // Create a new basic block. MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore; const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); MachineFunction::iterator It = ThisMBB->getIterator(); MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB); MF->insert(++It, NewMBB); // Move everything after SplitBefore into the new block. NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end()); NewMBB->transferSuccessors(ThisMBB); // Add the two successors to ThisMBB. The probabilities come from the // existing blocks if available. ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget); ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl()); // Add the branches to ThisMBB. BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), TII->get(NewBROpcode)) .addReg(BSI.SplitCond->getOperand(0).getReg()) .addMBB(NewBRTarget); BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), TII->get(PPC::B)) .addMBB(NewMBB); if (BSI.MIToDelete) BSI.MIToDelete->eraseFromParent(); // Change the condition on the original branch and invert it if requested. auto FirstTerminator = NewMBB->getFirstTerminator(); if (BSI.NewCond) { assert(FirstTerminator->getOperand(0).isReg() && "Can't update condition of unconditional branch."); FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg()); } if (BSI.InvertOrigBranch) FirstTerminator->setDesc(TII->get(InvertedOpcode)); // If any of the PHIs in the successors of NewMBB reference values that // now come from NewMBB, they need to be updated. for (auto *Succ : NewMBB->successors()) { updatePHIs(Succ, ThisMBB, NewMBB, MRI); } addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI); LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump()); LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump()); LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump()); return true; }