/// Rewrite the PHI nodes in every successor of OldB so that any incoming-block
/// operand that referenced OldB now references NewB instead.
void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
      MachineBasicBlock *NewB) {
  for (auto SI = OldB->succ_begin(), SE = OldB->succ_end(); SI != SE; ++SI) {
    MachineBasicBlock *Succ = *SI;
    // PHIs are grouped at the top of the block; getFirstNonPHI() marks the
    // end of that prefix.
    MachineBasicBlock::iterator NonPhi = Succ->getFirstNonPHI();
    for (MachineBasicBlock::iterator It = Succ->begin(); It != NonPhi; ++It) {
      MachineInstr *Phi = &*It;
      // Retarget every basic-block operand that still points at OldB.
      for (MIOperands Op(Phi); Op.isValid(); ++Op)
        if (Op->isMBB() && Op->getMBB() == OldB)
          Op->setMBB(NewB);
    }
  }
}
/// Update PHI nodes in all successors of OldB: every incoming-block operand
/// equal to OldB is rewritten to refer to NewB.
void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
      MachineBasicBlock *NewB) {
  for (auto SI = OldB->succ_begin(), SE = OldB->succ_end(); SI != SE; ++SI) {
    MachineBasicBlock *Succ = *SI;
    // Only the PHI prefix of the block needs to be inspected.
    auto NonPhi = Succ->getFirstNonPHI();
    for (auto PI = Succ->begin(); PI != NonPhi; ++PI)
      for (MachineOperand &Use : PI->operands())
        if (Use.isMBB() && Use.getMBB() == OldB)
          Use.setMBB(NewB);
  }
}
/// Sink instructions into loops if profitable. This especially tries to prevent /// register spills caused by register pressure if there is little to no /// overhead moving instructions into loops. void MachineLICM::SinkIntoLoop() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) return; SmallVector<MachineInstr *, 8> Candidates; for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); I != Preheader->instr_end(); ++I) { // We need to ensure that we can safely move this instruction into the loop. // As such, it must not have side-effects, e.g. such as a call has. if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) Candidates.push_back(&*I); } for (MachineInstr *I : Candidates) { const MachineOperand &MO = I->getOperand(0); if (!MO.isDef() || !MO.isReg() || !MO.getReg()) continue; if (!MRI->hasOneDef(MO.getReg())) continue; bool CanSink = true; MachineBasicBlock *B = nullptr; for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { // FIXME: Come up with a proper cost model that estimates whether sinking // the instruction (and thus possibly executing it on every loop // iteration) is more expensive than a register. // For now assumes that copies are cheap and thus almost always worth it. if (!MI.isCopy()) { CanSink = false; break; } if (!B) { B = MI.getParent(); continue; } B = DT->findNearestCommonDominator(B, MI.getParent()); if (!B) { CanSink = false; break; } } if (!CanSink || !B || B == Preheader) continue; B->splice(B->getFirstNonPHI(), Preheader, I); } }
/// Remove basic blocks that are unreachable from the function entry, then
/// clean up PHI nodes in the surviving blocks: entries for deleted
/// predecessors are pruned, and PHIs left with a single input are replaced by
/// a register substitution or a COPY.
/// Returns true if any block was deleted or any PHI was modified.
bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
  df_iterator_default_set<MachineBasicBlock*> Reachable;
  bool ModifiedPHI = false;

  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
  MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
  MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();

  // Mark all reachable blocks.  The depth-first walk populates `Reachable`
  // as a side effect; the body is intentionally empty.
  for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable))
    (void)BB/* Mark all reachable blocks */;

  // Loop over all dead blocks, remembering them and deleting all instructions
  // in them.
  std::vector<MachineBasicBlock*> DeadBlocks;
  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    MachineBasicBlock *BB = &*I;

    // Test for deadness.
    if (!Reachable.count(BB)) {
      DeadBlocks.push_back(BB);

      // Update dominator and loop info.
      if (MLI) MLI->removeBlock(BB);
      if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);

      // Detach the dead block from its successors, removing the matching
      // PHI entries (value/block operand pairs) in each successor.
      while (BB->succ_begin() != BB->succ_end()) {
        MachineBasicBlock* succ = *BB->succ_begin();

        MachineBasicBlock::iterator start = succ->begin();
        while (start != succ->end() && start->isPHI()) {
          // Walk operand pairs downward so removal doesn't shift unvisited
          // indices.  PHI operands are: def, then (value, block) pairs.
          for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
            if (start->getOperand(i).isMBB() &&
                start->getOperand(i).getMBB() == BB) {
              start->RemoveOperand(i);
              start->RemoveOperand(i-1);
            }

          start++;
        }

        BB->removeSuccessor(BB->succ_begin());
      }
    }
  }

  // Actually remove the blocks now.
  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
    DeadBlocks[i]->eraseFromParent();

  // Cleanup PHI nodes.
  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    MachineBasicBlock *BB = &*I;
    // Prune unneeded PHI entries.
    SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
                                             BB->pred_end());
    MachineBasicBlock::iterator phi = BB->begin();
    while (phi != BB->end() && phi->isPHI()) {
      // Drop any incoming pair whose source block is no longer a predecessor.
      for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
        if (!preds.count(phi->getOperand(i).getMBB())) {
          phi->RemoveOperand(i);
          phi->RemoveOperand(i-1);
          ModifiedPHI = true;
        }

      // A PHI with exactly one incoming value (def + one value/block pair)
      // is degenerate and can be folded away.
      if (phi->getNumOperands() == 3) {
        const MachineOperand &Input = phi->getOperand(1);
        const MachineOperand &Output = phi->getOperand(0);
        unsigned InputReg = Input.getReg();
        unsigned OutputReg = Output.getReg();
        assert(Output.getSubReg() == 0 && "Cannot have output subregister");
        ModifiedPHI = true;

        if (InputReg != OutputReg) {
          MachineRegisterInfo &MRI = F.getRegInfo();
          unsigned InputSub = Input.getSubReg();
          if (InputSub == 0 &&
              MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) {
            // Simple case: fold the PHI by renaming its output to its input.
            MRI.replaceRegWith(OutputReg, InputReg);
          } else {
            // The input register to the PHI has a subregister or it can't be
            // constrained to the proper register class:
            // insert a COPY instead of simply replacing the output
            // with the input.
            const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
            BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(),
                    TII->get(TargetOpcode::COPY), OutputReg)
              .addReg(InputReg, getRegState(Input), InputSub);
          }
          // Erase the PHI and advance in one step; `phi++` yields the old
          // iterator to eraseFromParent while moving past it.
          phi++->eraseFromParent();
        }
        // NOTE(review): if InputReg == OutputReg this `continue` re-tests the
        // same (unerased, unadvanced) PHI and would spin forever.  Presumably
        // a single-input PHI never defines its own input in valid SSA here —
        // TODO confirm, or hoist the erase out of the inequality check.
        continue;
      }

      ++phi;
    }
  }

  F.RenumberBlocks();

  return (!DeadBlocks.empty() || ModifiedPHI);
}
/// Walk the function in depth-first order (so that in SSA every def is seen
/// before its uses), collect the live ranges of all explicit SGPR defs that
/// are live out of their defining block, and — for each block with two
/// successors — insert an implicit SGPR_USE at the successors' nearest common
/// post-dominator for every collected range that is live into exactly one of
/// the two successors.  The inserted use is registered with LiveIntervals and
/// the range is extended to cover it.  Returns true if any use was inserted.
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  bool MadeChange = false;

  MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
  std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;

  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
  MachineBasicBlock *Entry = MF.begin();

  // Use a depth first order so that in SSA, we encounter all defs before
  // uses. Once the defs of the block have been found, attempt to insert
  // SGPR_USE instructions in successor blocks if required.
  for (MachineBasicBlock *MBB : depth_first(Entry)) {
    for (const MachineInstr &MI : *MBB) {
      for (const MachineOperand &MO : MI.defs()) {
        // Skip implicit defs; only explicit SGPR results are tracked.
        if (MO.isImplicit())
          continue;
        unsigned Def = MO.getReg();
        if (TargetRegisterInfo::isVirtualRegister(Def)) {
          if (TRI->isSGPRClass(MRI.getRegClass(Def))) {
            // Only consider defs that are live outs. We don't care about def /
            // use within the same block.
            LiveRange &LR = LIS->getInterval(Def);
            if (LIS->isLiveOutOfMBB(LR, MBB))
              SGPRLiveRanges.push_back(std::make_pair(Def, &LR));
          }
        } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
          // Physical SGPR defs are tracked through their register unit range.
          SGPRLiveRanges.push_back(std::make_pair(Def, &LIS->getRegUnit(Def)));
        }
      }
    }

    if (MBB->succ_size() < 2)
      continue;

    // We have structured control flow, so the number of successors should be
    // two.
    assert(MBB->succ_size() == 2);
    MachineBasicBlock *SuccA = *MBB->succ_begin();
    MachineBasicBlock *SuccB = *(++MBB->succ_begin());
    MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);

    if (!NCD)
      continue;

    MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();

    if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
      assert(NCD->succ_size() == 2);
      // We want to make sure we insert the Use after the ENDIF, not after
      // the ELSE.
      NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
                                            *(++NCD->succ_begin()));
    }

    for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
      unsigned Reg = RegLR.first;
      LiveRange *LR = RegLR.second;

      // FIXME: We could be smarter here. If the register is Live-In to one
      // block, but the other doesn't have any SGPR defs, then there won't be a
      // conflict. Also, if the branch condition is uniform then there will be
      // no conflict.
      bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
      bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);

      if (!LiveInToA && !LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
              << " is live into neither successor\n");
        continue;
      }

      if (LiveInToA && LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
              << " is live into both successors\n");
        continue;
      }

      // This interval is live in to one successor, but not the other, so
      // we need to update its range so it is live in to both.
      DEBUG(dbgs() << "Possible SGPR conflict detected for "
            << PrintReg(Reg, TRI, 0)
            << " in " << *LR
            << " BB#" << SuccA->getNumber()
            << ", BB#" << SuccB->getNumber()
            << " with NCD = BB#" << NCD->getNumber() << '\n');

      // NOTE(review): physical SGPRs are also collected above, so this assert
      // can fire for a reg-unit range that reaches exactly one successor —
      // presumably physregs never get this far; confirm against the collector.
      assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
             "Not expecting to extend live range of physreg");

      // FIXME: Need to figure out how to update LiveRange here so this pass
      // will be able to preserve LiveInterval analysis.
      MachineInstr *NCDSGPRUse =
        BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
                TII->get(AMDGPU::SGPR_USE))
        .addReg(Reg, RegState::Implicit);

      MadeChange = true;

      // Register the new instruction and stretch the range to cover it.
      SlotIndex SI = LIS->InsertMachineInstrInMaps(NCDSGPRUse);
      LIS->extendToIndices(*LR, SI.getRegSlot());

      if (LV) {
        // TODO: This won't work post-SSA
        LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse);
      }

      DEBUG(NCDSGPRUse->dump());
    }
  }

  return MadeChange;
}
bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, MachineFunction &MF, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII) { SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs; // FIXME: For now, we sink only to a successor which has a single predecessor // so that we can directly sink COPY instructions to the successor without // adding any new block or branch instruction. for (MachineBasicBlock *SI : CurBB.successors()) if (!SI->livein_empty() && SI->pred_size() == 1) SinkableBBs.insert(SI); if (SinkableBBs.empty()) return false; bool Changed = false; // Track which registers have been modified and used between the end of the // block and the current instruction. ModifiedRegUnits.clear(); UsedRegUnits.clear(); for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) { MachineInstr *MI = &*I; ++I; if (MI->isDebugInstr()) continue; // Do not move any instruction across function call. if (MI->isCall()) return false; if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) { LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits, TRI); continue; } // Track the operand index for use in Copy. SmallVector<unsigned, 2> UsedOpsInCopy; // Track the register number defed in Copy. SmallVector<unsigned, 2> DefedRegsInCopy; // Don't sink the COPY if it would violate a register dependency. if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, ModifiedRegUnits, UsedRegUnits)) { LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits, TRI); continue; } assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) && "Unexpect SrcReg or DefReg"); MachineBasicBlock *SuccBB = getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI); // Don't sink if we cannot find a single sinkable successor in which Reg // is live-in. 
if (!SuccBB) { LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits, TRI); continue; } assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) && "Unexpected predecessor"); // Clear the kill flag if SrcReg is killed between MI and the end of the // block. clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); performSink(*MI, *SuccBB, InsertPos); updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); Changed = true; ++NumPostRACopySink; } return Changed; }
/// Walk MBB once and insert the state transitions (via
/// toExact/toWQM/toWWM/fromWWM) required to satisfy the per-instruction needs
/// recorded in Instructions and the per-block needs in Blocks.  `State` tracks
/// the single mode currently active; `Needs` is a bitmask of the modes the
/// current point is allowed to be in, so a transition is emitted only when the
/// active state is no longer permitted.
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
                                   bool isEntry) {
  auto BII = Blocks.find(&MBB);
  if (BII == Blocks.end())
    return;

  const BlockInfo &BI = BII->second;

  // This is a non-entry block that is WQM throughout, so no need to do
  // anything.
  if (!isEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact)
    return;

  LLVM_DEBUG(dbgs() << "\nProcessing block " << printMBBReference(MBB)
                    << ":\n");

  unsigned SavedWQMReg = 0;
  unsigned SavedNonWWMReg = 0;
  bool WQMFromExec = isEntry;
  char State = (isEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
  char NonWWMState = 0;

  auto II = MBB.getFirstNonPHI(), IE = MBB.end();
  if (isEntry)
    ++II; // Skip the instruction that saves LiveMask

  // This stores the first instruction where it's safe to switch from WQM to
  // Exact or vice versa.
  MachineBasicBlock::iterator FirstWQM = IE;

  // This stores the first instruction where it's safe to switch from WWM to
  // Exact/WQM or to switch to WWM. It must always be the same as, or after,
  // FirstWQM since if it's safe to switch to/from WWM, it must be safe to
  // switch to/from WQM as well.
  MachineBasicBlock::iterator FirstWWM = IE;
  // Loop runs one extra iteration past the last instruction (II == IE) to
  // handle the block-exit state.
  for (;;) {
    MachineBasicBlock::iterator Next = II;
    char Needs = StateExact | StateWQM; // WWM is disabled by default
    char OutNeeds = 0;

    if (FirstWQM == IE)
      FirstWQM = II;
    if (FirstWWM == IE)
      FirstWWM = II;

    // First, figure out the allowed states (Needs) based on the propagated
    // flags.
    if (II != IE) {
      MachineInstr &MI = *II;

      if (requiresCorrectState(MI)) {
        auto III = Instructions.find(&MI);
        if (III != Instructions.end()) {
          if (III->second.Needs & StateWWM)
            Needs = StateWWM;
          else if (III->second.Needs & StateWQM)
            Needs = StateWQM;
          else
            Needs &= ~III->second.Disabled;
          OutNeeds = III->second.OutNeeds;
        }
      } else {
        // If the instruction doesn't actually need a correct EXEC, then we can
        // safely leave WWM enabled.
        Needs = StateExact | StateWQM | StateWWM;
      }

      if (MI.isTerminator() && OutNeeds == StateExact)
        Needs = StateExact;

      if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
        MI.getOperand(3).setImm(1);

      ++Next;
    } else {
      // End of basic block
      if (BI.OutNeeds & StateWQM)
        Needs = StateWQM;
      else if (BI.OutNeeds == StateExact)
        Needs = StateExact;
      else
        Needs = StateWQM | StateExact;
    }

    // Now, transition if necessary.
    if (!(Needs & State)) {
      MachineBasicBlock::iterator First;
      if (State == StateWWM || Needs == StateWWM) {
        // We must switch to or from WWM
        First = FirstWWM;
      } else {
        // We only need to switch to/from WQM, so we can use FirstWQM
        First = FirstWQM;
      }

      MachineBasicBlock::iterator Before =
          prepareInsertion(MBB, First, II, Needs == StateWQM,
                           Needs == StateExact || WQMFromExec);

      // Leaving WWM first restores the saved pre-WWM state before any
      // further WQM/Exact transition is considered.
      if (State == StateWWM) {
        assert(SavedNonWWMReg);
        fromWWM(MBB, Before, SavedNonWWMReg);
        State = NonWWMState;
      }

      if (Needs == StateWWM) {
        NonWWMState = State;
        SavedNonWWMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
        toWWM(MBB, Before, SavedNonWWMReg);
        State = StateWWM;
      } else {
        if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
          // Only save the WQM mask if we will need to return to WQM later.
          if (!WQMFromExec && (OutNeeds & StateWQM))
            SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

          toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
          State = StateExact;
        } else if (State == StateExact && (Needs & StateWQM) &&
                   !(Needs & StateExact)) {
          assert(WQMFromExec == (SavedWQMReg == 0));

          toWQM(MBB, Before, SavedWQMReg);

          if (SavedWQMReg) {
            LIS->createAndComputeVirtRegInterval(SavedWQMReg);
            SavedWQMReg = 0;
          }
          State = StateWQM;
        } else {
          // We can get here if we transitioned from WWM to a non-WWM state that
          // already matches our needs, but we shouldn't need to do anything.
          assert(Needs & State);
        }
      }
    }

    // Reset the "safe switch point" markers once the current instruction
    // constrains the state, so later transitions don't hoist above it.
    if (Needs != (StateExact | StateWQM | StateWWM)) {
      if (Needs != (StateExact | StateWQM))
        FirstWQM = IE;
      FirstWWM = IE;
    }

    if (II == IE)
      break;
    II = Next;
  }
}
/// Insert WQM/Exact transitions (toWQM/toExact) in MBB according to the
/// per-instruction needs recorded in Instructions and the per-block needs in
/// Blocks.  `First` remembers the earliest point since the last constrained
/// instruction, so a transition can be hoisted there via prepareInsertion.
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
                                   bool isEntry) {
  auto BII = Blocks.find(&MBB);
  if (BII == Blocks.end())
    return;

  const BlockInfo &BI = BII->second;

  // Nothing reaching this block needs WQM, so no transitions are required.
  if (!(BI.InNeeds & StateWQM))
    return;

  // This is a non-entry block that is WQM throughout, so no need to do
  // anything.
  if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)
    return;

  DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n");

  unsigned SavedWQMReg = 0;
  bool WQMFromExec = isEntry;
  char State = isEntry ? StateExact : StateWQM;

  auto II = MBB.getFirstNonPHI(), IE = MBB.end();
  if (isEntry)
    ++II; // Skip the instruction that saves LiveMask

  // Earliest position where a pending transition may be inserted.
  MachineBasicBlock::iterator First = IE;
  // One extra iteration runs with II == IE to settle the block-exit state.
  for (;;) {
    MachineBasicBlock::iterator Next = II;
    char Needs = 0;
    char OutNeeds = 0;

    if (First == IE)
      First = II;

    if (II != IE) {
      MachineInstr &MI = *II;

      if (requiresCorrectState(MI)) {
        auto III = Instructions.find(&MI);
        if (III != Instructions.end()) {
          Needs = III->second.Needs;
          OutNeeds = III->second.OutNeeds;
        }
      }

      // Switch to Exact before a terminator when only Exact is needed
      // downstream.
      if (MI.isTerminator() && !Needs && OutNeeds == StateExact)
        Needs = StateExact;

      if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
        MI.getOperand(3).setImm(1);

      ++Next;
    } else {
      // End of basic block
      if (BI.OutNeeds & StateWQM)
        Needs = StateWQM;
      else if (BI.OutNeeds == StateExact)
        Needs = StateExact;
    }

    if (Needs) {
      if (Needs != State) {
        MachineBasicBlock::iterator Before =
          prepareInsertion(MBB, First, II, Needs == StateWQM,
                           Needs == StateExact || WQMFromExec);

        if (Needs == StateExact) {
          // Only save the WQM mask if we will have to return to WQM later.
          if (!WQMFromExec && (OutNeeds & StateWQM))
            SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

          toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
        } else {
          assert(WQMFromExec == (SavedWQMReg == 0));
          toWQM(MBB, Before, SavedWQMReg);
          if (SavedWQMReg) {
            LIS->createAndComputeVirtRegInterval(SavedWQMReg);
            SavedWQMReg = 0;
          }
        }

        State = Needs;
      }

      // The state was just pinned here; restart the hoisting window.
      First = IE;
    }

    if (II == IE)
      break;
    II = Next;
  }
}
/// Insert WQM/Exact transitions immediately before each instruction in MBB
/// whose recorded needs differ from the current state, then fix up the
/// block-exit state to match BI.OutNeeds.
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
                                   bool isEntry) {
  auto BII = Blocks.find(&MBB);
  if (BII == Blocks.end())
    return;

  const BlockInfo &BI = BII->second;

  // Nothing reaching this block needs WQM, so no transitions are required.
  if (!(BI.InNeeds & StateWQM))
    return;

  // This is a non-entry block that is WQM throughout, so no need to do
  // anything.
  if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)
    return;

  unsigned SavedWQMReg = 0;
  bool WQMFromExec = isEntry;
  char State = isEntry ? StateExact : StateWQM;

  auto II = MBB.getFirstNonPHI(), IE = MBB.end();
  while (II != IE) {
    MachineInstr &MI = *II;
    ++II;

    // Skip instructions that are not affected by EXEC
    if (TII->isScalarUnit(MI) && !MI.isTerminator())
      continue;

    // Generic instructions such as COPY will either disappear by register
    // coalescing or be lowered to SALU or VALU instructions.
    if (TargetInstrInfo::isGenericOpcode(MI.getOpcode())) {
      if (MI.getNumExplicitOperands() >= 1) {
        const MachineOperand &Op = MI.getOperand(0);
        if (Op.isReg()) {
          if (TRI->isSGPRReg(*MRI, Op.getReg())) {
            // SGPR instructions are not affected by EXEC
            continue;
          }
        }
      }
    }

    char Needs = 0;
    char OutNeeds = 0;
    auto InstrInfoIt = Instructions.find(&MI);
    if (InstrInfoIt != Instructions.end()) {
      Needs = InstrInfoIt->second.Needs;
      OutNeeds = InstrInfoIt->second.OutNeeds;

      // Make sure to switch to Exact mode before the end of the block when
      // Exact and only Exact is needed further downstream.
      if (OutNeeds == StateExact && MI.isTerminator()) {
        assert(Needs == 0);
        Needs = StateExact;
      }
    }

    // State switching
    if (Needs && State != Needs) {
      if (Needs == StateExact) {
        assert(!SavedWQMReg);

        // Only save the WQM mask if we will have to return to WQM later.
        if (!WQMFromExec && (OutNeeds & StateWQM))
          SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

        toExact(MBB, &MI, SavedWQMReg, LiveMaskReg);
      } else {
        assert(WQMFromExec == (SavedWQMReg == 0));
        toWQM(MBB, &MI, SavedWQMReg);
        SavedWQMReg = 0;
      }

      State = Needs;
    }
  }

  // Settle the state at the end of the block to match what successors expect.
  if ((BI.OutNeeds & StateWQM) && State != StateWQM) {
    assert(WQMFromExec == (SavedWQMReg == 0));
    toWQM(MBB, MBB.end(), SavedWQMReg);
  } else if (BI.OutNeeds == StateExact && State != StateExact) {
    toExact(MBB, MBB.end(), 0, LiveMaskReg);
  }
}
/// Collect the live ranges of all explicit SGPR defs in the function, then
/// for each block with two successors insert an implicit SGPR_USE at the
/// successors' nearest common post-dominator for every range that is live
/// into exactly one of the two successors, keeping the value alive across
/// both sides of the branch.  Always returns false (LiveIntervals is not
/// updated here — see the FIXME below).
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
  std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;

  // First pass, collect all live intervals for SGPRs
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      for (const MachineOperand &MO : MI.defs()) {
        // Skip implicit defs; only explicit SGPR results are tracked.
        if (MO.isImplicit())
          continue;
        unsigned Def = MO.getReg();
        if (TargetRegisterInfo::isVirtualRegister(Def)) {
          if (TRI->isSGPRClass(MRI.getRegClass(Def)))
            SGPRLiveRanges.push_back(
                std::make_pair(Def, &LIS->getInterval(Def)));
        } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
          // Physical SGPR defs are tracked through their register unit range.
          SGPRLiveRanges.push_back(
              std::make_pair(Def, &LIS->getRegUnit(Def)));
        }
      }
    }
  }

  // Second pass fix the intervals
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    if (MBB.succ_size() < 2)
      continue;

    // We have structured control flow, so number of succesors should be two.
    assert(MBB.succ_size() == 2);
    MachineBasicBlock *SuccA = *MBB.succ_begin();
    MachineBasicBlock *SuccB = *(++MBB.succ_begin());
    MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);

    if (!NCD)
      continue;

    MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();

    if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
      assert(NCD->succ_size() == 2);
      // We want to make sure we insert the Use after the ENDIF, not after
      // the ELSE.
      NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
                                            *(++NCD->succ_begin()));
    }
    assert(SuccA && SuccB);
    for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
      unsigned Reg = RegLR.first;
      LiveRange *LR = RegLR.second;

      // FIXME: We could be smarter here. If the register is Live-In to
      // one block, but the other doesn't have any SGPR defs, then there
      // won't be a conflict. Also, if the branch decision is based on
      // a value in an SGPR, then there will be no conflict.
      bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
      bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);

      // Skip ranges that reach neither or both successors; only the
      // one-sided case needs fixing.
      if ((!LiveInToA && !LiveInToB) ||
          (LiveInToA && LiveInToB))
        continue;

      // This interval is live in to one successor, but not the other, so
      // we need to update its range so it is live in to both.
      DEBUG(dbgs() << "Possible SGPR conflict detected " << " in " << *LR <<
                      " BB#" << SuccA->getNumber() << ", BB#" <<
                      SuccB->getNumber() <<
                      " with NCD = " << NCD->getNumber() << '\n');

      // FIXME: Need to figure out how to update LiveRange here so this pass
      // will be able to preserve LiveInterval analysis.
      BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
              TII->get(AMDGPU::SGPR_USE))
              .addReg(Reg, RegState::Implicit);
      DEBUG(NCD->getFirstNonPHI()->dump());
    }
  }

  return false;
}