/// Merge an autorelease with a retain into a fused call.
bool ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
                                          InstructionClass Class,
                                          SmallPtrSet<Instruction *, 4> &DependingInstructions,
                                          SmallPtrSet<const BasicBlock *, 4> &Visited) {
  const Value *Arg = GetObjCArg(Autorelease);

  // Check that there are no instructions between the retain and the autorelease
  // (such as an autorelease_pop) which may change the count.
  CallInst *Retain = 0;
  if (Class == IC_AutoreleaseRV)
    FindDependencies(RetainAutoreleaseRVDep, Arg, Autorelease->getParent(),
                     Autorelease, DependingInstructions, Visited, PA);
  else
    FindDependencies(RetainAutoreleaseDep, Arg, Autorelease->getParent(),
                     Autorelease, DependingInstructions, Visited, PA);

  Visited.clear();
  if (DependingInstructions.size() != 1) {
    DependingInstructions.clear();
    return false;
  }

  Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
  DependingInstructions.clear();

  if (!Retain ||
      GetBasicInstructionClass(Retain) != IC_Retain ||
      GetObjCArg(Retain) != Arg)
    return false;

  Changed = true;
  ++NumPeeps;

  DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing "
                  "retain/autorelease. Erasing: " << *Autorelease << "\n"
                  " Old Retain: " << *Retain << "\n");

  if (Class == IC_AutoreleaseRV)
    Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
  else
    Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));

  DEBUG(dbgs() << " New Retain: " << *Retain << "\n");

  EraseInstruction(Autorelease);
  return true;
}
// Check PHI instructions at the beginning of MBB. It is assumed that
// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
  SmallPtrSet<const MachineBasicBlock*, 8> seen;

  for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
       BBI != BBE && BBI->isPHI(); ++BBI) {
    seen.clear();

    for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
      unsigned Reg = BBI->getOperand(i).getReg();
      const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
      if (!Pre->isSuccessor(MBB))
        continue;
      seen.insert(Pre);
      BBInfo &PrInfo = MBBInfoMap[Pre];
      if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
        report("PHI operand is not live-out from predecessor",
               &BBI->getOperand(i), i);
    }

    // Did we see all predecessors?
    for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
         PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
      if (!seen.count(*PrI)) {
        report("Missing PHI operand", BBI);
        *OS << "BB#" << (*PrI)->getNumber()
            << " is a predecessor according to the CFG.\n";
      }
    }
  }
}
/// FindSelectorAndURoR - Find the eh.selector call associated with the
/// eh.exception call. And indicate if there is a URoR "invoke" associated with
/// the eh.exception call. This recursively looks past instructions which don't
/// change the EH pointer value, like casts or PHI nodes.
bool DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
                                         SmallPtrSet<IntrinsicInst*, 8> &SelCalls) {
  SmallPtrSet<PHINode*, 32> SeenPHIs;
  bool Changed = false;

restart:
  for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
       I != E; ++I) {
    Instruction *II = dyn_cast<Instruction>(*I);
    if (!II || II->getParent()->getParent() != F) continue;

    if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
      if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
        SelCalls.insert(Sel);
    } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
      if (Invoke->getCalledFunction() == URoR)
        URoRInvoke = true;
    } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
      Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
      if (!PromoteStoreInst(SI)) continue;
      Changed = true;
      SeenPHIs.clear();
      goto restart;             // Uses may have changed, restart loop.
    } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
      if (SeenPHIs.insert(PN))
        // Don't process a PHI node more than once.
        Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls);
    }
  }

  return Changed;
}
static void EliminateMultipleEntryLoops(MachineFunction &MF,
                                        const MachineLoopInfo &MLI) {
  SmallPtrSet<MachineBasicBlock *, 8> InSet;
  for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF);
       I != E; ++I) {
    const std::vector<MachineBasicBlock *> &CurrentSCC = *I;

    // Skip trivial SCCs.
    if (CurrentSCC.size() == 1)
      continue;

    InSet.insert(CurrentSCC.begin(), CurrentSCC.end());
    MachineBasicBlock *Header = nullptr;
    for (MachineBasicBlock *MBB : CurrentSCC) {
      for (MachineBasicBlock *Pred : MBB->predecessors()) {
        if (InSet.count(Pred))
          continue;
        if (!Header) {
          Header = MBB;
          break;
        }
        // TODO: Implement multiple-entry loops.
        report_fatal_error("multiple-entry loops are not supported yet");
      }
    }
    assert(MLI.isLoopHeader(Header));
    InSet.clear();
  }
}
/// Return a set of basic blocks to insert sinked instructions.
///
/// The returned set of basic blocks (BBsToSinkInto) should satisfy:
///
/// * Inside the loop \p L
/// * For each UseBB in \p UseBBs, there is at least one BB in BBsToSinkInto
///   that dominates the UseBB
/// * Has minimum total frequency that is no greater than preheader frequency
///
/// The purpose of the function is to find the optimal sinking points to
/// minimize execution cost, which is defined as "sum of frequency of
/// BBsToSinkInto".
/// As a result, the returned BBsToSinkInto needs to have minimum total
/// frequency.
/// Additionally, if the total frequency of BBsToSinkInto exceeds preheader
/// frequency, the optimal solution is not sinking (return empty set).
///
/// \p ColdLoopBBs is used to help find the optimal sinking locations.
/// It stores a list of BBs that is:
///
/// * Inside the loop \p L
/// * Has a frequency no larger than the loop's preheader
/// * Sorted by BB frequency
///
/// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()).
/// To avoid expensive computation, we cap the maximum UseBBs.size() in its
/// caller.
static SmallPtrSet<BasicBlock *, 2>
findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
                  const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                  DominatorTree &DT, BlockFrequencyInfo &BFI) {
  SmallPtrSet<BasicBlock *, 2> BBsToSinkInto;
  if (UseBBs.size() == 0)
    return BBsToSinkInto;

  BBsToSinkInto.insert(UseBBs.begin(), UseBBs.end());
  SmallPtrSet<BasicBlock *, 2> BBsDominatedByColdestBB;

  // For every iteration:
  // * Pick the ColdestBB from ColdLoopBBs
  // * Find the set BBsDominatedByColdestBB that satisfy:
  //   - BBsDominatedByColdestBB is a subset of BBsToSinkInto
  //   - Every BB in BBsDominatedByColdestBB is dominated by ColdestBB
  // * If Freq(ColdestBB) < Freq(BBsDominatedByColdestBB), remove
  //   BBsDominatedByColdestBB from BBsToSinkInto, add ColdestBB to
  //   BBsToSinkInto
  for (BasicBlock *ColdestBB : ColdLoopBBs) {
    BBsDominatedByColdestBB.clear();
    for (BasicBlock *SinkedBB : BBsToSinkInto)
      if (DT.dominates(ColdestBB, SinkedBB))
        BBsDominatedByColdestBB.insert(SinkedBB);
    if (BBsDominatedByColdestBB.size() == 0)
      continue;
    if (adjustedSumFreq(BBsDominatedByColdestBB, BFI) >
        BFI.getBlockFreq(ColdestBB)) {
      for (BasicBlock *DominatedBB : BBsDominatedByColdestBB) {
        BBsToSinkInto.erase(DominatedBB);
      }
      BBsToSinkInto.insert(ColdestBB);
    }
  }

  // If the total frequency of BBsToSinkInto is larger than preheader frequency,
  // do not sink.
  if (adjustedSumFreq(BBsToSinkInto, BFI) >
      BFI.getBlockFreq(L.getLoopPreheader()))
    BBsToSinkInto.clear();
  return BBsToSinkInto;
}
TEST(SmallPtrSetTest, GrowthTest) {
  int i;
  int buf[8];
  for (i = 0; i < 8; ++i) buf[i] = 0;

  SmallPtrSet<int *, 4> s;
  typedef SmallPtrSet<int *, 4>::iterator iter;

  s.insert(&buf[0]);
  s.insert(&buf[1]);
  s.insert(&buf[2]);
  s.insert(&buf[3]);
  EXPECT_EQ(4U, s.size());

  i = 0;
  for (iter I = s.begin(), E = s.end(); I != E; ++I, ++i)
    (**I)++;
  EXPECT_EQ(4, i);
  for (i = 0; i < 8; ++i)
    EXPECT_EQ(i < 4 ? 1 : 0, buf[i]);

  s.insert(&buf[4]);
  s.insert(&buf[5]);
  s.insert(&buf[6]);
  s.insert(&buf[7]);

  i = 0;
  for (iter I = s.begin(), E = s.end(); I != E; ++I, ++i)
    (**I)++;
  EXPECT_EQ(8, i);
  s.erase(&buf[4]);
  s.erase(&buf[5]);
  s.erase(&buf[6]);
  s.erase(&buf[7]);
  EXPECT_EQ(4U, s.size());

  i = 0;
  for (iter I = s.begin(), E = s.end(); I != E; ++I, ++i)
    (**I)++;
  EXPECT_EQ(4, i);
  for (i = 0; i < 8; ++i)
    EXPECT_EQ(i < 4 ? 3 : 1, buf[i]);

  s.clear();
  for (i = 0; i < 8; ++i) buf[i] = 0;
  for (i = 0; i < 128; ++i)
    s.insert(&buf[i % 8]);  // test repeated entries
  EXPECT_EQ(8U, s.size());
  for (iter I = s.begin(), E = s.end(); I != E; ++I, ++i)
    (**I)++;
  for (i = 0; i < 8; ++i)
    EXPECT_EQ(1, buf[i]);
}
bool PropagateJuliaAddrspaces::runOnFunction(Function &F) {
  visit(F);
  for (auto it : ToInsert)
    it.first->insertBefore(it.second);
  for (Instruction *I : ToDelete)
    I->eraseFromParent();
  ToInsert.clear();
  ToDelete.clear();
  LiftingMap.clear();
  Visited.clear();
  return true;
}
static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
  SmallPtrSet<const MachineBasicBlock*, 5> Visited;

  LiveRange::iterator OutIt;
  VNInfo *PrevValNo = nullptr;
  for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
    LiveRange::Segment &S = *I;
    // Determine final VNI if necessary.
    if (S.valno == nullptr) {
      // This can only happen at the begin of a basic block.
      assert(S.start.isBlock() && "valno should only be missing at block begin");

      Visited.clear();
      const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
        VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
        if (VNI != nullptr) {
          S.valno = VNI;
          break;
        }
      }
      assert(S.valno != nullptr && "could not determine valno");
    }
    // Merge with previous segment if it has the same VNI.
    if (PrevValNo == S.valno && OutIt->end == S.start) {
      OutIt->end = S.end;
    } else {
      // Didn't merge. Move OutIt to next segment.
      if (PrevValNo == nullptr)
        OutIt = LI.begin();
      else
        ++OutIt;

      if (OutIt != I)
        *OutIt = *I;
      PrevValNo = S.valno;
    }
  }
  // If we merged some segments chop off the end.
  ++OutIt;
  LI.segments.erase(OutIt, LI.end());
}
static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
  SmallPtrSet<const MachineBasicBlock*, 5> Visited;
  for (LiveRange::Segment &S : LI.segments) {
    if (S.valno != nullptr)
      continue;
    // This can only happen at the begin of a basic block.
    assert(S.start.isBlock() && "valno should only be missing at block begin");

    Visited.clear();
    const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
    for (const MachineBasicBlock *Pred : MBB->predecessors()) {
      VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
      if (VNI != nullptr) {
        S.valno = VNI;
        break;
      }
    }
    assert(S.valno != nullptr && "could not determine valno");
  }
}
// Given a list of loads that could be constant-folded (LoadBaseAddresses),
// estimate number of optimized instructions after substituting the concrete
// values for the given Iteration.
// Fill in SimplifiedInsns map for future use in DCE-estimation.
unsigned EstimateNumberOfSimplifiedInsns(unsigned Iteration) {
  SmallVector<Instruction *, 8> Worklist;
  SimplifiedValues.clear();
  CountedInsns.clear();

  NumberOfOptimizedInstructions = 0;
  // We start by adding all loads to the worklist.
  for (auto LoadDescr : LoadBaseAddresses) {
    LoadInst *LI = LoadDescr.first;
    SimplifiedValues[LI] = computeLoadValue(LI, Iteration);
    if (CountedInsns.insert(LI).second)
      NumberOfOptimizedInstructions += TTI.getUserCost(LI);

    for (auto U : LI->users()) {
      Instruction *UI = dyn_cast<Instruction>(U);
      if (!UI)
        continue;
      if (!L->contains(UI))
        continue;
      Worklist.push_back(UI);
    }
  }

  // And then we try to simplify every user of every instruction from the
  // worklist. If we do simplify a user, add it to the worklist to process
  // its users as well.
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!visit(I))
      continue;
    for (auto U : I->users()) {
      Instruction *UI = dyn_cast<Instruction>(U);
      if (!UI)
        continue;
      if (!L->contains(UI))
        continue;
      Worklist.push_back(UI);
    }
  }
  return NumberOfOptimizedInstructions;
}
bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
  TM = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;
    LocalMIs.clear();
    for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end();
         MII != ME; ++MII) {
      MachineInstr *MI = &*MII;
      Changed |= OptimizeInstr(MI, MBB, LocalMIs);
    }
  }

  return Changed;
}
/// getMachineBasicBlocks - Populate given set using machine basic blocks which
/// have machine instructions that belong to lexical scope identified by
/// DebugLoc.
void LexicalScopes::
getMachineBasicBlocks(DebugLoc DL,
                      SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) {
  MBBs.clear();
  LexicalScope *Scope = getOrCreateLexicalScope(DL);
  if (!Scope)
    return;

  if (Scope == CurrentFnLexicalScope) {
    for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
         I != E; ++I)
      MBBs.insert(I);
    return;
  }

  SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges();
  for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(),
       E = InsnRanges.end(); I != E; ++I) {
    InsnRange &R = *I;
    MBBs.insert(R.first->getParent());
  }
}
vector<const TargetRegisterInfo*> ipaFindUsedReturns(ParameterRegistry& registry,
                                                     Function& function,
                                                     const vector<const TargetRegisterInfo*>& returns) {
  // Excuse entry points from not having callers; use every return.
  if (function.use_empty())
    if (auto address = md::getVirtualAddress(function))
      if (isEntryPoint(address->getLimitedValue())) {
        return returns;
      }

  // Otherwise, loop through callers and see which registers are used after the function call.
  TargetInfo& targetInfo = registry.getTargetInfo();
  SmallPtrSet<MemoryPhi*, 4> visited;
  vector<const TargetRegisterInfo*> result;
  for (auto& use : function.uses()) {
    if (auto call = dyn_cast<CallInst>(use.getUser())) {
      auto parentFunction = call->getParent()->getParent();
      if (parentFunction == &function) {
        // TODO: This isn't impossible to compute, just somewhat inconvenient.
        continue;
      }

      auto parentArgs = static_cast<Argument*>(parentFunction->arg_begin());
      auto pointerType = dyn_cast<PointerType>(parentArgs->getType());
      assert(pointerType != nullptr &&
             pointerType->getTypeAtIndex(int(0))->getStructName() == "struct.x86_regs");
      (void) pointerType;

      visited.clear();
      MemorySSA& mssa = *registry.getMemorySSA(*parentFunction);
      findUsedReturns(returns, targetInfo, mssa, visited,
                      *mssa.getMemoryAccess(call), result);
    }
  }
  return result;
}
bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipOptnoneFunction(L)) return false; DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = &getAnalysis<LoopInfo>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); AssumptionTracker *AT = &getAnalysis<AssumptionTracker>(); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); array_pod_sort(ExitBlocks.begin(), ExitBlocks.end()); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; // The bit we are stealing from the pointer represents whether this basic // block is the header of a subloop, in which case we only process its phis. typedef PointerIntPair<BasicBlock*, 1> WorklistItem; SmallVector<WorklistItem, 16> VisitStack; SmallPtrSet<BasicBlock*, 32> Visited; bool Changed = false; bool LocalChanged; do { LocalChanged = false; VisitStack.clear(); Visited.clear(); VisitStack.push_back(WorklistItem(L->getHeader(), false)); while (!VisitStack.empty()) { WorklistItem Item = VisitStack.pop_back_val(); BasicBlock *BB = Item.getPointer(); bool IsSubloopHeader = Item.getInt(); // Simplify instructions in the current basic block. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *I = BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not // empty and we only bother simplifying instructions that are in it. if (!ToSimplify->empty() && !ToSimplify->count(I)) continue; // Don't bother simplifying unused instructions. if (!I->use_empty()) { Value *V = SimplifyInstruction(I, DL, TLI, DT, AT); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) Next->insert(cast<Instruction>(U)); I->replaceAllUsesWith(V); LocalChanged = true; ++NumSimplified; } } bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI); if (res) { // RecursivelyDeleteTriviallyDeadInstruction can remove // more than one instruction, so simply incrementing the // iterator does not work. When instructions get deleted // re-iterate instead. BI = BB->begin(); BE = BB->end(); LocalChanged |= res; } if (IsSubloopHeader && !isa<PHINode>(I)) break; } // Add all successors to the worklist, except for loop exit blocks and the // bodies of subloops. We visit the headers of loops so that we can process // their phis, but we contract the rest of the subloop body and only follow // edges leading back to the original loop. 
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { BasicBlock *SuccBB = *SI; if (!Visited.insert(SuccBB).second) continue; const Loop *SuccLoop = LI->getLoopFor(SuccBB); if (SuccLoop && SuccLoop->getHeader() == SuccBB && L->contains(SuccLoop)) { VisitStack.push_back(WorklistItem(SuccBB, true)); SmallVector<BasicBlock*, 8> SubLoopExitBlocks; SuccLoop->getExitBlocks(SubLoopExitBlocks); for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) { BasicBlock *ExitBB = SubLoopExitBlocks[i]; if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB).second) VisitStack.push_back(WorklistItem(ExitBB, false)); } continue; } bool IsExitBlock = std::binary_search(ExitBlocks.begin(), ExitBlocks.end(), SuccBB); if (IsExitBlock) continue; VisitStack.push_back(WorklistItem(SuccBB, false)); } } // Place the list of instructions to simplify on the next loop iteration // into ToSimplify. std::swap(ToSimplify, Next); Next->clear(); Changed |= LocalChanged; } while (LocalChanged); return Changed; }
/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure /// there is one implicit_def for each use. Add isUndef marker to /// implicit_def defs and their uses. bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" << "********** Function: " << ((Value*)fn.getFunction())->getName() << '\n'); bool Changed = false; TII = fn.getTarget().getInstrInfo(); TRI = fn.getTarget().getRegisterInfo(); MRI = &fn.getRegInfo(); LV = &getAnalysis<LiveVariables>(); SmallSet<unsigned, 8> ImpDefRegs; SmallVector<MachineInstr*, 8> ImpDefMIs; SmallVector<MachineInstr*, 4> RUses; SmallPtrSet<MachineBasicBlock*,16> Visited; SmallPtrSet<MachineInstr*, 8> ModInsts; MachineBasicBlock *Entry = fn.begin(); for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); DFI != E; ++DFI) { MachineBasicBlock *MBB = *DFI; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; if (MI->isImplicitDef()) { ImpDefMIs.push_back(MI); // Is this a sub-register read-modify-write? if (MI->getOperand(0).readsReg()) continue; unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) ImpDefRegs.insert(*SS); } continue; } // Eliminate %reg1032:sub<def> = COPY undef. if (MI->isCopy() && MI->getOperand(0).readsReg()) { MachineOperand &MO = MI->getOperand(1); if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { if (MO.isKill()) { LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg()); vi.removeKill(MI); } unsigned Reg = MI->getOperand(0).getReg(); MI->eraseFromParent(); Changed = true; // A REG_SEQUENCE may have been expanded into partial definitions. // If this was the last one, mark Reg as implicitly defined. if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg)) ImpDefRegs.insert(Reg); continue; } } bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); if (!MO.isReg() || !MO.readsReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (!ImpDefRegs.count(Reg)) continue; // Use is a copy, just turn it into an implicit_def. if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) { bool isKill = MO.isKill(); MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); if (isKill) { ImpDefRegs.erase(Reg); LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(MI); } ChangedToImpDef = true; Changed = true; break; } Changed = true; MO.setIsUndef(); // This is a partial register redef of an implicit def. // Make sure the whole register is defined by the instruction. if (MO.isDef()) { MI->addRegisterDefined(Reg); continue; } if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { // Make sure other reads of Reg are also marked <undef>. for (unsigned j = i+1; j != e; ++j) { MachineOperand &MOJ = MI->getOperand(j); if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg()) MOJ.setIsUndef(); } ImpDefRegs.erase(Reg); } } if (ChangedToImpDef) { // Backtrack to process this new implicit_def. --I; } else { for (unsigned i = 0; i != MI->getNumOperands(); ++i) { MachineOperand& MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; ImpDefRegs.erase(MO.getReg()); } } } // Any outstanding liveout implicit_def's? 
for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) { MachineInstr *MI = ImpDefMIs[i]; unsigned Reg = MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg) || !ImpDefRegs.count(Reg)) { // Delete all "local" implicit_def's. That include those which define // physical registers since they cannot be liveout. MI->eraseFromParent(); Changed = true; continue; } // If there are multiple defs of the same register and at least one // is not an implicit_def, do not insert implicit_def's before the // uses. bool Skip = false; SmallVector<MachineInstr*, 4> DeadImpDefs; for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { MachineInstr *DeadImpDef = &*DI; if (!DeadImpDef->isImplicitDef()) { Skip = true; break; } DeadImpDefs.push_back(DeadImpDef); } if (Skip) continue; // The only implicit_def which we want to keep are those that are live // out of its block. for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j) DeadImpDefs[j]->eraseFromParent(); Changed = true; // Process each use instruction once. for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), UE = MRI->use_end(); UI != UE; ++UI) { if (UI.getOperand().isUndef()) continue; MachineInstr *RMI = &*UI; if (ModInsts.insert(RMI)) RUses.push_back(RMI); } for (unsigned i = 0, e = RUses.size(); i != e; ++i) { MachineInstr *RMI = RUses[i]; // Turn a copy use into an implicit_def. if (isUndefCopy(RMI, Reg, ImpDefRegs)) { RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; SmallVector<unsigned, 4> Ops; for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { MachineOperand &RRMO = RMI->getOperand(j); if (RRMO.isReg() && RRMO.getReg() == Reg) { Ops.push_back(j); if (RRMO.isKill()) isKill = true; } } // Leave the other operands along. for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) { unsigned OpIdx = Ops[j]; RMI->RemoveOperand(OpIdx-j); } // Update LiveVariables varinfo if the instruction is a kill. if (isKill) { LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(RMI); } continue; } // Replace Reg with a new vreg that's marked implicit. const TargetRegisterClass* RC = MRI->getRegClass(Reg); unsigned NewVReg = MRI->createVirtualRegister(RC); bool isKill = true; for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { MachineOperand &RRMO = RMI->getOperand(j); if (RRMO.isReg() && RRMO.getReg() == Reg) { RRMO.setReg(NewVReg); RRMO.setIsUndef(); if (isKill) { // Only the first operand of NewVReg is marked kill. RRMO.setIsKill(); isKill = false; } } } } RUses.clear(); ModInsts.clear(); } ImpDefRegs.clear(); ImpDefMIs.clear(); } return Changed; }
int Compilation::performJobsImpl() { // Create a TaskQueue for execution. std::unique_ptr<TaskQueue> TQ; if (SkipTaskExecution) TQ.reset(new DummyTaskQueue(NumberOfParallelCommands)); else TQ.reset(new TaskQueue(NumberOfParallelCommands)); PerformJobsState State; using DependencyGraph = DependencyGraph<const Job *>; DependencyGraph DepGraph; SmallPtrSet<const Job *, 16> DeferredCommands; SmallVector<const Job *, 16> InitialOutOfDateCommands; DependencyGraph::MarkTracer ActualIncrementalTracer; DependencyGraph::MarkTracer *IncrementalTracer = nullptr; if (ShowIncrementalBuildDecisions) IncrementalTracer = &ActualIncrementalTracer; auto noteBuilding = [&] (const Job *cmd, StringRef reason) { if (!ShowIncrementalBuildDecisions) return; if (State.ScheduledCommands.count(cmd)) return; llvm::outs() << "Queuing " << llvm::sys::path::filename(cmd->getOutput().getBaseInput(0)) << " " << reason << "\n"; IncrementalTracer->printPath(llvm::outs(), cmd, [](raw_ostream &out, const Job *base) { out << llvm::sys::path::filename(base->getOutput().getBaseInput(0)); }); }; // Set up scheduleCommandIfNecessaryAndPossible. // This will only schedule the given command if it has not been scheduled // and if all of its inputs are in FinishedCommands. auto scheduleCommandIfNecessaryAndPossible = [&] (const Job *Cmd) { if (State.ScheduledCommands.count(Cmd)) return; if (auto Blocking = findUnfinishedJob(Cmd->getInputs(), State.FinishedCommands)) { State.BlockingCommands[Blocking].push_back(Cmd); return; } // FIXME: Failing here should not take down the whole process. bool success = writeFilelistIfNecessary(Cmd, Diags); assert(success && "failed to write filelist"); (void)success; assert(Cmd->getExtraEnvironment().empty() && "not implemented for compilations with multiple jobs"); State.ScheduledCommands.insert(Cmd); TQ->addTask(Cmd->getExecutable(), Cmd->getArguments(), llvm::None, (void *)Cmd); }; // When a task finishes, we need to reevaluate the other commands that // might have been blocked. auto markFinished = [&] (const Job *Cmd) { State.FinishedCommands.insert(Cmd); auto BlockedIter = State.BlockingCommands.find(Cmd); if (BlockedIter != State.BlockingCommands.end()) { auto AllBlocked = std::move(BlockedIter->second); State.BlockingCommands.erase(BlockedIter); for (auto *Blocked : AllBlocked) scheduleCommandIfNecessaryAndPossible(Blocked); } }; // Schedule all jobs we can. for (const Job *Cmd : getJobs()) { if (!getIncrementalBuildEnabled()) { scheduleCommandIfNecessaryAndPossible(Cmd); continue; } // Try to load the dependencies file for this job. If there isn't one, we // always have to run the job, but it doesn't affect any other jobs. If // there should be one but it's not present or can't be loaded, we have to // run all the jobs. // FIXME: We can probably do better here! 
Job::Condition Condition = Job::Condition::Always; StringRef DependenciesFile = Cmd->getOutput().getAdditionalOutputForType(types::TY_SwiftDeps); if (!DependenciesFile.empty()) { if (Cmd->getCondition() == Job::Condition::NewlyAdded) { DepGraph.addIndependentNode(Cmd); } else { switch (DepGraph.loadFromPath(Cmd, DependenciesFile)) { case DependencyGraphImpl::LoadResult::HadError: disableIncrementalBuild(); for (const Job *Cmd : DeferredCommands) scheduleCommandIfNecessaryAndPossible(Cmd); DeferredCommands.clear(); break; case DependencyGraphImpl::LoadResult::UpToDate: Condition = Cmd->getCondition(); break; case DependencyGraphImpl::LoadResult::AffectsDownstream: llvm_unreachable("we haven't marked anything in this graph yet"); } } } switch (Condition) { case Job::Condition::Always: if (getIncrementalBuildEnabled() && !DependenciesFile.empty()) { InitialOutOfDateCommands.push_back(Cmd); DepGraph.markIntransitive(Cmd); } SWIFT_FALLTHROUGH; case Job::Condition::RunWithoutCascading: noteBuilding(Cmd, "(initial)"); scheduleCommandIfNecessaryAndPossible(Cmd); break; case Job::Condition::CheckDependencies: DeferredCommands.insert(Cmd); break; case Job::Condition::NewlyAdded: llvm_unreachable("handled above"); } } if (getIncrementalBuildEnabled()) { SmallVector<const Job *, 16> AdditionalOutOfDateCommands; // We scheduled all of the files that have actually changed. Now add the // files that haven't changed, so that they'll get built in parallel if // possible and after the first set of files if it's not. for (auto *Cmd : InitialOutOfDateCommands) { DepGraph.markTransitive(AdditionalOutOfDateCommands, Cmd, IncrementalTracer); } for (auto *transitiveCmd : AdditionalOutOfDateCommands) noteBuilding(transitiveCmd, "because of the initial set:"); size_t firstSize = AdditionalOutOfDateCommands.size(); // Check all cross-module dependencies as well. for (StringRef dependency : DepGraph.getExternalDependencies()) { llvm::sys::fs::file_status depStatus; if (!llvm::sys::fs::status(dependency, depStatus)) if (depStatus.getLastModificationTime() < LastBuildTime) continue; // If the dependency has been modified since the oldest built file, // or if we can't stat it for some reason (perhaps it's been deleted?), // trigger rebuilds through the dependency graph. DepGraph.markExternal(AdditionalOutOfDateCommands, dependency); } for (auto *externalCmd : llvm::makeArrayRef(AdditionalOutOfDateCommands).slice(firstSize)) { noteBuilding(externalCmd, "because of external dependencies"); } for (auto *AdditionalCmd : AdditionalOutOfDateCommands) { if (!DeferredCommands.count(AdditionalCmd)) continue; scheduleCommandIfNecessaryAndPossible(AdditionalCmd); DeferredCommands.erase(AdditionalCmd); } } int Result = EXIT_SUCCESS; // Set up a callback which will be called immediately after a task has // started. This callback may be used to provide output indicating that the // task began. auto taskBegan = [this] (ProcessId Pid, void *Context) { // TODO: properly handle task began. const Job *BeganCmd = (const Job *)Context; // For verbose output, print out each command as it begins execution. if (Level == OutputLevel::Verbose) BeganCmd->printCommandLine(llvm::errs()); else if (Level == OutputLevel::Parseable) parseable_output::emitBeganMessage(llvm::errs(), *BeganCmd, Pid); }; // Set up a callback which will be called immediately after a task has // finished execution. 
This callback should determine if execution should // continue (if execution should stop, this callback should return true), and // it should also schedule any additional commands which we now know need // to run. auto taskFinished = [&] (ProcessId Pid, int ReturnCode, StringRef Output, void *Context) -> TaskFinishedResponse { const Job *FinishedCmd = (const Job *)Context; if (Level == OutputLevel::Parseable) { // Parseable output was requested. parseable_output::emitFinishedMessage(llvm::errs(), *FinishedCmd, Pid, ReturnCode, Output); } else { // Otherwise, send the buffered output to stderr, though only if we // support getting buffered output. if (TaskQueue::supportsBufferingOutput()) llvm::errs() << Output; } if (ReturnCode != EXIT_SUCCESS) { // The task failed, so return true without performing any further // dependency analysis. // Store this task's ReturnCode as our Result if we haven't stored // anything yet. if (Result == EXIT_SUCCESS) Result = ReturnCode; if (!isa<CompileJobAction>(FinishedCmd->getSource()) || ReturnCode != EXIT_FAILURE) { Diags.diagnose(SourceLoc(), diag::error_command_failed, FinishedCmd->getSource().getClassName(), ReturnCode); } return ContinueBuildingAfterErrors ? TaskFinishedResponse::ContinueExecution : TaskFinishedResponse::StopExecution; } // When a task finishes, we need to reevaluate the other commands that // might have been blocked. markFinished(FinishedCmd); // In order to handle both old dependencies that have disappeared and new // dependencies that have arisen, we need to reload the dependency file. if (getIncrementalBuildEnabled()) { const CommandOutput &Output = FinishedCmd->getOutput(); StringRef DependenciesFile = Output.getAdditionalOutputForType(types::TY_SwiftDeps); if (!DependenciesFile.empty()) { SmallVector<const Job *, 16> Dependents; bool wasCascading = DepGraph.isMarked(FinishedCmd); switch (DepGraph.loadFromPath(FinishedCmd, DependenciesFile)) { case DependencyGraphImpl::LoadResult::HadError: disableIncrementalBuild(); for (const Job *Cmd : DeferredCommands) scheduleCommandIfNecessaryAndPossible(Cmd); DeferredCommands.clear(); Dependents.clear(); break; case DependencyGraphImpl::LoadResult::UpToDate: if (!wasCascading) break; SWIFT_FALLTHROUGH; case DependencyGraphImpl::LoadResult::AffectsDownstream: DepGraph.markTransitive(Dependents, FinishedCmd); break; } for (const Job *Cmd : Dependents) { DeferredCommands.erase(Cmd); noteBuilding(Cmd, "because of dependencies discovered later"); scheduleCommandIfNecessaryAndPossible(Cmd); } } } return TaskFinishedResponse::ContinueExecution; }; auto taskSignalled = [&] (ProcessId Pid, StringRef ErrorMsg, StringRef Output, void *Context) -> TaskFinishedResponse { const Job *SignalledCmd = (const Job *)Context; if (Level == OutputLevel::Parseable) { // Parseable output was requested. parseable_output::emitSignalledMessage(llvm::errs(), *SignalledCmd, Pid, ErrorMsg, Output); } else { // Otherwise, send the buffered output to stderr, though only if we // support getting buffered output. if (TaskQueue::supportsBufferingOutput()) llvm::errs() << Output; } if (!ErrorMsg.empty()) Diags.diagnose(SourceLoc(), diag::error_unable_to_execute_command, ErrorMsg); Diags.diagnose(SourceLoc(), diag::error_command_signalled, SignalledCmd->getSource().getClassName()); // Since the task signalled, unconditionally set result to -2. Result = -2; return TaskFinishedResponse::StopExecution; }; do { // Ask the TaskQueue to execute. 
TQ->execute(taskBegan, taskFinished, taskSignalled); // Mark all remaining deferred commands as skipped. for (const Job *Cmd : DeferredCommands) { if (Level == OutputLevel::Parseable) { // Provide output indicating this command was skipped if parseable output // was requested. parseable_output::emitSkippedMessage(llvm::errs(), *Cmd); } State.ScheduledCommands.insert(Cmd); markFinished(Cmd); } // ...which may allow us to go on and do later tasks. } while (Result == 0 && TQ->hasRemainingTasks()); if (Result == 0) { assert(State.BlockingCommands.empty() && "some blocking commands never finished properly"); } else { // Make sure we record any files that still need to be rebuilt. for (const Job *Cmd : getJobs()) { // Skip files that don't use dependency analysis. StringRef DependenciesFile = Cmd->getOutput().getAdditionalOutputForType(types::TY_SwiftDeps); if (DependenciesFile.empty()) continue; // Don't worry about commands that finished or weren't going to run. if (State.FinishedCommands.count(Cmd)) continue; if (!State.ScheduledCommands.count(Cmd)) continue; bool isCascading = true; if (getIncrementalBuildEnabled()) isCascading = DepGraph.isMarked(Cmd); State.UnfinishedCommands.insert({Cmd, isCascading}); } } if (!CompilationRecordPath.empty() && !SkipTaskExecution) { InputInfoMap InputInfo; populateInputInfoMap(InputInfo, State); checkForOutOfDateInputs(Diags, InputInfo); writeCompilationRecord(CompilationRecordPath, ArgsHash, BuildStartTime, InputInfo); } if (Result == 0) Result = Diags.hadAnyError(); return Result; }
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (DisablePeephole) return false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); MRI = &MF.getRegInfo(); DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0; bool Changed = false; SmallPtrSet<MachineInstr*, 8> LocalMIs; SmallSet<unsigned, 4> ImmDefRegs; DenseMap<unsigned, MachineInstr*> ImmDefMIs; unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; bool SeenMoveImm = false; LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); FoldAsLoadDefReg = 0; for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { MachineInstr *MI = &*MII; // We may be erasing MI below, increment MII now. ++MII; LocalMIs.insert(MI); // If there exists an instruction which belongs to the following // categories, we will discard the load candidate. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { FoldAsLoadDefReg = 0; continue; } if (MI->mayStore() || MI->isCall()) FoldAsLoadDefReg = 0; if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || (MI->isSelect() && optimizeSelect(MI))) { // MI is deleted. LocalMIs.erase(MI); Changed = true; continue; } if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { SeenMoveImm = true; } else { Changed |= optimizeExtInstr(MI, MBB, LocalMIs); // optimizeExtInstr might have created new instructions after MI // and before the already incremented MII. Adjust MII so that the // next iteration sees the new instructions. MII = MI; ++MII; if (SeenMoveImm) Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } // Check whether MI is a load candidate for folding into a later // instruction. If MI is not a candidate, check whether we can fold an // earlier load into MI. if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr // can enable folding by converting SUB to CMP. MachineInstr *DefMI = 0; MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefReg, DefMI); if (FoldMI) { // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. LocalMIs.erase(MI); LocalMIs.erase(DefMI); LocalMIs.insert(FoldMI); MI->eraseFromParent(); DefMI->eraseFromParent(); ++NumLoadFold; // MI is replaced with FoldMI. Changed = true; continue; } } } } return Changed; }
/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
/// This function finds loads of the same base and different offsets. If the
/// offsets are not far apart (target specific), it adds MVT::Flag inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
  SmallPtrSet<SDNode*, 16> Visited;
  SmallVector<int64_t, 4> Offsets;
  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    SDNode *Node = &*NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    if (!TID.mayLoad())
      continue;

    SDNode *Chain = 0;
    unsigned NumOps = Node->getNumOperands();
    if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
      Chain = Node->getOperand(NumOps-1).getNode();
    if (!Chain)
      continue;

    // Look for other loads of the same chain. Find loads that are loading from
    // the same base pointer and different offsets.
    Visited.clear();
    Offsets.clear();
    O2SMap.clear();
    bool Cluster = false;
    SDNode *Base = Node;
    int64_t BaseOffset;
    for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
         I != E; ++I) {
      SDNode *User = *I;
      if (User == Node || !Visited.insert(User))
        continue;
      int64_t Offset1, Offset2;
      if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
          Offset1 == Offset2)
        // FIXME: Should be ok if their addresses are identical. But earlier
        // optimizations really should have eliminated one of the loads.
        continue;
      if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
        Offsets.push_back(Offset1);
      O2SMap.insert(std::make_pair(Offset2, User));
      Offsets.push_back(Offset2);
      if (Offset2 < Offset1) {
        Base = User;
        BaseOffset = Offset2;
      } else {
        BaseOffset = Offset1;
      }
      Cluster = true;
    }

    if (!Cluster)
      continue;

    // Sort them in increasing order.
    std::sort(Offsets.begin(), Offsets.end());

    // Check if the loads are close enough.
    SmallVector<SDNode*, 4> Loads;
    unsigned NumLoads = 0;
    int64_t BaseOff = Offsets[0];
    SDNode *BaseLoad = O2SMap[BaseOff];
    Loads.push_back(BaseLoad);
    for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
      int64_t Offset = Offsets[i];
      SDNode *Load = O2SMap[Offset];
      if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
                                        NumLoads))
        break; // Stop right here. Ignore loads that are further away.
      Loads.push_back(Load);
      ++NumLoads;
    }

    if (NumLoads == 0)
      continue;

    // Cluster loads by adding MVT::Flag outputs and inputs. This also
    // ensures they are scheduled in order of increasing addresses.
    SDNode *Lead = Loads[0];
    AddFlags(Lead, SDValue(0, 0), true, DAG);
    SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
    for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
      bool OutFlag = i < e-1;
      SDNode *Load = Loads[i];
      AddFlags(Load, InFlag, OutFlag, DAG);
      if (OutFlag)
        InFlag = SDValue(Load, Load->getNumValues()-1);
      ++LoadsClustered;
    }
  }
}
/// Given \p BBs as input, find another set of BBs which collectively
/// dominates \p BBs and have the minimal sum of frequencies. Return the BB
/// set found in \p BBs.
static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
                                 BasicBlock *Entry,
                                 SmallPtrSet<BasicBlock *, 8> &BBs) {
  assert(!BBs.count(Entry) && "Assume Entry is not in BBs");
  // Nodes on the current path to the root.
  SmallPtrSet<BasicBlock *, 8> Path;
  // Candidates includes any block 'BB' in set 'BBs' that is not strictly
  // dominated by any other blocks in set 'BBs', and all nodes in the path
  // in the dominator tree from Entry to 'BB'.
  SmallPtrSet<BasicBlock *, 16> Candidates;
  for (auto BB : BBs) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(BB))
      continue;
    Path.clear();
    // Walk up the dominator tree until Entry or another BB in BBs
    // is reached. Insert the nodes on the way to the Path.
    BasicBlock *Node = BB;
    // The "Path" is a candidate path to be added into Candidates set.
    bool isCandidate = false;
    do {
      Path.insert(Node);
      if (Node == Entry || Candidates.count(Node)) {
        isCandidate = true;
        break;
      }
      assert(DT.getNode(Node)->getIDom() &&
             "Entry doesn't dominate current Node");
      Node = DT.getNode(Node)->getIDom()->getBlock();
    } while (!BBs.count(Node));

    // If isCandidate is false, Node is another Block in BBs dominating
    // current 'BB'. Drop the nodes on the Path.
    if (!isCandidate)
      continue;

    // Add nodes on the Path into Candidates.
    Candidates.insert(Path.begin(), Path.end());
  }

  // Sort the nodes in Candidates in top-down order and save the nodes
  // in Orders.
  unsigned Idx = 0;
  SmallVector<BasicBlock *, 16> Orders;
  Orders.push_back(Entry);
  while (Idx != Orders.size()) {
    BasicBlock *Node = Orders[Idx++];
    for (auto ChildDomNode : DT.getNode(Node)->getChildren()) {
      if (Candidates.count(ChildDomNode->getBlock()))
        Orders.push_back(ChildDomNode->getBlock());
    }
  }

  // Visit Orders in bottom-up order.
  using InsertPtsCostPair =
      std::pair<SmallPtrSet<BasicBlock *, 16>, BlockFrequency>;

  // InsertPtsMap is a map from a BB to the best insertion points for the
  // subtree of BB (subtree not including the BB itself).
  DenseMap<BasicBlock *, InsertPtsCostPair> InsertPtsMap;
  InsertPtsMap.reserve(Orders.size() + 1);
  for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) {
    BasicBlock *Node = *RIt;
    bool NodeInBBs = BBs.count(Node);
    SmallPtrSet<BasicBlock *, 16> &InsertPts = InsertPtsMap[Node].first;
    BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second;

    // Return the optimal insert points in BBs.
    if (Node == Entry) {
      BBs.clear();
      if (InsertPtsFreq > BFI.getBlockFreq(Node) ||
          (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1))
        BBs.insert(Entry);
      else
        BBs.insert(InsertPts.begin(), InsertPts.end());
      break;
    }

    BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock();
    // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child
    // will update its parent's ParentInsertPts and ParentPtsFreq.
    SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first;
    BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second;
    // Choose to insert in Node or in subtree of Node.
    // Don't hoist to EHPad because we may not find a proper place to insert
    // in EHPad.
    // If the total frequency of InsertPts is the same as the frequency of the
    // target Node, and InsertPts contains more than one node, choose hoisting
    // to reduce code size.
    if (NodeInBBs ||
        (!Node->isEHPad() &&
         (InsertPtsFreq > BFI.getBlockFreq(Node) ||
          (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)))) {
      ParentInsertPts.insert(Node);
      ParentPtsFreq += BFI.getBlockFreq(Node);
    } else {
      ParentInsertPts.insert(InsertPts.begin(), InsertPts.end());
      ParentPtsFreq += InsertPtsFreq;
    }
  }
}
static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, const TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) { const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); SimplifyQuery SQ(DL, &TLI, &DT, &AC); // On the first pass over the loop body we try to simplify every instruction. // On subsequent passes, we can restrict this to only simplifying instructions // where the inputs have been updated. We end up needing two sets: one // containing the instructions we are simplifying in *this* pass, and one for // the instructions we will want to simplify in the *next* pass. We use // pointers so we can swap between two stably allocated sets. SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; // Track the PHI nodes that have already been visited during each iteration so // that we can identify when it is necessary to iterate. SmallPtrSet<PHINode *, 4> VisitedPHIs; // While simplifying we may discover dead code or cause code to become dead. // Keep track of all such instructions and we will delete them at the end. SmallVector<Instruction *, 8> DeadInsts; // First we want to create an RPO traversal of the loop body. By processing in // RPO we can ensure that definitions are processed prior to uses (for non PHI // uses) in all cases. This ensures we maximize the simplifications in each // iteration over the loop and minimizes the possible causes for continuing to // iterate. LoopBlocksRPO RPOT(&L); RPOT.perform(&LI); MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr; bool Changed = false; for (;;) { if (MSSAU && VerifyMemorySSA) MSSA->verifyMemorySSA(); for (BasicBlock *BB : RPOT) { for (Instruction &I : *BB) { if (auto *PI = dyn_cast<PHINode>(&I)) VisitedPHIs.insert(PI); if (I.use_empty()) { if (isInstructionTriviallyDead(&I, &TLI)) DeadInsts.push_back(&I); continue; } // We special case the first iteration which we can detect due to the // empty `ToSimplify` set. bool IsFirstIteration = ToSimplify->empty(); if (!IsFirstIteration && !ToSimplify->count(&I)) continue; Value *V = SimplifyInstruction(&I, SQ.getWithInstruction(&I)); if (!V || !LI.replacementPreservesLCSSAForm(&I, V)) continue; for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;) { Use &U = *UI++; auto *UserI = cast<Instruction>(U.getUser()); U.set(V); // If the instruction is used by a PHI node we have already processed // we'll need to iterate on the loop body to converge, so add it to // the next set. if (auto *UserPI = dyn_cast<PHINode>(UserI)) if (VisitedPHIs.count(UserPI)) { Next->insert(UserPI); continue; } // If we are only simplifying targeted instructions and the user is an // instruction in the loop body, add it to our set of targeted // instructions. Because we process defs before uses (outside of PHIs) // we won't have visited it yet. // // We also skip any uses outside of the loop being simplified. Those // should always be PHI nodes due to LCSSA form, and we don't want to // try to simplify those away. 
assert((L.contains(UserI) || isa<PHINode>(UserI)) && "Uses outside the loop should be PHI nodes due to LCSSA!"); if (!IsFirstIteration && L.contains(UserI)) ToSimplify->insert(UserI); } if (MSSAU) if (Instruction *SimpleI = dyn_cast_or_null<Instruction>(V)) if (MemoryAccess *MA = MSSA->getMemoryAccess(&I)) if (MemoryAccess *ReplacementMA = MSSA->getMemoryAccess(SimpleI)) MA->replaceAllUsesWith(ReplacementMA); assert(I.use_empty() && "Should always have replaced all uses!"); if (isInstructionTriviallyDead(&I, &TLI)) DeadInsts.push_back(&I); ++NumSimplified; Changed = true; } } // Delete any dead instructions found thus far now that we've finished an // iteration over all instructions in all the loop blocks. if (!DeadInsts.empty()) { Changed = true; RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI, MSSAU); } if (MSSAU && VerifyMemorySSA) MSSA->verifyMemorySSA(); // If we never found a PHI that needs to be simplified in the next // iteration, we're done. if (Next->empty()) break; // Otherwise, put the next set in place for the next iteration and reset it // and the visited PHIs for that iteration. std::swap(Next, ToSimplify); Next->clear(); VisitedPHIs.clear(); DeadInsts.clear(); } return Changed; }
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (DisablePeephole) return false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); MRI = &MF.getRegInfo(); DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0; bool Changed = false; SmallPtrSet<MachineInstr*, 8> LocalMIs; SmallSet<unsigned, 4> ImmDefRegs; DenseMap<unsigned, MachineInstr*> ImmDefMIs; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; bool SeenMoveImm = false; LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); bool First = true; MachineBasicBlock::iterator PMII; for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { MachineInstr *MI = &*MII; LocalMIs.insert(MI); if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { ++MII; continue; } if (MI->isBitcast()) { if (optimizeBitcastInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); Changed = true; MII = First ? I->begin() : llvm::next(PMII); continue; } } else if (MI->isCompare()) { if (optimizeCmpInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); Changed = true; MII = First ? I->begin() : llvm::next(PMII); continue; } } if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { SeenMoveImm = true; } else { Changed |= optimizeExtInstr(MI, MBB, LocalMIs); if (SeenMoveImm) Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } First = false; PMII = MII; ++MII; } } return Changed; }
/// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(errs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); LV = getAnalysisIfAvailable<LiveVariables>(); AA = &getAnalysis<AliasAnalysis>(); bool MadeChange = false; DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(errs() << "********** Function: " << MF.getFunction()->getName() << '\n'); // ReMatRegs - Keep track of the registers whose def's are remat'ed. BitVector ReMatRegs; ReMatRegs.resize(MRI->getLastVirtReg()+1); typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > TiedOperandMap; TiedOperandMap TiedOperands(4); SmallPtrSet<MachineInstr*, 8> Processed; for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); mbbi != mbbe; ++mbbi) { unsigned Dist = 0; DistanceMap.clear(); SrcRegMap.clear(); DstRegMap.clear(); Processed.clear(); for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me; ) { MachineBasicBlock::iterator nmi = next(mi); const TargetInstrDesc &TID = mi->getDesc(); bool FirstTied = true; DistanceMap.insert(std::make_pair(mi, ++Dist)); ProcessCopy(&*mi, &*mbbi, Processed); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM) ? mi->getNumOperands() : TID.getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) continue; if (FirstTied) { FirstTied = false; ++NumTwoAddressInstrs; DEBUG(errs() << '\t' << *mi); } assert(mi->getOperand(SrcIdx).isReg() && mi->getOperand(SrcIdx).getReg() && mi->getOperand(SrcIdx).isUse() && "two address instruction invalid"); unsigned regB = mi->getOperand(SrcIdx).getReg(); TiedOperandMap::iterator OI = TiedOperands.find(regB); if (OI == TiedOperands.end()) { SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair; OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first; } OI->second.push_back(std::make_pair(SrcIdx, DstIdx)); } // Now iterate over the information collected above. for (TiedOperandMap::iterator OI = TiedOperands.begin(), OE = TiedOperands.end(); OI != OE; ++OI) { SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second; // If the instruction has a single pair of tied operands, try some // transformations that may either eliminate the tied operands or // improve the opportunities for coalescing away the register copy. if (TiedOperands.size() == 1 && TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; unsigned DstIdx = TiedPairs[0].second; // If the registers are already equal, nothing needs to be done. if (mi->getOperand(SrcIdx).getReg() == mi->getOperand(DstIdx).getReg()) break; // Done with this instruction. if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist)) break; // The tied operands have been eliminated. } bool RemovedKillFlag = false; bool AllUsesCopied = true; unsigned LastCopiedReg = 0; unsigned regB = OI->first; for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; unsigned DstIdx = TiedPairs[tpi].second; unsigned regA = mi->getOperand(DstIdx).getReg(); // Grab regB from the instruction because it may have changed if the // instruction was commuted. 
regB = mi->getOperand(SrcIdx).getReg(); if (regA == regB) { // The register is tied to multiple destinations (or else we would // not have continued this far), but this use of the register // already matches the tied destination. Leave it. AllUsesCopied = false; continue; } LastCopiedReg = regA; assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); #ifndef NDEBUG // First, verify that we don't have a use of "a" in the instruction // (a = b + a for example) because our transformation will not // work. This should never occur because we are in SSA form. for (unsigned i = 0; i != mi->getNumOperands(); ++i) assert(i == DstIdx || !mi->getOperand(i).isReg() || mi->getOperand(i).getReg() != regA); #endif // Emit a copy or rematerialize the definition. const TargetRegisterClass *rc = MRI->getRegClass(regB); MachineInstr *DefMI = MRI->getVRegDef(regB); // If it's safe and profitable, remat the definition instead of // copying it. if (DefMI && DefMI->getDesc().isAsCheapAsAMove() && DefMI->isSafeToReMat(TII, regB, AA) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI); ReMatRegs.set(regB); ++NumReMats; } else { bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc); (void)Emitted; assert(Emitted && "Unable to issue a copy instruction!\n"); } MachineBasicBlock::iterator prevMI = prior(mi); // Update DistanceMap. DistanceMap.insert(std::make_pair(prevMI, Dist)); DistanceMap[mi] = ++Dist; DEBUG(errs() << "\t\tprepend:\t" << *prevMI); MachineOperand &MO = mi->getOperand(SrcIdx); assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && "inconsistent operand info for 2-reg pass"); if (MO.isKill()) { MO.setIsKill(false); RemovedKillFlag = true; } MO.setReg(regA); } if (AllUsesCopied) { // Replace other (un-tied) uses of regB with LastCopiedReg. for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { MachineOperand &MO = mi->getOperand(i); if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { if (MO.isKill()) { MO.setIsKill(false); RemovedKillFlag = true; } MO.setReg(LastCopiedReg); } } // Update live variables for regB. if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) LV->addVirtualRegisterKilled(regB, prior(mi)); } else if (RemovedKillFlag) { // Some tied uses of regB matched their destination registers, so // regB is still used in this instruction, but a kill flag was // removed from a different tied use of regB, so now we need to add // a kill flag to one of the remaining uses of regB. for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { MachineOperand &MO = mi->getOperand(i); if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { MO.setIsKill(true); break; } } } MadeChange = true; DEBUG(errs() << "\t\trewrite to:\t" << *mi); } // Clear TiedOperands here instead of at the top of the loop // since most instructions do not have tied operands. TiedOperands.clear(); mi = nmi; } } // Some remat'ed instructions are dead. int VReg = ReMatRegs.find_first(); while (VReg != -1) { if (MRI->use_empty(VReg)) { MachineInstr *DefMI = MRI->getVRegDef(VReg); DefMI->eraseFromParent(); } VReg = ReMatRegs.find_next(VReg); } return MadeChange; }