bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();

    bool First = true;
    MachineBasicBlock::iterator PMII;
    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);

      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        ++MII;
        continue;
      }

      if (MI->isBitcast()) {
        if (optimizeBitcastInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      } else if (MI->isCompare()) {
        if (optimizeCmpInstr(MI, MBB)) {
          // MI is deleted.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      First = false;
      PMII = MII;
      ++MII;
    }
  }

  return Changed;
}
Value *PropagateJuliaAddrspaces::LiftPointer(Value *V, Type *LocTy, Instruction *InsertPt) {
    SmallVector<Value *, 4> Stack;
    Value *CurrentV = V;
    // Follow pointer casts back, see if we're based on a pointer in
    // an untracked address space, in which case we're allowed to drop
    // intermediate addrspace casts.
    while (true) {
        Stack.push_back(CurrentV);
        if (isa<BitCastInst>(CurrentV))
            CurrentV = cast<BitCastInst>(CurrentV)->getOperand(0);
        else if (isa<AddrSpaceCastInst>(CurrentV)) {
            CurrentV = cast<AddrSpaceCastInst>(CurrentV)->getOperand(0);
            if (!isSpecialAS(getValueAddrSpace(CurrentV)))
                break;
        }
        else if (isa<GetElementPtrInst>(CurrentV)) {
            if (LiftingMap.count(CurrentV)) {
                CurrentV = LiftingMap[CurrentV];
                break;
            } else if (Visited.count(CurrentV)) {
                return nullptr;
            }
            Visited.insert(CurrentV);
            CurrentV = cast<GetElementPtrInst>(CurrentV)->getOperand(0);
        }
        else
            break;
    }
    if (!CurrentV->getType()->isPointerTy())
        return nullptr;
    if (isSpecialAS(getValueAddrSpace(CurrentV)))
        return nullptr;
    // Ok, we're allowed to change the address space of this load, go back and
    // reconstitute any GEPs in the new address space.
    for (Value *V : llvm::reverse(Stack)) {
        GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);
        if (!GEP)
            continue;
        if (LiftingMap.count(GEP)) {
            CurrentV = LiftingMap[GEP];
            continue;
        }
        GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(GEP->clone());
        ToInsert.push_back(std::make_pair(NewGEP, GEP));
        Type *GEPTy = GEP->getSourceElementType();
        Type *NewRetTy = cast<PointerType>(GEP->getType())->getElementType()->getPointerTo(getValueAddrSpace(CurrentV));
        NewGEP->mutateType(NewRetTy);
        if (cast<PointerType>(CurrentV->getType())->getElementType() != GEPTy) {
            auto *BCI = new BitCastInst(CurrentV, GEPTy->getPointerTo());
            ToInsert.push_back(std::make_pair(BCI, NewGEP));
            CurrentV = BCI;
        }
        NewGEP->setOperand(GetElementPtrInst::getPointerOperandIndex(), CurrentV);
        LiftingMap[GEP] = NewGEP;
        CurrentV = NewGEP;
    }
    if (LocTy && cast<PointerType>(CurrentV->getType())->getElementType() != LocTy) {
        auto *BCI = new BitCastInst(CurrentV, LocTy->getPointerTo());
        ToInsert.push_back(std::make_pair(BCI, InsertPt));
        CurrentV = BCI;
    }
    return CurrentV;
}
void StackColoring::calculateLocalLiveness() {
  // Perform a standard reverse dataflow computation to solve for
  // global liveness. The BEGIN set here is equivalent to KILL in the standard
  // formulation, and END is equivalent to GEN. The result of this computation
  // is a map from blocks to bitvectors where the bitvectors represent which
  // allocas are live in/out of that block.
  SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
                                                 BasicBlockNumbering.end());
  unsigned NumSSMIters = 0;
  bool changed = true;
  while (changed) {
    changed = false;
    ++NumSSMIters;

    SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;

    for (const MachineBasicBlock *BB : BasicBlockNumbering) {
      if (!BBSet.count(BB)) continue;

      // Use an iterator to avoid repeated lookups.
      LivenessMap::iterator BI = BlockLiveness.find(BB);
      assert(BI != BlockLiveness.end() && "Block not found");
      BlockLifetimeInfo &BlockInfo = BI->second;

      BitVector LocalLiveIn;
      BitVector LocalLiveOut;

      // Forward propagation from begins to ends.
      for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
           PE = BB->pred_end(); PI != PE; ++PI) {
        LivenessMap::const_iterator I = BlockLiveness.find(*PI);
        assert(I != BlockLiveness.end() && "Predecessor not found");
        LocalLiveIn |= I->second.LiveOut;
      }
      LocalLiveIn |= BlockInfo.End;
      LocalLiveIn.reset(BlockInfo.Begin);

      // Reverse propagation from ends to begins.
      for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI) {
        LivenessMap::const_iterator I = BlockLiveness.find(*SI);
        assert(I != BlockLiveness.end() && "Successor not found");
        LocalLiveOut |= I->second.LiveIn;
      }
      LocalLiveOut |= BlockInfo.Begin;
      LocalLiveOut.reset(BlockInfo.End);

      LocalLiveIn |= LocalLiveOut;
      LocalLiveOut |= LocalLiveIn;

      // After adopting the live bits, we need to turn off the bits which
      // are de-activated in this block.
      LocalLiveOut.reset(BlockInfo.End);
      LocalLiveIn.reset(BlockInfo.Begin);

      // If we have both BEGIN and END markers in the same basic block then
      // we know that the BEGIN marker comes after the END, because we already
      // handle the case where the BEGIN comes before the END when collecting
      // the markers (and building the BEGIN/END vectors).
      // Want to enable the LIVE_IN and LIVE_OUT of slots that have both
      // BEGIN and END because it means that the value lives before and after
      // this basic block.
      BitVector LocalEndBegin = BlockInfo.End;
      LocalEndBegin &= BlockInfo.Begin;
      LocalLiveIn |= LocalEndBegin;
      LocalLiveOut |= LocalEndBegin;

      if (LocalLiveIn.test(BlockInfo.LiveIn)) {
        changed = true;
        BlockInfo.LiveIn |= LocalLiveIn;

        NextBBSet.insert(BB->pred_begin(), BB->pred_end());
      }

      if (LocalLiveOut.test(BlockInfo.LiveOut)) {
        changed = true;
        BlockInfo.LiveOut |= LocalLiveOut;

        NextBBSet.insert(BB->succ_begin(), BB->succ_end());
      }
    }

    BBSet = std::move(NextBBSet);
  } // while changed.
}
/// At this point, we're committed to promoting the alloca using IDF's, and
/// the standard SSA construction algorithm. Determine which blocks need phi
/// nodes and see if we can optimize out some work by avoiding insertion of
/// dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock *, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.
  typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
                              less_second> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
                                                     E = DefBlocks.end();
       I != E; ++I) {
    if (DomTreeNode *Node = DT.getNode(*I))
      PQ.push(std::make_pair(Node, DomLevels[Node]));
  }

  SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
  SmallPtrSet<DomTreeNode *, 32> Visited;
  SmallVector<DomTreeNode *, 32> Worklist;
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges
    // with targets elsewhere on the dominator tree. Only targets whose level
    // is at most Root's level are added to the iterated dominance frontier of
    // the definition set.

    Worklist.clear();
    Worklist.push_back(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges
        // instead of catching them below.
        if (SuccNode->getIDom() == Node)
          continue;

        unsigned SuccLevel = DomLevels[SuccNode];
        if (SuccLevel > RootLevel)
          continue;

        if (!Visited.insert(SuccNode))
          continue;

        BasicBlock *SuccBB = SuccNode->getBlock();
        if (!LiveInBlocks.count(SuccBB))
          continue;

        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
        if (!DefBlocks.count(SuccBB))
          PQ.push(std::make_pair(SuccNode, SuccLevel));
      }

      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
           ++CI) {
        if (!Visited.count(*CI))
          Worklist.push_back(*CI);
      }
    }
  }

  if (DFBlocks.size() > 1)
    std::sort(DFBlocks.begin(), DFBlocks.end());

  unsigned CurrentVersion = 0;
  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}
/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of
/// the result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this
/// changes the code. Since this code does not currently share EXTRACTs, just
/// ignore all debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
  // register.
  // If UseSrcSubIdx is set, SubIdx also applies to SrcReg, and only uses of
  // SrcReg:SubIdx should be replaced.
  bool UseSrcSubIdx = TM->getRegisterInfo()->
    getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;

  // The source has other uses. See if we can replace the other uses with use
  // of the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  for (MachineRegisterInfo::use_nodbg_iterator
       UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
       UI != UE; ++UI)
    ReachedBBs.insert(UI->getParent());

  // Uses that are in the same BB of uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  for (MachineRegisterInfo::use_nodbg_iterator
       UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end();
       UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = &*UI;
    if (UseMI == MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range
      // of the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result, we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions down
    // stream. A PHI use is expected to be the kill of its source values.
    for (MachineRegisterInfo::use_nodbg_iterator
         UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
         UI != UE; ++UI)
      if (UI->isPHI())
        PHIBBs.insert(UI->getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        MRI->constrainRegClass(DstReg, DstRC);
      }

      unsigned NewVR = MRI->createVirtualRegister(RC);
      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);
      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
      if (UseSrcSubIdx) {
        Copy->getOperand(0).setSubReg(SubIdx);
        Copy->getOperand(0).setIsUndef();
      }
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
/// InlineCallIfPossible - If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR.  The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller.  If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
                                 InlinedArrayAllocasTy &InlinedArrayAllocas,
                                 int InlineHistory) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  if (!InlineFunction(CS, IFI))
    return false;

  // If the inlined function had a higher stack protection level than the
  // calling function, then bump up the caller's stack protection level.
  if (Callee->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtectReq);
  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
           !Caller->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtect);

  // Look at all of the allocas that we inlined through this call site.  If we
  // have already inlined other allocas through other calls into this function,
  // then we know that they have disjoint lifetimes and that we can merge them.
  //
  // There are many heuristics possible for merging these allocas, and the
  // different options have different tradeoffs.  One thing that we *really*
  // don't want to hurt is SRoA: once inlining happens, often allocas are no
  // longer address taken and so they can be promoted.
  //
  // Our "solution" for that is to only merge allocas whose outermost type is
  // an array type.  These are usually not promoted because someone is using a
  // variable index into them.  These are also often the most important ones to
  // merge.
  //
  // A better solution would be to have real memory lifetime markers in the IR
  // and not have the inliner do any merging of allocas at all.  This would
  // allow the backend to do proper stack slot coloring of all allocas that
  // *actually make it to the backend*, which is really what we want.
  //
  // Because we don't have this information, we do this simple and useful hack.
  //
  SmallPtrSet<AllocaInst*, 16> UsedAllocas;

  // When processing our SCC, check to see if CS was inlined from some other
  // call site.  For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A.  Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining.  When we process
  // the C call site we don't want to do any alloca merging between X and Y
  // because their scopes are not disjoint.  We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1)  // Only do merging for top-level call sites in SCC.
    return true;

  // Loop over all the allocas we have so far and see if they can be merged
  // with a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (ATy == 0 || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function.  Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
      AllocaInst *AvailableAlloca = AllocasForType[i];

      // The available alloca has to be in the right function, not in some
      // other function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca))
        continue;

      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI
                   << "\n\t\tINTO: " << *AvailableAlloca << '\n');

      AI->replaceAllUsesWith(AvailableAlloca);
      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = 0;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and
    // mark it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }

  return true;
}
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block.  Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallPtrSet<Value*, 16> DeadStackObjects;

  // Find all of the alloca'd pointers in the entry block.
  BasicBlock *Entry = BB.getParent()->begin();
  for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      DeadStackObjects.insert(AI);

  // Treat byval arguments the same, stores to them are dead at the end of the
  // function.
  for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
       AE = BB.getParent()->arg_end(); AI != AE; ++AI)
    if (AI->hasByValAttr())
      DeadStackObjects.insert(AI);

  // Scan the basic block backwards
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
    --BBI;

    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
      // See through pointer-to-pointer bitcasts
      Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI));

      // Stores to stack values are valid candidates for removal.
      if (DeadStackObjects.count(Pointer)) {
        Instruction *Dead = BBI++;

        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Object: " << *Pointer << '\n');

        // DCE instructions only used to calculate that store.
        DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }

    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(BBI)) {
      Instruction *Inst = BBI++;
      DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }

    if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
      DeadStackObjects.erase(A);
      continue;
    }

    if (CallSite CS = cast<Value>(BBI)) {
      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;

      unsigned NumModRef = 0, NumOther = 0;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      SmallVector<Value*, 8> LiveAllocas;

      for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
           E = DeadStackObjects.end(); I != E; ++I) {
        // If we detect that our AA is imprecise, it's not worth it to scan the
        // rest of the DeadPointers set.  Just assume that the AA will return
        // ModRef for everything, and go ahead and bail out.
        if (NumModRef >= 16 && NumOther == 0)
          return MadeChange;

        // See if the call site touches it.
        AliasAnalysis::ModRefResult A =
          AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));

        if (A == AliasAnalysis::ModRef)
          ++NumModRef;
        else
          ++NumOther;

        if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
          LiveAllocas.push_back(*I);
      }

      for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
           E = LiveAllocas.end(); I != E; ++I)
        DeadStackObjects.erase(*I);

      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        return MadeChange;

      continue;
    }

    AliasAnalysis::Location LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      LoadedLoc = AA->getLocation(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = AA->getLocation(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = AA->getLocationForSource(MTI);
    } else {
      // Not a loading instruction.
      continue;
    }

    // Remove any allocas from the DeadPointer set that are loaded, as this
    // makes any stores above the access live.
    RemoveAccessedObjects(LoadedLoc, DeadStackObjects);

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }

  return MadeChange;
}
/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block.  If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}
bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
  SmallPtrSet<CallGraphNode *, 8> SCCNodes;
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  bool MadeChange = false;

  // Fill SCCNodes with the elements of the SCC.  Used for quickly
  // looking up whether a given CallGraphNode is in this SCC.
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
    SCCNodes.insert(*I);

  // First pass, scan all of the functions in the SCC, simplifying them
  // according to what we know.
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
    if (Function *F = (*I)->getFunction())
      MadeChange |= SimplifyFunction(F);

  // Next, check to see if any callees might throw or if there are any external
  // functions in this SCC: if so, we cannot prune any functions in this SCC.
  // Definitions that are weak and not declared non-throwing might be
  // overridden at linktime with something that throws, so assume that.
  // If this SCC includes the unwind instruction, we KNOW it throws, so
  // obviously the SCC might throw.
  //
  bool SCCMightUnwind = false, SCCMightReturn = false;
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
       (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (!F) {
      SCCMightUnwind = true;
      SCCMightReturn = true;
    } else if (F->isDeclaration() || F->mayBeOverridden()) {
      SCCMightUnwind |= !F->doesNotThrow();
      SCCMightReturn |= !F->doesNotReturn();
    } else {
      bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
      bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
      // Determine if we should scan for InlineAsm in a naked function as it
      // is the only way to return without a ReturnInst.  Only do this for
      // no-inline functions as functions which may be inlined cannot
      // meaningfully return via assembly.
      bool CheckReturnViaAsm = CheckReturn &&
                               F->hasFnAttribute(Attribute::Naked) &&
                               F->hasFnAttribute(Attribute::NoInline);

      if (!CheckUnwind && !CheckReturn)
        continue;

      for (const BasicBlock &BB : *F) {
        const TerminatorInst *TI = BB.getTerminator();
        if (CheckUnwind && TI->mayThrow()) {
          SCCMightUnwind = true;
        } else if (CheckReturn && isa<ReturnInst>(TI)) {
          SCCMightReturn = true;
        }

        for (const Instruction &I : BB) {
          if ((!CheckUnwind || SCCMightUnwind) &&
              (!CheckReturnViaAsm || SCCMightReturn))
            break;

          // Check to see if this function performs an unwind or calls an
          // unwinding function.
          if (CheckUnwind && !SCCMightUnwind && I.mayThrow()) {
            bool InstMightUnwind = true;
            if (const auto *CI = dyn_cast<CallInst>(&I)) {
              if (Function *Callee = CI->getCalledFunction()) {
                CallGraphNode *CalleeNode = CG[Callee];
                // If the callee is outside our current SCC then we may throw
                // because it might.  If it is inside, do nothing.
                if (SCCNodes.count(CalleeNode) > 0)
                  InstMightUnwind = false;
              }
            }
            SCCMightUnwind |= InstMightUnwind;
          }
          if (CheckReturnViaAsm && !SCCMightReturn)
            if (auto ICS = ImmutableCallSite(&I))
              if (const auto *IA = dyn_cast<InlineAsm>(ICS.getCalledValue()))
                if (IA->hasSideEffects())
                  SCCMightReturn = true;
        }

        if (SCCMightUnwind && SCCMightReturn)
          break;
      }
    }
  }

  // If the SCC doesn't unwind or doesn't throw, note this fact.
  if (!SCCMightUnwind || !SCCMightReturn)
    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
      AttrBuilder NewAttributes;

      if (!SCCMightUnwind)
        NewAttributes.addAttribute(Attribute::NoUnwind);
      if (!SCCMightReturn)
        NewAttributes.addAttribute(Attribute::NoReturn);

      Function *F = (*I)->getFunction();
      const AttributeSet &PAL = F->getAttributes().getFnAttributes();
      const AttributeSet &NPAL = AttributeSet::get(
          F->getContext(), AttributeSet::FunctionIndex, NewAttributes);

      if (PAL != NPAL) {
        MadeChange = true;
        F->addAttributes(AttributeSet::FunctionIndex, NPAL);
      }
    }

  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    // Convert any invoke instructions to non-throwing functions in this node
    // into call instructions with a branch.  This makes the exception blocks
    // dead.
    if (Function *F = (*I)->getFunction())
      MadeChange |= SimplifyFunction(F);
  }

  return MadeChange;
}
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);

  std::vector<GlobalAlias*> AliasesToRemove;

  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;

  for (GlobalAlias &A : M.aliases()) {
    if (Function* F = dyn_cast<Function>(A.getAliasee())) {
      A.replaceAllUsesWith(F);
      AliasesToRemove.push_back(&A);
    }

    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
  }

  if (GlobalOpt) {
    for (GlobalAlias* A : AliasesToRemove) {
      A->eraseFromParent();
    }
  }

  // Always force inlining of any function that uses an LDS global address.
  // This is something of a workaround because we don't have a way of
  // supporting LDS objects defined in functions. LDS is always allocated by a
  // kernel, and it is difficult to manage LDS usage if a function may be used
  // by multiple kernels.
  //
  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
  // should only appear when IPO passes manage to move LDS defined in a kernel
  // into a single user function.

  for (GlobalVariable &GV : M.globals()) {
    // TODO: Region address
    unsigned AS = GV.getType()->getAddressSpace();
    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
      continue;

    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
    auto IncompatAttr
      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

    for (Function &F : M) {
      if (!F.isDeclaration() && !F.use_empty() &&
          !F.hasFnAttribute(IncompatAttr)) {
        if (StressCalls) {
          if (!FuncsToAlwaysInline.count(&F))
            FuncsToNoInline.insert(&F);
        } else
          FuncsToAlwaysInline.insert(&F);
      }
    }
  }

  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
                                           Loop *TheLoop, bool HasFunNoNaNAttr,
                                           RecurrenceDescriptor &RedDes) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

  // Reduction variables are only found in the loop header block.
  if (Phi->getParent() != TheLoop->getHeader())
    return false;

  // Obtain the reduction start value from the value that comes from the loop
  // preheader.
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

  // ExitInstruction is the single value which is used outside the loop.
  // We only allow for a single reduction value to be used outside the loop.
  // This includes users of the reduction, variables (which form a cycle
  // which ends in the phi node).
  Instruction *ExitInstruction = nullptr;
  // Indicates that we found a reduction operation in our scan.
  bool FoundReduxOp = false;

  // We start with the PHI node and scan for all of the users of this
  // instruction. All users must be instructions that can be used as reduction
  // variables (such as ADD). We must have a single out-of-block user. The
  // cycle must include the original PHI.
  bool FoundStartPHI = false;

  // To recognize min/max patterns formed by a icmp select sequence, we store
  // the number of instruction we saw from the recognized min/max pattern,
  // to make sure we only see exactly the two instructions.
  unsigned NumCmpSelectPatternInst = 0;
  InstDesc ReduxDesc(false, nullptr);

  // Data used for determining if the recurrence has been type-promoted.
  Type *RecurrenceType = Phi->getType();
  SmallPtrSet<Instruction *, 4> CastInsts;
  Instruction *Start = Phi;
  bool IsSigned = false;

  SmallPtrSet<Instruction *, 8> VisitedInsts;
  SmallVector<Instruction *, 8> Worklist;

  // Return early if the recurrence kind does not match the type of Phi. If the
  // recurrence kind is arithmetic, we attempt to look through AND operations
  // resulting from the type promotion performed by InstCombine.  Vector
  // operations are not limited to the legal integer widths, so we may be able
  // to evaluate the reduction in the narrower width.
  if (RecurrenceType->isFloatingPointTy()) {
    if (!isFloatingPointRecurrenceKind(Kind))
      return false;
  } else {
    if (!isIntegerRecurrenceKind(Kind))
      return false;
    if (isArithmeticRecurrenceKind(Kind))
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
  }

  Worklist.push_back(Start);
  VisitedInsts.insert(Start);

  // A value in the reduction can be used:
  //  - By the reduction:
  //      - Reduction operation:
  //        - One use of reduction value (safe).
  //        - Multiple use of reduction value (not safe).
  //      - PHI:
  //        - All uses of the PHI must be the reduction (safe).
  //        - Otherwise, not safe.
  //  - By one instruction outside of the loop (safe).
  //  - By further instructions outside of the loop (not safe).
  //  - By an instruction that is not part of the reduction (not safe).
  //    This is either:
  //      * An instruction type other than PHI or the reduction operation.
  //      * A PHI in the header other than the initial PHI.
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.back();
    Worklist.pop_back();

    // No Users.
    // If the instruction has no users then this is a broken chain and can't be
    // a reduction variable.
    if (Cur->use_empty())
      return false;

    bool IsAPhi = isa<PHINode>(Cur);

    // A header PHI use other than the original PHI.
    if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
      return false;

    // Reductions of instructions such as Div and Sub are only possible if the
    // LHS is the reduction variable.
    if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
        !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;

    // Any reduction instruction must be of one of the allowed kinds. We ignore
    // the starting value (the Phi or an AND instruction if the Phi has been
    // type-promoted).
    if (Cur != Start) {
      ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
      if (!ReduxDesc.isRecurrence())
        return false;
    }

    // A reduction operation must only have one use of the reduction value.
    if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
        hasMultipleUsesOf(Cur, VisitedInsts))
      return false;

    // All inputs to a PHI node must be a reduction value.
    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
      return false;

    if (Kind == RK_IntegerMinMax &&
        (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;
    if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;

    // Check whether we found a reduction operator.
    FoundReduxOp |= !IsAPhi && Cur != Start;

    // Process users of current instruction. Push non-PHI nodes after PHI nodes
    // onto the stack. This way we are going to have seen all inputs to PHI
    // nodes once we get to them.
    SmallVector<Instruction *, 8> NonPHIs;
    SmallVector<Instruction *, 8> PHIs;
    for (User *U : Cur->users()) {
      Instruction *UI = cast<Instruction>(U);

      // Check if we found the exit user.
      BasicBlock *Parent = UI->getParent();
      if (!TheLoop->contains(Parent)) {
        // Exit if you find multiple outside users or if the header phi node is
        // being used. In this case the user uses the value of the previous
        // iteration, in which case we would lose "VF-1" iterations of the
        // reduction operation if we vectorize.
        if (ExitInstruction != nullptr || Cur == Phi)
          return false;

        // The instruction used by an outside user must be the last instruction
        // before we feed back to the reduction phi. Otherwise, we lose VF-1
        // operations on the value.
        if (!is_contained(Phi->operands(), Cur))
          return false;

        ExitInstruction = Cur;
        continue;
      }

      // Process instructions only once (termination). Each reduction cycle
      // value must only be used once, except by phi nodes and min/max
      // reductions which are represented as a cmp followed by a select.
      InstDesc IgnoredVal(false, nullptr);
      if (VisitedInsts.insert(UI).second) {
        if (isa<PHINode>(UI))
          PHIs.push_back(UI);
        else
          NonPHIs.push_back(UI);
      } else if (!isa<PHINode>(UI) &&
                 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                   !isa<SelectInst>(UI)) ||
                  !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
        return false;

      // Remember that we completed the cycle.
      if (UI == Phi)
        FoundStartPHI = true;
    }
    Worklist.append(PHIs.begin(), PHIs.end());
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
  }

  // This means we have seen one but not the other instruction of the
  // pattern or more than just a select and cmp.
  if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
      NumCmpSelectPatternInst != 2)
    return false;

  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

  // If we think Phi may have been type-promoted, we also need to ensure that
  // all source operands of the reduction are either SExtInsts or ZExtInsts. If
  // so, we will be able to evaluate the reduction in the narrower bit width.
  if (Start != Phi)
    if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
                                IsSigned, VisitedInsts, CastInsts))
      return false;

  // We found a reduction var if we have reached the original phi node and we
  // only have a single instruction with out-of-loop users.

  // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
  // is saved as part of the RecurrenceDescriptor.

  // Save the description of this reduction variable.
  RecurrenceDescriptor RD(
      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);

  RedDes = RD;

  return true;
}
/// findValueImpl - Implementation helper for findValue.
Value *Lint::findValueImpl(Value *V, bool OffsetOk,
                           SmallPtrSetImpl<Value *> &Visited) const {
  // Detect self-referential values.
  if (!Visited.insert(V).second)
    return UndefValue::get(V->getType());

  // TODO: Look through sext or zext cast, when the result is known to
  // be interpreted as signed or unsigned, respectively.
  // TODO: Look through eliminable cast pairs.
  // TODO: Look through calls with unique return values.
  // TODO: Look through vector insert/extract/shuffle.
  V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
    BasicBlock::iterator BBI = L->getIterator();
    BasicBlock *BB = L->getParent();
    SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
    for (;;) {
      if (!VisitedBlocks.insert(BB).second)
        break;
      if (Value *U =
          FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA))
        return findValueImpl(U, OffsetOk, Visited);
      if (BBI != BB->begin())
        break;
      BB = BB->getUniquePredecessor();
      if (!BB)
        break;
      BBI = BB->end();
    }
  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
    if (Value *W = PN->hasConstantValue())
      if (W != V)
        return findValueImpl(W, OffsetOk, Visited);
  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
    if (CI->isNoopCast(*DL))
      return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
    if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
                                     Ex->getIndices()))
      if (W != V)
        return findValueImpl(W, OffsetOk, Visited);
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    // Same as above, but for ConstantExpr instead of Instruction.
    if (Instruction::isCast(CE->getOpcode())) {
      if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
                               CE->getOperand(0)->getType(), CE->getType(),
                               DL->getIntPtrType(V->getType())))
        return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
    } else if (CE->getOpcode() == Instruction::ExtractValue) {
      ArrayRef<unsigned> Indices = CE->getIndices();
      if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
        if (W != V)
          return findValueImpl(W, OffsetOk, Visited);
    }
  }

  // As a last resort, try SimplifyInstruction or constant folding.
  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
    if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
      return findValueImpl(W, OffsetOk, Visited);
  } else if (auto *C = dyn_cast<Constant>(V)) {
    if (Value *W = ConstantFoldConstant(C, *DL, TLI))
      if (W && W != V)
        return findValueImpl(W, OffsetOk, Visited);
  }

  return V;
}
static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
                             SmallPtrSet<const PHINode *, 16> &PhiUsers) {
  for (const Use &U : V->uses()) {
    const User *UR = U.getUser();
    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
      GS.HasNonInstructionUser = true;

      // If the result of the constantexpr isn't pointer type, then we won't
      // know to expect it in various places.  Just reject early.
      if (!isa<PointerType>(CE->getType()))
        return true;

      if (analyzeGlobalAux(CE, GS, PhiUsers))
        return true;
    } else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
      if (!GS.HasMultipleAccessingFunctions) {
        if (I->getParent() && I->getParent()->getParent()) {
          const Function *F = I->getParent()->getParent();
          if (!GS.AccessingFunction)
            GS.AccessingFunction = F;
          else if (GS.AccessingFunction != F)
            GS.HasMultipleAccessingFunctions = true;
        }
      }
      if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
        GS.IsLoaded = true;
        // Don't hack on volatile loads.
        if (LI->isVolatile())
          return true;
        GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
      } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
        // Don't allow a store OF the address, only stores TO the address.
        if (SI->getOperand(0) == V)
          return true;

        // Don't hack on volatile stores.
        if (SI->isVolatile())
          return true;

        GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());

        // If this is a direct store to the global (i.e., the global is a
        // scalar value, not an aggregate), keep more specific information
        // about stores.
        if (GS.StoredType != GlobalStatus::Stored) {
          if (const GlobalVariable *GV =
                  dyn_cast<GlobalVariable>(SI->getOperand(1))) {
            Value *StoredVal = SI->getOperand(0);

            if (Constant *C = dyn_cast<Constant>(StoredVal)) {
              if (C->isThreadDependent()) {
                // The stored value changes between threads; don't track it.
                return true;
              }
            }

            if (StoredVal == GV->getInitializer()) {
              if (GS.StoredType < GlobalStatus::InitializerStored)
                GS.StoredType = GlobalStatus::InitializerStored;
            } else if (isa<LoadInst>(StoredVal) &&
                       cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
              if (GS.StoredType < GlobalStatus::InitializerStored)
                GS.StoredType = GlobalStatus::InitializerStored;
            } else if (GS.StoredType < GlobalStatus::StoredOnce) {
              GS.StoredType = GlobalStatus::StoredOnce;
              GS.StoredOnceValue = StoredVal;
            } else if (GS.StoredType == GlobalStatus::StoredOnce &&
                       GS.StoredOnceValue == StoredVal) {
              // noop.
            } else {
              GS.StoredType = GlobalStatus::Stored;
            }
          } else {
            GS.StoredType = GlobalStatus::Stored;
          }
        }
      } else if (isa<BitCastInst>(I)) {
        if (analyzeGlobalAux(I, GS, PhiUsers))
          return true;
      } else if (isa<GetElementPtrInst>(I)) {
        if (analyzeGlobalAux(I, GS, PhiUsers))
          return true;
      } else if (isa<SelectInst>(I)) {
        if (analyzeGlobalAux(I, GS, PhiUsers))
          return true;
      } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        // PHI nodes we can check just like select or GEP instructions, but we
        // have to be careful about infinite recursion.
        if (PhiUsers.insert(PN)) // Not already visited.
          if (analyzeGlobalAux(I, GS, PhiUsers))
            return true;
      } else if (isa<CmpInst>(I)) {
        GS.IsCompared = true;
      } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
        if (MTI->isVolatile())
          return true;
        if (MTI->getArgOperand(0) == V)
          GS.StoredType = GlobalStatus::Stored;
        if (MTI->getArgOperand(1) == V)
          GS.IsLoaded = true;
      } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
        assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
        if (MSI->isVolatile())
          return true;
        GS.StoredType = GlobalStatus::Stored;
      } else if (ImmutableCallSite C = I) {
        if (!C.isCallee(&U))
          return true;
        GS.IsLoaded = true;
      } else {
        return true; // Any other non-load instruction might take address!
      }
    } else if (const Constant *C = dyn_cast<Constant>(UR)) {
      GS.HasNonInstructionUser = true;
      // We might have a dead and dangling constant hanging off of here.
      if (!isSafeToDestroyConstant(C))
        return true;
    } else {
      GS.HasNonInstructionUser = true;
      // Otherwise must be some other user.
      return true;
    }
  }

  return false;
}
/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
/// Emit code to ensure constants are copied into registers when needed.
/// Remember the virtual registers that need to be added to the Machine PHI
/// nodes as input.  We cannot just directly add them, because expansion
/// might result in multiple MBB's for one BB.  As such, the start of the
/// BB might correspond to a different MBB than the end.
bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
  const TerminatorInst *TI = LLVMBB->getTerminator();

  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
  unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();

  // Check successor nodes' PHI nodes that expect a constant to be available
  // from this block.
  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
    const BasicBlock *SuccBB = TI->getSuccessor(succ);
    if (!isa<PHINode>(SuccBB->begin())) continue;
    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];

    // If this terminator has multiple identical successors (common for
    // switches), only handle each succ once.
    if (!SuccsHandled.insert(SuccMBB)) continue;

    MachineBasicBlock::iterator MBBI = SuccMBB->begin();

    // At this point we know that there is a 1-1 correspondence between LLVM
    // PHI nodes and Machine PHI nodes, but the incoming operands have not been
    // emitted yet.
    for (BasicBlock::const_iterator I = SuccBB->begin();
         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {

      // Ignore dead phi's.
      if (PN->use_empty()) continue;

      // Only handle legal types. Two interesting things to note here. First,
      // by bailing out early, we may leave behind some dead instructions,
      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
      // own moves. Second, this check is necessary because FastISel doesn't
      // use CreateRegs to create registers, so it always creates
      // exactly one register for each non-void instruction.
      EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
        // Promote MVT::i1.
        if (VT == MVT::i1)
          VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
        else {
          FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
          return false;
        }
      }

      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);

      // Set the DebugLoc for the copy. Prefer the location of the operand
      // if there is one; use the location of the PHI otherwise.
      DL = PN->getDebugLoc();
      if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
        DL = Inst->getDebugLoc();

      unsigned Reg = getRegForValue(PHIOp);
      if (Reg == 0) {
        FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
        return false;
      }
      FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
      DL = DebugLoc();
    }
  }

  return true;
}
static bool markAliveBlocks(BasicBlock *BB,
                            SmallPtrSet<BasicBlock*, 128> &Reachable) {

  SmallVector<BasicBlock*, 128> Worklist;
  Worklist.push_back(BB);
  Reachable.insert(BB);
  bool Changed = false;
  do {
    BB = Worklist.pop_back_val();

    // Do a quick scan of the basic block, turning any obviously unreachable
    // instructions into LLVM unreachable insts.  The instruction combining pass
    // canonicalizes unreachable insts into stores to null or undef.
    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
      if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
        if (CI->doesNotReturn()) {
          // If we found a call to a no-return function, insert an unreachable
          // instruction after it.  Make sure there isn't *already* one there
          // though.
          ++BBI;
          if (!isa<UnreachableInst>(BBI)) {
            // Don't insert a call to llvm.trap right before the unreachable.
            changeToUnreachable(BBI, false);
            Changed = true;
          }
          break;
        }
      }

      // Store to undef and store to null are undefined and used to signal that
      // they should be changed to unreachable by passes that can't modify the
      // CFG.
      if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
        // Don't touch volatile stores.
        if (SI->isVolatile()) continue;

        Value *Ptr = SI->getOperand(1);

        if (isa<UndefValue>(Ptr) ||
            (isa<ConstantPointerNull>(Ptr) &&
             SI->getPointerAddressSpace() == 0)) {
          changeToUnreachable(SI, true);
          Changed = true;
          break;
        }
      }
    }

    // Turn invokes that call 'nounwind' functions into ordinary calls.
    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
      Value *Callee = II->getCalledValue();
      if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
        changeToUnreachable(II, true);
        Changed = true;
      } else if (II->doesNotThrow()) {
        if (II->use_empty() && II->onlyReadsMemory()) {
          // jump to the normal destination branch.
          BranchInst::Create(II->getNormalDest(), II);
          II->getUnwindDest()->removePredecessor(II->getParent());
          II->eraseFromParent();
        } else
          changeToCall(II);
        Changed = true;
      }
    }

    Changed |= ConstantFoldTerminator(BB, true);
    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
      if (Reachable.insert(*SI))
        Worklist.push_back(*SI);
  } while (!Worklist.empty());
  return Changed;
}
/// Some initialization for the rename part
///
void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) {
  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
       E = value.end(); I != E; ++I)
    value_stack[*I].push_back(*I);
}
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraph>();
  const TargetData *TD = getAnalysisIfAvailable<TargetData>();

  SmallPtrSet<Function*, 8> SCCFunctions;
  DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (F) SCCFunctions.insert(F);
    DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallSite, int>, 16> CallSites;

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function*, int>, 8> InlineHistory;

  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (!F) continue;

    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
        CallSite CS(cast<Value>(I));
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CS || isa<IntrinsicInst>(I))
          continue;

        // If this is a direct call to an external function, we can never
        // inline it.  If it is an indirect call, inlining may resolve it to be
        // a direct call, so we keep it.
        if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
          continue;

        CallSites.push_back(std::make_pair(CS, -1));
      }
  }

  DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no calls in this function, exit early.
  if (CallSites.empty())
    return false;

  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].first.getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);

  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, TD);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause
    // indirect calls to become direct calls.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi].first;

      Function *Caller = CS.getCaller();
      Function *Callee = CS.getCalledFunction();

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size.  This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (isInstructionTriviallyDead(CS.getInstruction())) {
        DEBUG(dbgs() << "    -> Deleting dead call: "
                     << *CS.getInstruction() << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        CG[Caller]->removeCallEdgeFor(CS);
        CS.getInstruction()->eraseFromParent();
        ++NumCallsDeleted;
        // Update the cached cost info with the missing call
        growCachedCostInfo(Caller, NULL);
      } else {
        // We can only inline direct calls to non-declarations.
        if (Callee == 0 || Callee->isDeclaration()) continue;

        // If this call site was obtained by inlining another function, verify
        // that the include path for the function did not include the callee
        // itself.  If so, we'd be recursively inlining the same function,
        // which would provide the same callsites, which would cause us to
        // infinitely inline.
        int InlineHistoryID = CallSites[CSi].second;
        if (InlineHistoryID != -1 &&
            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
          continue;

        // If the policy determines that we should inline this function,
        // try to do so.
        if (!shouldInline(CS))
          continue;

        // Attempt to inline the function.
        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                  InlineHistoryID))
          continue;
        ++NumInlined;

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process.  They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

          for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
               i != e; ++i) {
            Value *Ptr = InlineInfo.InlinedCalls[i];
            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
          }
        }

        // Update the cached cost info with the inlined call.
        growCachedCostInfo(Caller, Callee);
      }

      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&

          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet, this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        DEBUG(dbgs() << "    -> Deleting dead function: "
                     << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        resetCachedCostInfo(Callee);

        // Removing the node for callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list.  If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin()+CSi);
      }
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}
/// Renames all variables in the specified BasicBlock. /// Only variables that need to be renamed will be. /// void SSI::rename(BasicBlock *BB) { SmallPtrSet<Instruction*, 8> defined; // Iterate through instructions and make appropriate renaming. // For SSI_PHI (b = PHI()), store b at value_stack as a new // definition of the variable it represents. // For SSI_SIG (b = PHI(a)), substitute a with the current // value of a, present in the value_stack. // Then store b in the value_stack as the new definition of a. // For all other instructions (b = OP(a, c, d, ...)), we need to substitute // all operands with their current value, present in value_stack. for (BasicBlock::iterator begin = BB->begin(), end = BB->end(); begin != end; ++begin) { Instruction *I = begin; if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions Instruction* position; // Treat SSI_PHI if ((position = getPositionPhi(PN))) { value_stack[position].push_back(PN); defined.insert(position); // Treat SSI_SIG } else if ((position = getPositionSigma(PN))) { substituteUse(I); value_stack[position].push_back(PN); defined.insert(position); } // Treat all other PHI functions else { substituteUse(I); } } // Treat all other functions else { substituteUse(I); } } // This loop iterates over all BasicBlocks that are successors of the current // BasicBlock. For each SSI_PHI instruction found, insert an operand. // This operand is the current operand in value_stack for the variable // in "position". And the BasicBlock this operand represents is the current // BasicBlock. for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { BasicBlock *BB_succ = *SI; for (BasicBlock::iterator begin = BB_succ->begin(), notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) { Instruction *I = begin; PHINode *PN = dyn_cast<PHINode>(I); Instruction* position; if (PN && ((position = getPositionPhi(PN)))) { PN->addIncoming(value_stack[position].back(), BB); } } } // This loop calls rename on all children from this block. This time children // refers to a successor block in the dominance tree. DomTreeNode *DTN = DT_->getNode(BB); for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end(); begin != end; ++begin) { DomTreeNodeBase<BasicBlock> *DTN_children = *begin; BasicBlock *BB_children = DTN_children->getBlock(); rename(BB_children); } // Now we remove all inserted definitions of a variable from the top of // the stack leaving the previous one as the top. for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(), DE = defined.end(); DI != DE; ++DI) value_stack[*DI].pop_back(); }
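The renaming above relies on a per-variable stack of reaching definitions that is pushed while descending the dominator tree and popped on the way back up. Below is a minimal sketch of that discipline with invented toy types (DomNode, renameWalk), not the SSI pass's own structures.

#include <map>
#include <string>
#include <vector>

struct DomNode {
  std::vector<std::string> NewDefs; // definitions created in this block
  std::vector<DomNode *> Children;  // dominator-tree children
};

static void renameWalk(DomNode *N,
                       std::map<std::string, std::vector<int>> &ValueStack,
                       int &NextVersion) {
  std::vector<std::string> Pushed;
  for (const std::string &Var : N->NewDefs) {
    ValueStack[Var].push_back(NextVersion++); // new reaching definition
    Pushed.push_back(Var);
  }
  for (DomNode *Child : N->Children)
    renameWalk(Child, ValueStack, NextVersion); // recurse, like rename(BB_children)
  for (const std::string &Var : Pushed)
    ValueStack[Var].pop_back(); // restore the previous reaching definition
}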
static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, const TargetLibraryInfo *TLI) { SmallVector<BasicBlock *, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); array_pod_sort(ExitBlocks.begin(), ExitBlocks.end()); SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; // The bit we are stealing from the pointer represents whether this basic // block is the header of a subloop, in which case we only process its phis. typedef PointerIntPair<BasicBlock *, 1> WorklistItem; SmallVector<WorklistItem, 16> VisitStack; SmallPtrSet<BasicBlock *, 32> Visited; bool Changed = false; bool LocalChanged; do { LocalChanged = false; VisitStack.clear(); Visited.clear(); VisitStack.push_back(WorklistItem(L->getHeader(), false)); while (!VisitStack.empty()) { WorklistItem Item = VisitStack.pop_back_val(); BasicBlock *BB = Item.getPointer(); bool IsSubloopHeader = Item.getInt(); const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); // Simplify instructions in the current basic block. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *I = &*BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not // empty and we only bother simplifying instructions that are in it. if (!ToSimplify->empty() && !ToSimplify->count(I)) continue; // Don't bother simplifying unused instructions. if (!I->use_empty()) { Value *V = SimplifyInstruction(I, DL, TLI, DT, AC); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) Next->insert(cast<Instruction>(U)); I->replaceAllUsesWith(V); LocalChanged = true; ++NumSimplified; } } if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) { // RecursivelyDeleteTriviallyDeadInstruction can remove more than one // instruction, so simply incrementing the iterator does not work. // When instructions get deleted re-iterate instead. BI = BB->begin(); BE = BB->end(); LocalChanged = true; } if (IsSubloopHeader && !isa<PHINode>(I)) break; } // Add all successors to the worklist, except for loop exit blocks and the // bodies of subloops. We visit the headers of loops so that we can // process // their phis, but we contract the rest of the subloop body and only // follow // edges leading back to the original loop. for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { BasicBlock *SuccBB = *SI; if (!Visited.insert(SuccBB).second) continue; const Loop *SuccLoop = LI->getLoopFor(SuccBB); if (SuccLoop && SuccLoop->getHeader() == SuccBB && L->contains(SuccLoop)) { VisitStack.push_back(WorklistItem(SuccBB, true)); SmallVector<BasicBlock *, 8> SubLoopExitBlocks; SuccLoop->getExitBlocks(SubLoopExitBlocks); for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) { BasicBlock *ExitBB = SubLoopExitBlocks[i]; if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB).second) VisitStack.push_back(WorklistItem(ExitBB, false)); } continue; } bool IsExitBlock = std::binary_search(ExitBlocks.begin(), ExitBlocks.end(), SuccBB); if (IsExitBlock) continue; VisitStack.push_back(WorklistItem(SuccBB, false)); } } // Place the list of instructions to simplify on the next loop iteration // into ToSimplify. std::swap(ToSimplify, Next); Next->clear(); Changed |= LocalChanged; } while (LocalChanged); return Changed; }
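The worklist above packs a one-bit flag ("is this a subloop header?") into the spare low bit of a BasicBlock pointer via PointerIntPair. A rough sketch of that bit-stealing idea in plain C++, with invented names and under the assumption that the pointee type is at least 2-byte aligned:

#include <cassert>
#include <cstdint>

// Toy stand-in for PointerIntPair<T *, 1>: the low bit of an aligned pointer
// stores a boolean, so a (pointer, flag) pair still fits in one word.
template <typename T> class PtrBoolPair {
  uintptr_t Bits = 0;

public:
  PtrBoolPair(T *P, bool Flag) {
    assert((reinterpret_cast<uintptr_t>(P) & 1) == 0 && "pointer not aligned");
    Bits = reinterpret_cast<uintptr_t>(P) | static_cast<uintptr_t>(Flag);
  }
  T *getPointer() const { return reinterpret_cast<T *>(Bits & ~uintptr_t(1)); }
  bool getInt() const { return (Bits & 1) != 0; }
};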
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MRI = &mf.getRegInfo(); TRI = MF->getTarget().getRegisterInfo(); ReservedRegisters = TRI->getReservedRegs(mf); unsigned NumRegs = TRI->getNumRegs(); PhysRegDef = new MachineInstr*[NumRegs]; PhysRegUse = new MachineInstr*[NumRegs]; PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); PHIJoins.clear(); // FIXME: LiveIntervals will be updated to remove its dependence on // LiveVariables to improve compilation time and eliminate bizarre pass // dependencies. Until then, we can't change much in -O0. if (!MRI->isSSA()) report_fatal_error("regalloc=... not currently supported with -O0"); analyzePHINodes(mf); // Calculate live variable information in depth first order on the CFG of the // function. This guarantees that we will see the definition of a virtual // register before its uses due to dominance properties of SSA (except for PHI // nodes, which are treated as a special case). MachineBasicBlock *Entry = MF->begin(); SmallPtrSet<MachineBasicBlock*,16> Visited; for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); DFI != E; ++DFI) { MachineBasicBlock *MBB = *DFI; // Mark live-in registers as live-in. SmallVector<unsigned, 4> Defs; for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); HandlePhysRegDef(*II, 0, Defs); } // Loop over all of the instructions, processing them. DistanceMap.clear(); unsigned Dist = 0; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { MachineInstr *MI = I; if (MI->isDebugValue()) continue; DistanceMap.insert(std::make_pair(MI, Dist++)); // Process all of the operands of the instruction... unsigned NumOperandsToProcess = MI->getNumOperands(); // Unless it is a PHI node. In this case, ONLY process the DEF, not any // of the uses. They will be handled in other basic blocks. if (MI->isPHI()) NumOperandsToProcess = 1; // Clear kill and dead markers. LV will recompute them. SmallVector<unsigned, 4> UseRegs; SmallVector<unsigned, 4> DefRegs; SmallVector<unsigned, 1> RegMasks; for (unsigned i = 0; i != NumOperandsToProcess; ++i) { MachineOperand &MO = MI->getOperand(i); if (MO.isRegMask()) { RegMasks.push_back(i); continue; } if (!MO.isReg() || MO.getReg() == 0) continue; unsigned MOReg = MO.getReg(); if (MO.isUse()) { MO.setIsKill(false); UseRegs.push_back(MOReg); } else /*MO.isDef()*/ { MO.setIsDead(false); DefRegs.push_back(MOReg); } } // Process all uses. for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { unsigned MOReg = UseRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegUse(MOReg, MBB, MI); else if (!ReservedRegisters[MOReg]) HandlePhysRegUse(MOReg, MI); } // Process all masked registers. (Call clobbers). for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) HandleRegMask(MI->getOperand(RegMasks[i])); // Process all defs. 
for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { unsigned MOReg = DefRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); else if (!ReservedRegisters[MOReg]) HandlePhysRegDef(MOReg, MI, Defs); } UpdatePhysRegDefs(MI, Defs); } // Handle any virtual assignments from PHI nodes which might be at the // bottom of this basic block. We check all of our successor blocks to see // if they have PHI nodes, and if so, we simulate an assignment at the end // of the current block. if (!PHIVarInfo[MBB->getNumber()].empty()) { SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()]; for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(), E = VarInfoVec.end(); I != E; ++I) // Mark it alive only in the block we are representing. MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), MBB); } // Finally, if the last instruction in the block is a return, make sure to // mark it as using all of the live-out values in the function. // Things marked both call and return are tail calls; do not do this for // them. The tail callee need not take the same registers as input // that it produces as output, and there are dependencies for its input // registers elsewhere. if (!MBB->empty() && MBB->back().isReturn() && !MBB->back().isCall()) { MachineInstr *Ret = &MBB->back(); for (MachineRegisterInfo::liveout_iterator I = MF->getRegInfo().liveout_begin(), E = MF->getRegInfo().liveout_end(); I != E; ++I) { assert(TargetRegisterInfo::isPhysicalRegister(*I) && "Cannot have a live-out virtual register!"); HandlePhysRegUse(*I, Ret); // Add live-out registers as implicit uses. if (!Ret->readsRegister(*I)) Ret->addOperand(MachineOperand::CreateReg(*I, false, true)); } } // MachineCSE may CSE instructions which write to non-allocatable physical // registers across MBBs. Remember if any reserved register is liveout. SmallSet<unsigned, 4> LiveOuts; for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock *SuccMBB = *SI; if (SuccMBB->isLandingPad()) continue; for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), LE = SuccMBB->livein_end(); LI != LE; ++LI) { unsigned LReg = *LI; if (!TRI->isInAllocatableClass(LReg)) // Ignore other live-ins, e.g. those that are live into landing pads. LiveOuts.insert(LReg); } } // Loop over PhysRegDef / PhysRegUse, killing any registers that are // available at the end of the basic block. for (unsigned i = 0; i != NumRegs; ++i) if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) HandlePhysRegDef(i, 0, Defs); std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); } // Convert and transfer the dead / killed information we have gathered into // VirtRegInfo onto MI's. for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) { const unsigned Reg = TargetRegisterInfo::index2VirtReg(i); for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j) if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg)) VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI); else VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI); } // Check to make sure there are no unreachable blocks in the MC CFG for the // function. If so, it is due to a bug in the instruction selector or some // other part of the code generator if this happens. 
#ifndef NDEBUG for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i) assert(Visited.count(&*i) != 0 && "unreachable basic block found"); #endif delete[] PhysRegDef; delete[] PhysRegUse; delete[] PHIVarInfo; return false; }
/// \brief Determine which blocks the value is live in. /// /// These are blocks which lead to uses. Knowing this allows us to avoid /// inserting PHI nodes into blocks which don't lead to uses (thus, the /// inserted phi nodes would be dead). void PromoteMem2Reg::ComputeLiveInBlocks( AllocaInst *AI, AllocaInfo &Info, const SmallPtrSet<BasicBlock *, 32> &DefBlocks, SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) { // To determine liveness, we must iterate through the predecessors of blocks // where the def is live. Blocks are added to the worklist if we need to // check their predecessors. Start with all the using blocks. SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), Info.UsingBlocks.end()); // If any of the using blocks is also a definition block, check to see if the // definition occurs before or after the use. If it happens before the use, // the value isn't really live-in. for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { BasicBlock *BB = LiveInBlockWorklist[i]; if (!DefBlocks.count(BB)) continue; // Okay, this is a block that both uses and defines the value. If the first // reference to the alloca is a def (store), then we know it isn't live-in. for (BasicBlock::iterator I = BB->begin();; ++I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) { if (SI->getOperand(1) != AI) continue; // We found a store to the alloca before a load. The alloca is not // actually live-in here. LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); LiveInBlockWorklist.pop_back(); --i, --e; break; } if (LoadInst *LI = dyn_cast<LoadInst>(I)) { if (LI->getOperand(0) != AI) continue; // Okay, we found a load before a store to the alloca. It is actually // live into this block. break; } } } // Now that we have a set of blocks where the phi is live-in, recursively add // their predecessors until we find the full region the value is live. while (!LiveInBlockWorklist.empty()) { BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); // The block really is live in here, insert it into the set. If already in // the set, then it has already been processed. if (!LiveInBlocks.insert(BB)) continue; // Since the value is live into BB, it is either defined in a predecessor or // live into it to. Add the preds to the worklist unless they are a // defining block. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *P = *PI; // The value is not live into a predecessor if it defines the value. if (DefBlocks.count(P)) continue; // Otherwise it is, add to the worklist. LiveInBlockWorklist.push_back(P); } } }
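A stripped-down version of the same live-in computation on a toy CFG (blocks as integers, predecessor lists as vectors; all names invented). It omits the refinement above where a block that both defines and uses the value is dropped from the worklist when the store precedes the first load.

#include <set>
#include <vector>

static std::set<int> computeLiveIn(const std::vector<std::vector<int>> &Preds,
                                   const std::set<int> &DefBlocks,
                                   std::vector<int> UseBlocks) {
  std::set<int> LiveIn;
  while (!UseBlocks.empty()) {
    int BB = UseBlocks.back();
    UseBlocks.pop_back();
    if (!LiveIn.insert(BB).second)
      continue; // already processed
    for (int P : Preds[BB])
      if (!DefBlocks.count(P)) // a def in the predecessor stops the propagation
        UseBlocks.push_back(P);
  }
  return LiveIn;
}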
const DominanceFrontier::DomSetType & DominanceFrontier::calculate(const DominatorTree &DT, const DomTreeNode *Node) { BasicBlock *BB = Node->getBlock(); DomSetType *Result = NULL; std::vector<DFCalculateWorkObject> workList; SmallPtrSet<BasicBlock *, 32> visited; workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL)); do { DFCalculateWorkObject *currentW = &workList.back(); assert (currentW && "Missing work object."); BasicBlock *currentBB = currentW->currentBB; BasicBlock *parentBB = currentW->parentBB; const DomTreeNode *currentNode = currentW->Node; const DomTreeNode *parentNode = currentW->parentNode; assert (currentBB && "Invalid work object. Missing current Basic Block"); assert (currentNode && "Invalid work object. Missing current Node"); DomSetType &S = Frontiers[currentBB]; // Visit each block only once. if (visited.count(currentBB) == 0) { visited.insert(currentBB); // Loop over CFG successors to calculate DFlocal[currentNode] for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB); SI != SE; ++SI) { // Does Node immediately dominate this successor? if (DT[*SI]->getIDom() != currentNode) S.insert(*SI); } } // At this point, S is DFlocal. Now we union in DFup's of our children... // Loop through and visit the nodes that Node immediately dominates (Node's // children in the IDomTree) bool visitChild = false; for (DomTreeNode::const_iterator NI = currentNode->begin(), NE = currentNode->end(); NI != NE; ++NI) { DomTreeNode *IDominee = *NI; BasicBlock *childBB = IDominee->getBlock(); if (visited.count(childBB) == 0) { workList.push_back(DFCalculateWorkObject(childBB, currentBB, IDominee, currentNode)); visitChild = true; } } // If all children have already been visited, i.e. no new child was pushed, // pop this block from the workList. if (!visitChild) { if (!parentBB) { Result = &S; break; } DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end(); DomSetType &parentSet = Frontiers[parentBB]; for (; CDFI != CDFE; ++CDFI) { if (!DT.properlyDominates(parentNode, DT[*CDFI])) parentSet.insert(*CDFI); } workList.pop_back(); } } while (!workList.empty()); return *Result; }
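For contrast, the same frontiers can be computed with the short per-join-point formulation of Cooper, Harvey and Kennedy once immediate dominators are known; the sketch below (toy CFG, invented names) is that simpler algorithm, not the DFlocal/DFup worklist used above.

#include <set>
#include <vector>

// For every block with >= 2 predecessors, walk each predecessor up the idom
// chain until the block's own idom is reached, adding the block to the
// frontier of every node passed. Block 0 is the entry and IDom[0] == 0.
static std::vector<std::set<int>>
dominanceFrontiers(const std::vector<std::vector<int>> &Preds,
                   const std::vector<int> &IDom) {
  std::vector<std::set<int>> DF(Preds.size());
  for (int B = 0, N = (int)Preds.size(); B != N; ++B) {
    if (Preds[B].size() < 2)
      continue;
    for (int P : Preds[B])
      for (int Runner = P; Runner != IDom[B]; Runner = IDom[Runner])
        DF[Runner].insert(B); // B is in the frontier of Runner
  }
  return DF;
}

// Example: the diamond 0->1, 0->2, 1->3, 2->3 with IDom = {0, 0, 0, 0} gives
// DF[1] = DF[2] = {3} and DF[0] = DF[3] = {}.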
/// \brief Recursively traverse the CFG of the function, renaming loads and /// stores to the allocas which we are promoting. /// /// IncomingVals indicates what value each Alloca contains on exit from the /// predecessor block Pred. void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncomingVals, std::vector<RenamePassData> &Worklist) { NextIteration: // If we are inserting any phi nodes into this BB, they will already be in the // block. if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { // If we have PHI nodes to update, compute the number of edges from Pred to // BB. if (PhiToAllocaMap.count(APN)) { // We want to be able to distinguish between PHI nodes being inserted by // this invocation of mem2reg from those phi nodes that already existed in // the IR before mem2reg was run. We determine that APN is being inserted // because it is missing incoming edges. All other PHI nodes being // inserted by this pass of mem2reg will have the same number of incoming // operands so far. Remember this count. unsigned NewPHINumOperands = APN->getNumOperands(); unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB); assert(NumEdges && "Must be at least one edge from Pred to BB!"); // Add entries for all the phis. BasicBlock::iterator PNI = BB->begin(); do { unsigned AllocaNo = PhiToAllocaMap[APN]; // Add N incoming values to the PHI node. for (unsigned i = 0; i != NumEdges; ++i) APN->addIncoming(IncomingVals[AllocaNo], Pred); // The currently active variable for this block is now the PHI. IncomingVals[AllocaNo] = APN; // Get the next phi node. ++PNI; APN = dyn_cast<PHINode>(PNI); if (APN == 0) break; // Verify that it is missing entries. If not, it is not being inserted // by this mem2reg invocation so we want to ignore it. } while (APN->getNumOperands() == NewPHINumOperands); } } // Don't revisit blocks. if (!Visited.insert(BB)) return; for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) { Instruction *I = II++; // get the instruction, increment iterator if (LoadInst *LI = dyn_cast<LoadInst>(I)) { AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); if (!Src) continue; DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src); if (AI == AllocaLookup.end()) continue; Value *V = IncomingVals[AI->second]; // Anything using the load now uses the current value. LI->replaceAllUsesWith(V); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); BB->getInstList().erase(LI); } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { // Delete this instruction and mark the name as the current holder of the // value AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); if (!Dest) continue; DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); if (ai == AllocaLookup.end()) continue; // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) ConvertDebugDeclareToDebugValue(DDI, SI, DIB); BB->getInstList().erase(SI); } } // 'Recurse' to our successors. succ_iterator I = succ_begin(BB), E = succ_end(BB); if (I == E) return; // Keep track of the successors so we don't visit the same successor twice SmallPtrSet<BasicBlock *, 8> VisitedSuccs; // Handle the first successor without using the worklist. 
VisitedSuccs.insert(*I); Pred = BB; BB = *I; ++I; for (; I != E; ++I) if (VisitedSuccs.insert(*I)) Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); goto NextIteration; }
/// PromoteAliasSet - Try to promote memory values to scalars by sinking /// stores out of the loop and moving loads to before the loop. We do this by /// looping over the stores in the loop, looking for stores to Must pointers /// which are loop invariant. /// void LICM::PromoteAliasSet(AliasSet &AS) { // We can promote this alias set if it has a store, if it is a "Must" alias // set, if the pointer is loop invariant, and if we are not eliminating any // volatile loads or stores. if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) return; assert(!AS.empty() && "Must alias set should have at least one pointer element in it!"); Value *SomePtr = AS.begin()->getValue(); // It isn't safe to promote a load/store from the loop if the load/store is // conditional. For example, turning: // // for () { if (c) *P += 1; } // // into: // // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; // // is not safe, because *P may only be valid to access if 'c' is true. // // It is safe to promote P if all uses are direct load/stores and if at // least one is guaranteed to be executed. bool GuaranteedToExecute = false; SmallVector<Instruction*, 64> LoopUses; SmallPtrSet<Value*, 4> PointerMustAliases; // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { Value *ASIV = ASI->getValue(); PointerMustAliases.insert(ASIV); // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. if (SomePtr->getType() != ASIV->getType()) return; for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); UI != UE; ++UI) { // Ignore instructions that are outside the loop. Instruction *Use = dyn_cast<Instruction>(*UI); if (!Use || !CurLoop->contains(Use)) continue; // If there is an non-load/store instruction in the loop, we can't promote // it. if (isa<LoadInst>(Use)) assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); else if (isa<StoreInst>(Use)) { assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); if (Use->getOperand(0) == ASIV) return; } else return; // Not a load or store. if (!GuaranteedToExecute) GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); LoopUses.push_back(Use); } } // If there isn't a guaranteed-to-execute instruction, we can't promote. if (!GuaranteedToExecute) return; // Otherwise, this is safe to promote, lets do it! DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); Changed = true; ++NumPromoted; // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); // It wants to know some value of the same type as what we'll be inserting. Value *SomeValue; if (isa<LoadInst>(LoopUses[0])) SomeValue = LoopUses[0]; else SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0); SSA.Initialize(SomeValue->getType(), SomeValue->getName()); // First step: bucket up uses of the pointers by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element. 
DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { Instruction *User = LoopUses[i]; UsesByBlock[User->getParent()].push_back(User); } // Okay, now we can iterate over all the blocks in the loop with uses, // processing them. Keep track of which loads are loading a live-in value. SmallVector<LoadInst*, 32> LiveInLoads; DenseMap<Value*, Value*> ReplacedLoads; for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) { Instruction *User = LoopUses[LoopUse]; std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()]; // If this block has already been processed, ignore this repeat use. if (BlockUses.empty()) continue; // Okay, this is the first use in the block. If this block just has a // single user in it, we can rewrite it trivially. if (BlockUses.size() == 1) { // If it is a store, it is a trivial def of the value in the block. if (isa<StoreInst>(User)) { SSA.AddAvailableValue(User->getParent(), cast<StoreInst>(User)->getOperand(0)); } else { // Otherwise it is a load, queue it to rewrite as a live-in load. LiveInLoads.push_back(cast<LoadInst>(User)); } BlockUses.clear(); continue; } // Otherwise, check to see if this block is all loads. If so, we can queue // them all as live in loads. bool HasStore = false; for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { if (isa<StoreInst>(BlockUses[i])) { HasStore = true; break; } } if (!HasStore) { for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); BlockUses.clear(); continue; } // Otherwise, we have mixed loads and stores (or just a bunch of stores). // Since SSAUpdater is purely for cross-block values, we need to determine // the order of these instructions in the block. If the first use in the // block is a load, then it uses the live in value. The last store defines // the live out value. We handle this by doing a linear scan of the block. BasicBlock *BB = User->getParent(); Value *StoredValue = 0; for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (LoadInst *L = dyn_cast<LoadInst>(II)) { // If this is a load from an unrelated pointer, ignore it. if (!PointerMustAliases.count(L->getOperand(0))) continue; // If we haven't seen a store yet, this is a live in use, otherwise // use the stored value. if (StoredValue) { L->replaceAllUsesWith(StoredValue); ReplacedLoads[L] = StoredValue; } else { LiveInLoads.push_back(L); } continue; } if (StoreInst *S = dyn_cast<StoreInst>(II)) { // If this is a store to an unrelated pointer, ignore it. if (!PointerMustAliases.count(S->getOperand(1))) continue; // Remember that this is the active value in the block. StoredValue = S->getOperand(0); } } // The last stored value that happened is the live-out for the block. assert(StoredValue && "Already checked that there is a store in block"); SSA.AddAvailableValue(BB, StoredValue); BlockUses.clear(); } // Now that all the intra-loop values are classified, set up the preheader. // It gets a load of the pointer we're promoting, and it is the live-out value // from the preheader. LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted", Preheader->getTerminator()); SSA.AddAvailableValue(Preheader, PreheaderLoad); // Now that the preheader is good to go, set up the exit blocks. Each exit // block gets a store of the live-out values that feed them. 
// Since we've already told the SSA updater about the defs in the loop and the // preheader definition, it is all set and we can start using it. SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); Instruction *InsertPos = ExitBlock->getFirstNonPHI(); new StoreInst(LiveInValue, SomePtr, InsertPos); } // Okay, now we rewrite all loads that use live-in values in the loop, // inserting PHI nodes as necessary. for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { LoadInst *ALoad = LiveInLoads[i]; Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); ALoad->replaceAllUsesWith(NewVal); CurAST->copyValue(ALoad, NewVal); ReplacedLoads[ALoad] = NewVal; } // If the preheader load is itself a pointer, we need to tell alias analysis // about the new pointer we created in the preheader block and about any PHI // nodes that just got inserted. if (PreheaderLoad->getType()->isPointerTy()) { // Copy any value stored to or loaded from a must-alias of the pointer. CurAST->copyValue(SomeValue, PreheaderLoad); for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) CurAST->copyValue(SomeValue, NewPHIs[i]); } // Now that everything is rewritten, delete the old instructions from the body // of the loop. They should all be dead now. for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { Instruction *User = LoopUses[i]; // If this is a load that still has uses, then the load must have been added // as a live value in the SSAUpdater data structure for a block (e.g. because // the loaded value was stored later). In this case, we need to recursively // propagate the updates until we get to the real value. if (!User->use_empty()) { Value *NewVal = ReplacedLoads[User]; assert(NewVal && "not a replaced load?"); // Propagate down to the ultimate replacee. The intermediate loads // could theoretically already have been deleted, so we don't want to // dereference the Value*'s. DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); while (RLI != ReplacedLoads.end()) { NewVal = RLI->second; RLI = ReplacedLoads.find(NewVal); } User->replaceAllUsesWith(NewVal); CurAST->copyValue(User, NewVal); } CurAST->deleteValue(User); User->eraseFromParent(); } // phew, we're done! }
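The effect of this promotion, and the reason for the guaranteed-to-execute check earlier, can be seen in a small standalone example (plain C++, invented function names): the promoted form performs one load in the preheader and one store per exit block, which is only legal when the original loop is known to access the pointer on every execution.

static void notPromotable(int *P, int N, const bool *C) {
  // *P is only touched when C[i] is true, so hoisting the load and sinking the
  // store could dereference P on a path where the original code never did.
  for (int i = 0; i < N; ++i)
    if (C[i])
      *P += 1;
}

static void promoted(int *P, int N) {
  int Tmp = *P;   // preheader load
  for (int i = 0; i < N; ++i)
    Tmp += 1;     // every use inside the loop now goes through the scalar
  *P = Tmp;       // store of the live-out value in the exit block
}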
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); if (DisablePeephole) return false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); MRI = &MF.getRegInfo(); DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0; bool Changed = false; SmallPtrSet<MachineInstr*, 8> LocalMIs; SmallSet<unsigned, 4> ImmDefRegs; DenseMap<unsigned, MachineInstr*> ImmDefMIs; unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; bool SeenMoveImm = false; LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); FoldAsLoadDefReg = 0; for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { MachineInstr *MI = &*MII; // We may be erasing MI below, increment MII now. ++MII; LocalMIs.insert(MI); // If there exists an instruction which belongs to the following // categories, we will discard the load candidate. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { FoldAsLoadDefReg = 0; continue; } if (MI->mayStore() || MI->isCall()) FoldAsLoadDefReg = 0; if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || (MI->isSelect() && optimizeSelect(MI))) { // MI is deleted. LocalMIs.erase(MI); Changed = true; continue; } if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { SeenMoveImm = true; } else { Changed |= optimizeExtInstr(MI, MBB, LocalMIs); // optimizeExtInstr might have created new instructions after MI // and before the already incremented MII. Adjust MII so that the // next iteration sees the new instructions. MII = MI; ++MII; if (SeenMoveImm) Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } // Check whether MI is a load candidate for folding into a later // instruction. If MI is not a candidate, check whether we can fold an // earlier load into MI. if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr // can enable folding by converting SUB to CMP. MachineInstr *DefMI = 0; MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefReg, DefMI); if (FoldMI) { // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. DEBUG(dbgs() << "Replacing: " << *MI); DEBUG(dbgs() << " With: " << *FoldMI); LocalMIs.erase(MI); LocalMIs.erase(DefMI); LocalMIs.insert(FoldMI); MI->eraseFromParent(); DefMI->eraseFromParent(); ++NumLoadFold; // MI is replaced with FoldMI. Changed = true; continue; } } } } return Changed; }
/// nonLocalHelper - Private helper used to calculate non-local dependencies /// by doing DFS on the predecessors of a block to find its dependencies void MemoryDependenceAnalysis::nonLocalHelper(Instruction* query, BasicBlock* block, DenseMap<BasicBlock*, Value*>& resp) { // Set of blocks that we've already visited in our DFS SmallPtrSet<BasicBlock*, 4> visited; // If we're updating a dirtied cache entry, we don't need to reprocess // already computed entries. for (DenseMap<BasicBlock*, Value*>::iterator I = resp.begin(), E = resp.end(); I != E; ++I) if (I->second != Dirty) visited.insert(I->first); // Current stack of the DFS SmallVector<BasicBlock*, 4> stack; for (pred_iterator PI = pred_begin(block), PE = pred_end(block); PI != PE; ++PI) stack.push_back(*PI); // Do a basic DFS while (!stack.empty()) { BasicBlock* BB = stack.back(); // If we've already visited this block, no need to revisit it if (visited.count(BB)) { stack.pop_back(); continue; } // If we find a new block with a local dependency for query, // then we insert the new dependency and backtrack. if (BB != block) { visited.insert(BB); Instruction* localDep = getDependency(query, 0, BB); if (localDep != NonLocal) { resp.insert(std::make_pair(BB, localDep)); stack.pop_back(); continue; } // If we re-encounter the starting block, we still need to search it // because there might be a dependency in the starting block AFTER // the position of the query. This is necessary to get loops right. } else if (BB == block) { visited.insert(BB); Instruction* localDep = getDependency(query, 0, BB); if (localDep != query) resp.insert(std::make_pair(BB, localDep)); stack.pop_back(); continue; } // If we didn't find anything, recurse on the predecessors of this block // Only do this for blocks with a small number of predecessors. bool predOnStack = false; bool inserted = false; if (std::distance(pred_begin(BB), pred_end(BB)) <= PredLimit) { for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) if (!visited.count(*PI)) { stack.push_back(*PI); inserted = true; } else predOnStack = true; } // If we inserted a new predecessor, then we'll come back to this block if (inserted) continue; // If we didn't insert because we have no predecessors, then this // query has no dependency at all. else if (!inserted && !predOnStack) { resp.insert(std::make_pair(BB, None)); // If we didn't insert because our predecessors are already on the stack, // then we might still have a dependency, but it will be discovered during // backtracking. } else if (!inserted && predOnStack) { resp.insert(std::make_pair(BB, NonLocal)); } stack.pop_back(); } }
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to /// split the critical edge. This will update DominatorTree information if it /// is available, thus calling this pass will not invalidate either of them. /// This returns the new block if the edge was split, null otherwise. /// /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the /// specified successor will be merged into the same critical edge block. /// This is most commonly interesting with switch instructions, which may /// have many edges to any one destination. This ensures that all edges to that /// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge". /// /// It is invalid to call this function on a critical edge that starts at an /// IndirectBrInst. Splitting these edges will almost always create an invalid /// program because the address of the new block won't be the one that is jumped /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges) { if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch. BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); NewBI->setDebugLoc(TI->getDebugLoc()); // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); // Insert the block into the function... right after the block TI lives in. Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB; F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) { // This conceptually does: // foreach (PHINode *PN in DestBB) // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB); // but is optimized for two cases. if (APHI->getNumIncomingValues() <= 8) { // Small # preds case. unsigned BBIdx = 0; for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { // We no longer enter through TIBB, now we come in through NewBB. // Revector exactly one entry in the PHI node that used to come from // TIBB to come from NewBB. PHINode *PN = cast<PHINode>(I); // Reuse the previous value of BBIdx if it lines up. In cases where we // have multiple phi nodes with *lots* of predecessors, this is a speed // win because we don't have to scan the PHI looking for TIBB. This // happens because the BB list of PHI nodes are usually in the same // order. if (PN->getIncomingBlock(BBIdx) != TIBB) BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } } else { // However, the foreach loop is slow for blocks with lots of predecessors // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk // the user list of TIBB to find the PHI nodes. SmallPtrSet<PHINode*, 16> UpdatedPHIs; for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); UI != E; ) { Value::use_iterator Use = UI++; if (PHINode *PN = dyn_cast<PHINode>(*Use)) { // Remove one entry from each PHI. 
if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) PN->setOperand(Use.getOperandNo(), NewBB); } } } } // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). if (MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; // Remove an entry for TIBB from DestBB phi nodes. DestBB->removePredecessor(TIBB); // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); // If we have nothing to update, just return. if (DT == 0 && LI == 0 && PI == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate // anything, as there are other successors of DestBB. However, if all other // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; // If there is a PHI in the block, loop over predecessors with it, which is // faster than iterating pred_begin/end. if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingBlock(i) != NewBB) OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) { BasicBlock *P = *I; if (P != NewBB) OtherPreds.push_back(P); } } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but // TINode is the immediate dominator for the new node. // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = 0; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); while (!OtherPreds.empty() && NewBBDominatesDestBB) { if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); OtherPreds.pop_back(); } OtherPreds.clear(); } // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { if (!DestBBNode) DestBBNode = DT->getNode(DestBB); DT->changeImmediateDominator(DestBBNode, NewBBNode); } } } // Update LoopInfo if it is around. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. TIL->addBasicBlockToLoop(NewBB, LI->getBase()); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. 
DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the // header of its loop (adding a branch into a loop elsewhere would // create an irreducible loop). assert(DestLoop->getHeader() == DestBB && "Should not create irreducible loops!"); if (Loop *P = DestLoop->getParentLoop()) P->addBasicBlockToLoop(NewBB, LI->getBase()); } } // If TIBB is in a loop and DestBB is outside of that loop, split the // other exit blocks of the loop that also have predecessors outside // the loop, to maintain a LoopSimplify guarantee. if (!TIL->contains(DestBB) && P->mustPreserveAnalysisID(LoopSimplifyID)) { assert(!TIL->contains(NewBB) && "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. if (P->mustPreserveAnalysisID(LCSSAID)) { SmallVector<BasicBlock *, 1> OrigPred; OrigPred.push_back(TIBB); CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB); } // For each unique exit block... SmallVector<BasicBlock *, 4> ExitBlocks; TIL->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { // Collect all the preds that are inside the loop, and note // whether there are any preds outside the loop. SmallVector<BasicBlock *, 4> Preds; bool HasPredOutsideOfLoop = false; BasicBlock *Exit = ExitBlocks[i]; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; if (TIL->contains(P)) Preds.push_back(P); else HasPredOutsideOfLoop = true; } // If there are any preds not in the loop, we'll need to split // the edges. The Preds.empty() check is needed because a block // may appear multiple times in the list. We can't use // getUniqueExitBlocks above because that depends on LoopSimplify // form, which we're in the process of restoring! if (!Preds.empty() && HasPredOutsideOfLoop) { BasicBlock *NewExitBB = SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); } } } // LCSSA form was updated above for the case where LoopSimplify is // available, which means that all predecessors of loop exit blocks // are within the loop. Without LoopSimplify form, it would be // necessary to insert a new phi. assert((!P->mustPreserveAnalysisID(LCSSAID) || P->mustPreserveAnalysisID(LoopSimplifyID)) && "SplitCriticalEdge doesn't know how to update LCCSA form " "without LoopSimplify!"); } } // Update ProfileInfo if it is around. if (PI) PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); return NewBB; }
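As a small illustration of the precondition this function tests first, the sketch below (toy CFG as successor lists, invented names) checks whether an edge is critical in the same sense: the source block has multiple successors and the destination block has multiple predecessors, so the edge can only receive its own code by splicing in a new block between the two.

#include <vector>

static bool isToyCriticalEdge(const std::vector<std::vector<int>> &Succs,
                              int From, int To) {
  if (Succs[From].size() < 2)
    return false;              // source branches to a single successor only
  int NumPreds = 0;
  for (const std::vector<int> &S : Succs)
    for (int T : S)
      if (T == To)
        ++NumPreds;            // count every edge entering To
  return NumPreds > 1;         // destination is a join point as well
}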
bool TypeChecker::validateGenericFuncSignature(AbstractFunctionDecl *func) { bool invalid = false; // Create the archetype builder. ArchetypeBuilder builder = createArchetypeBuilder(func->getParentModule()); // Type check the function declaration, treating all generic type // parameters as dependent, unresolved. DependentGenericTypeResolver dependentResolver(builder); if (checkGenericFuncSignature(*this, &builder, func, dependentResolver)) invalid = true; // If this triggered a recursive validation, back out: we're done. // FIXME: This is an awful hack. if (func->hasType()) return !func->isInvalid(); // Finalize the generic requirements. (void)builder.finalize(func->getLoc()); // The archetype builder now has all of the requirements, although there might // still be errors that have not yet been diagnosed. Revert the generic // function signature and type-check it again, completely. revertGenericFuncSignature(func); CompleteGenericTypeResolver completeResolver(*this, builder); if (checkGenericFuncSignature(*this, nullptr, func, completeResolver)) invalid = true; // The generic function signature is complete and well-formed. Determine // the type of the generic function. // Collect the complete set of generic parameter types. SmallVector<GenericTypeParamType *, 4> allGenericParams; collectGenericParamTypes(func->getGenericParams(), func->getDeclContext()->getGenericSignatureOfContext(), allGenericParams); auto sig = builder.getGenericSignature(allGenericParams); // Debugging of the archetype builder and generic signature generation. if (sig && Context.LangOpts.DebugGenericSignatures) { func->dumpRef(llvm::errs()); llvm::errs() << "\n"; builder.dump(llvm::errs()); llvm::errs() << "Generic signature: "; sig->print(llvm::errs()); llvm::errs() << "\n"; llvm::errs() << "Canonical generic signature: "; sig->getCanonicalSignature()->print(llvm::errs()); llvm::errs() << "\n"; llvm::errs() << "Canonical generic signature for mangling: "; sig->getCanonicalManglingSignature(*func->getParentModule()) ->print(llvm::errs()); llvm::errs() << "\n"; } func->setGenericSignature(sig); if (invalid) { func->overwriteType(ErrorType::get(Context)); return true; } // Compute the function type. Type funcTy; Type initFuncTy; if (auto fn = dyn_cast<FuncDecl>(func)) { funcTy = fn->getBodyResultTypeLoc().getType(); if (!funcTy) { funcTy = TupleType::getEmpty(Context); } else { funcTy = getResultType(*this, fn, funcTy); } } else if (auto ctor = dyn_cast<ConstructorDecl>(func)) { // FIXME: shouldn't this just be // ctor->getDeclContext()->getDeclaredInterfaceType()? if (ctor->getDeclContext()->getAsProtocolOrProtocolExtensionContext()) { funcTy = ctor->getDeclContext()->getProtocolSelf()->getDeclaredType(); } else { funcTy = ctor->getExtensionType()->getAnyNominal() ->getDeclaredInterfaceType(); } // Adjust result type for failability. if (ctor->getFailability() != OTK_None) funcTy = OptionalType::get(ctor->getFailability(), funcTy); initFuncTy = funcTy; } else { assert(isa<DestructorDecl>(func)); funcTy = TupleType::getEmpty(Context); } auto paramLists = func->getParameterLists(); SmallVector<ParameterList*, 4> storedParamLists; // FIXME: Destructors don't have the '()' pattern in their signature, so // paste it here. 
if (isa<DestructorDecl>(func)) { assert(paramLists.size() == 1 && "Only the self paramlist"); storedParamLists.push_back(paramLists[0]); storedParamLists.push_back(ParameterList::createEmpty(Context)); paramLists = storedParamLists; } bool hasSelf = func->getDeclContext()->isTypeContext(); for (unsigned i = 0, e = paramLists.size(); i != e; ++i) { Type argTy; Type initArgTy; Type selfTy; if (i == e-1 && hasSelf) { selfTy = func->computeInterfaceSelfType(/*isInitializingCtor=*/false); // Substitute in our own 'self' parameter. argTy = selfTy; if (initFuncTy) { initArgTy = func->computeInterfaceSelfType(/*isInitializingCtor=*/true); } } else { argTy = paramLists[e - i - 1]->getType(Context); // For an implicit declaration, our argument type will be in terms of // archetypes rather than dependent types. Replace the // archetypes with their corresponding dependent types. if (func->isImplicit()) { argTy = ArchetypeBuilder::mapTypeOutOfContext(func, argTy); } if (initFuncTy) initArgTy = argTy; } auto info = applyFunctionTypeAttributes(func, i); // FIXME: We shouldn't even get here if the function isn't locally generic // to begin with, but fixing that requires a lot of reengineering for local // definitions in generic contexts. if (sig && i == e-1) { if (func->getGenericParams()) { // Collect all generic params referenced in parameter types, // return type or requirements. SmallPtrSet<GenericTypeParamDecl *, 4> referencedGenericParams; argTy.visit([&referencedGenericParams](Type t) { if (isa<GenericTypeParamType>(t.getCanonicalTypeOrNull())) { referencedGenericParams.insert( t->castTo<GenericTypeParamType>()->getDecl()); } }); funcTy.visit([&referencedGenericParams](Type t) { if (isa<GenericTypeParamType>(t.getCanonicalTypeOrNull())) { referencedGenericParams.insert( t->castTo<GenericTypeParamType>()->getDecl()); } }); auto requirements = sig->getRequirements(); for (auto req : requirements) { if (req.getKind() == RequirementKind::SameType) { // Same type requirements may allow for generic // inference, even if this generic parameter // is not mentioned in the function signature. // TODO: Make the test more precise. auto left = req.getFirstType(); auto right = req.getSecondType(); // For now consider any references inside requirements // as a possibility to infer the generic type. left.visit([&referencedGenericParams](Type t) { if (isa<GenericTypeParamType>(t.getCanonicalTypeOrNull())) { referencedGenericParams.insert( t->castTo<GenericTypeParamType>()->getDecl()); } }); right.visit([&referencedGenericParams](Type t) { if (isa<GenericTypeParamType>(t.getCanonicalTypeOrNull())) { referencedGenericParams.insert( t->castTo<GenericTypeParamType>()->getDecl()); } }); } } // Find the depth of the function's own generic parameters. unsigned fnGenericParamsDepth = func->getGenericParams()->getDepth(); // Check that every generic parameter type from the signature is // among referencedArchetypes. for (auto *genParam : sig->getGenericParams()) { auto *paramDecl = genParam->getDecl(); if (paramDecl->getDepth() != fnGenericParamsDepth) continue; if (!referencedGenericParams.count(paramDecl)) { // Produce an error that this generic parameter cannot be bound. 
diagnose(paramDecl->getLoc(), diag::unreferenced_generic_parameter, paramDecl->getNameStr()); func->setInvalid(); } } } funcTy = GenericFunctionType::get(sig, argTy, funcTy, info); if (initFuncTy) initFuncTy = GenericFunctionType::get(sig, initArgTy, initFuncTy, info); } else { funcTy = FunctionType::get(argTy, funcTy, info); if (initFuncTy) initFuncTy = FunctionType::get(initArgTy, initFuncTy, info); } } // Record the interface type. func->setInterfaceType(funcTy); if (initFuncTy) cast<ConstructorDecl>(func)->setInitializerInterfaceType(initFuncTy); return false; }
void PromoteMem2Reg::run() { Function &F = *DT.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; IDFCalculator IDF(DT); for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); removeLifetimeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. if (AST) AST->deleteValue(AI); AI->eraseFromParent(); // Remove the alloca from the Allocas list, since it has been processed RemoveFromAllocasList(AllocaNum); ++NumDeadAlloca; continue; } // Calculate the set of read and write-locations for each alloca. This is // analogous to finding the 'uses' and 'definitions' of each variable. Info.AnalyzeAlloca(AI); // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; continue; } } // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock && promoteSingleBlockAlloca(AI, Info, LBI, AST)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; } // If we haven't computed a numbering for the BB's in the function, do so // now. if (BBNumbers.empty()) { unsigned ID = 0; for (auto &BB : F) BBNumbers[&BB] = ID++; } // If we have an AST to keep updated, remember some pointer value that is // stored into the alloca. if (AST) PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; // Remember the dbg.declare intrinsic describing this alloca, if any. if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; // At this point, we're committed to promoting the alloca using IDF's, and // the standard SSA construction algorithm. Determine which blocks need PHI // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. // Unique the set of defining blocks for efficient lookup. SmallPtrSet<BasicBlock *, 32> DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); // Determine which blocks the value is live in. These are blocks which lead // to uses. SmallPtrSet<BasicBlock *, 32> LiveInBlocks; ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); // At this point, we're committed to promoting the alloca using IDF's, and // the standard SSA construction algorithm. Determine which blocks need phi // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. 
IDF.setLiveInBlocks(LiveInBlocks); IDF.setDefiningBlocks(DefBlocks); SmallVector<BasicBlock *, 32> PHIBlocks; IDF.calculate(PHIBlocks); if (PHIBlocks.size() > 1) std::sort(PHIBlocks.begin(), PHIBlocks.end(), [this](BasicBlock *A, BasicBlock *B) { return BBNumbers.lookup(A) < BBNumbers.lookup(B); }); unsigned CurrentVersion = 0; for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i) QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion); } if (Allocas.empty()) return; // All of the allocas must have been trivial! LBI.clear(); // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. // RenamePassData::ValVector Values(Allocas.size()); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary // std::vector<RenamePassData> RenamePassWorkList; RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); } while (!RenamePassWorkList.empty()); // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); // Remove the allocas themselves from the function. for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { Instruction *A = Allocas[i]; // If there are any uses of the alloca instructions left, they must be in // unreachable basic blocks that were not processed by walking the dominator // tree. Just delete the users now. if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); if (AST) AST->deleteValue(A); A->eraseFromParent(); } const DataLayout &DL = F.getParent()->getDataLayout(); // Remove alloca's dbg.declare instrinsics from the function. for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) DDI->eraseFromParent(); // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can // happen due to undef values coming into the PHI nodes. This process is // iterative, because eliminating one PHI node can cause others to be removed. bool EliminatedAPHI = true; while (EliminatedAPHI) { EliminatedAPHI = false; // Iterating over NewPhiNodes is deterministic, so it is safe to try to // simplify and RAUW them as we go. If it was not, we could add uses to // the values we replace with in a non-deterministic order, thus creating // non-deterministic def->use chains. for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); EliminatedAPHI = true; continue; } ++I; } } // At this point, the renamer has added entries to PHI nodes for all reachable // code. Unfortunately, there may be unreachable blocks which the renamer // hasn't traversed. 
  // If this is the case, the PHI nodes may not have incoming values for all
  // predecessors. Loop over all PHI nodes we have created, inserting undef
  // values if they are missing any incoming values.
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block. As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values. We know
    // that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks. Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BBs which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry.
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the Preds list must have dummy entries
    // inserted into every PHI node for the block. Update all the phi nodes in
    // this block that we are inserting (there could be phis before mem2reg
    // runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}
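// For context, a minimal sketch of how the promotion machinery above is
// normally driven. PromoteMemToReg (declared in
// llvm/Transforms/Utils/PromoteMemToReg.h) forwards into PromoteMem2Reg::run();
// the exact signature (the AliasSetTracker/AssumptionCache parameters) varies
// between LLVM releases, so treat the call below as illustrative rather than
// definitive. The helper collectPromotableAllocas is not an LLVM API; it is
// defined here only for this example.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
using namespace llvm;

// Collect allocas in the entry block that isAllocaPromotable accepts.
static void collectPromotableAllocas(Function &F,
                                     SmallVectorImpl<AllocaInst *> &Allocas) {
  for (Instruction &I : F.getEntryBlock())
    if (auto *AI = dyn_cast<AllocaInst>(&I))
      if (isAllocaPromotable(AI))
        Allocas.push_back(AI);
}

// Repeatedly promote until no promotable allocas remain; each call to
// PromoteMemToReg runs the SSA-construction code shown above.
static bool promoteAllAllocas(Function &F, DominatorTree &DT,
                              AssumptionCache *AC) {
  bool Changed = false;
  while (true) {
    SmallVector<AllocaInst *, 32> Allocas;
    collectPromotableAllocas(F, Allocas);
    if (Allocas.empty())
      break;
    PromoteMemToReg(Allocas, DT, /*AST=*/nullptr, AC);
    Changed = true;
  }
  return Changed;
}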
/// optimizeExtInstr - If the instruction is a copy-like instruction, i.e. it
/// reads a single register and writes a single register and it does not
/// modify the source, and if the source value is preserved as a sub-register
/// of the result, then replace all reachable uses of the source with the
/// subreg of the result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this
/// changes the code. Since this code does not currently share EXTRACTs, just
/// ignore all debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
  if (++UI == MRI->use_nodbg_end())
    // No other uses.
    return false;

  // The source has other uses. See if we can replace the other uses with use
  // of the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  UI = MRI->use_nodbg_begin(DstReg);
  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
       UI != UE; ++UI)
    ReachedBBs.insert(UI->getParent());

  // Uses that are in the same BB as uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;
  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  UI = MRI->use_nodbg_begin(SrcReg);
  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
       UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = &*UI;
    if (UseMI == MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend the live
      // range of the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result; we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions downstream.
    // A PHI use is expected to be the kill of its source values.
    UI = MRI->use_nodbg_begin(DstReg);
    for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
         UI != UE; ++UI)
      if (UI->isPHI())
        PHIBBs.insert(UI->getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed)
        MRI->clearKillFlags(DstReg);

      unsigned NewVR = MRI->createVirtualRegister(RC);
      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), NewVR)
          .addReg(DstReg, 0, SubIdx);
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
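// A hedged sketch of the TargetInstrInfo hook that optimizeExtInstr relies on.
// The real hook is TargetInstrInfo::isCoalescableExtInstr; the target,
// opcode (MyTarget::SEXT32_16) and sub-register index (MyTarget::sub_lo16)
// below are hypothetical, used only to illustrate the contract: the hook
// reports that the destination of the extension still holds the original
// source value in a sub-register, which is what lets the code above rewrite
// other uses of SrcReg as "COPY DstReg:SubIdx".
bool MyTargetInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                              unsigned &SrcReg,
                                              unsigned &DstReg,
                                              unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case MyTarget::SEXT32_16:
    // %dst:gr32 = SEXT32_16 %src:gr16 keeps the original 16-bit value in the
    // low half of %dst, so uses of %src may be rewritten as
    //   %new:gr16 = COPY %dst:sub_lo16
    // by optimizeExtInstr above.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = MyTarget::sub_lo16;
    return true;
  }
  return false;
}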