/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;
      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        for (Value *IncValue : PN->incoming_values())
          FlowsToReturn.insert(IncValue);
        continue;
      }
      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallSite CS(RVI);
        if (CS.paramHasAttr(0, Attribute::NoAlias))
          break;
        if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
          break;
      } // fall-through
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
      return false;
  }

  return true;
}
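// A note on the traversal above: FlowsToReturn grows while it is being
// walked, so the loop uses indices rather than iterators (which growth would
// invalidate), and the set semantics of SmallSetVector guarantee
// termination. Below is a minimal, self-contained sketch of that idiom using
// std::vector plus a std::set in place of SmallSetVector; all names here are
// illustrative, not LLVM's.
#include <cstdio>
#include <set>
#include <vector>

void demoGrowingWorklist() {
  std::vector<int> Worklist = {1, 2};
  std::set<int> Seen(Worklist.begin(), Worklist.end());
  auto Insert = [&](int V) {
    if (Seen.insert(V).second) // only genuinely new elements are appended
      Worklist.push_back(V);
  };
  for (size_t i = 0; i != Worklist.size(); ++i) { // indices survive growth
    int V = Worklist[i];
    std::printf("visiting %d\n", V);
    if (V < 4)
      Insert(V + 1); // discovered mid-walk, processed in a later iteration
  }
}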
// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence:  a = A[i];  // (1)
//                                A[i] = b;  // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence:  A[i] = a;     // (1)
//                                A[i] = b;     // (2)
//                                A[i + 1] = c; // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const ValueToValueMap &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> StoreGroups;
  // Holds all interleaved load groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> LoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form interleaved load or store groups. In the algorithm below, access A
  // precedes access B in program order. We initialize a group for B in the
  // outer loop of the algorithm, and then in the inner loop, we attempt to
  // insert each A into B's group if:
  //
  //  1. A and B have the same stride,
  //  2. A and B have the same memory object size, and
  //  3. A belongs in B's group according to its distance from B.
  //
  // Special care is taken to ensure group formation will not break any
  // dependences.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    InterleaveGroup *Group = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) ||
         EnablePredicatedInterleavedMemAccesses)) {
      Group = getInterleaveGroup(B);
      if (!Group) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
      }
      if (B->mayWriteToMemory())
        StoreGroups.insert(Group);
      else
        LoadGroups.insert(Group);
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group. Note that this also
      // means that a group can't have more than one member at a given offset.
      // The accesses in a group can have dependences with other accesses, but
      // we must ensure we don't extend the boundaries of the group such that
      // we encompass those dependent accesses.
      //
      // For example, assume we have the sequence of accesses shown below in a
      // stride-2 loop:
      //
      //  (1, 2) is a group | A[i]   = a; // (1)
      //                    | A[i-1] = b; // (2) |
      //                      A[i-3] = c; // (3)
      //                      A[i]   = d; // (4) | (2, 4) is not a group
      //
      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
      // but not with (4). If we did, the dependent access (3) would be within
      // the boundaries of the (2, 4) group.
      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
        // If a dependence exists and A is already in a group, we know that A
        // must be a store since A precedes B and WAR dependences are allowed.
        // Thus, A would be sunk below B. We release A's group to prevent this
        // illegal code motion. A will then be free to form another group with
        // instructions that precede it.
        if (isInterleaved(A)) {
          InterleaveGroup *StoreGroup = getInterleaveGroup(A);
          StoreGroups.remove(StoreGroup);
          releaseGroup(StoreGroup);
        }

        // If a dependence exists and A is not already in a group (or it was
        // and we just released it), B might be hoisted above A (if B is a
        // load) or another store might be sunk below A (if B is a store). In
        // either case, we can't add additional instructions to B's group. B
        // will only form a group with instructions that it precedes.
        break;
      }

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() isn't mutually exclusive with
      // mayWriteToMemory() in the case of atomic loads. We shouldn't see those
      // here; canVectorizeMemory() should have returned false - except for the
      // case where we asked for optimization remarks.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if the memory objects of A and B don't belong to the same
      // address space.
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();

      // Check rule 3. Ignore A if its distance to B is not a multiple of the
      // size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave-group must have the same
      // predicate, and currently must reside in the same BB.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      int IndexA =
          Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (Group->insertMember(A, IndexA, DesA.Align)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << "    into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = Group;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          Group->setInsertPos(A);
      }
    } // Iteration over A accesses.
  }   // Iteration over B accesses.

  // Remove interleaved store groups with gaps.
  for (InterleaveGroup *Group : StoreGroups)
    if (Group->getNumMembers() != Group->getFactor()) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
    }

  // Remove interleaved groups with gaps (currently only loads) whose memory
  // accesses may wrap around. We have to revisit the getPtrStride analysis,
  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
  // not check wrapping (see documentation there).
  // FORNOW we use Assume=false;
  // TODO: Change to Assume=true, making sure we don't exceed the threshold
  // of runtime SCEV assumption checks (thereby potentially failing to
  // vectorize altogether).
  // Additional optional optimizations:
  // TODO: If we are peeling the loop and we know that the first pointer
  // doesn't wrap then we can deduce that all pointers in the group don't wrap.
  // This means that we can forcefully peel the loop in order to only have to
  // check the first pointer for no-wrap. When we change to Assume=true, we'll
  // only need at most one runtime check per interleaved group.
  for (InterleaveGroup *Group : LoadGroups) {
    // Case 1: A full group. Can skip the checks; for full groups, if the wide
    // load would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // Case 2: If the first and last members of the group don't wrap, this
    // implies that all the pointers in the group don't wrap. So we check only
    // group member 0 (which is always guaranteed to exist) and group member
    // Factor - 1; if the latter doesn't exist we rely on peeling (if it is a
    // non-reversed access -- see Case 3).
    Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
                      /*ShouldCheckWrap=*/true)) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved group due to "
                    "first group member potentially pointer-wrapping.\n");
      releaseGroup(Group);
      continue;
    }
    Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
    if (LastMember) {
      Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
      if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
                        /*ShouldCheckWrap=*/true)) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "last group member potentially pointer-wrapping.\n");
        releaseGroup(Group);
      }
    } else {
      // Case 3: A non-reversed interleaved load group with gaps: We need
      // to execute at least one scalar epilogue iteration. This will ensure
      // we don't speculatively access memory out-of-bounds. We only need
      // to look for a member at index factor - 1, since every group must have
      // a member at index zero.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }
}
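// For intuition, here is a hypothetical stride-2 source loop (plain C++, not
// from LLVM's tests) whose two loads would form a single factor-2 interleave
// group: A[2*i] at index 0 and A[2*i+1] at index 1, vectorizable as one wide
// load plus shuffles.
void sumPairs(const int *A, int *Out, int N) {
  for (int i = 0; i < N; ++i) {
    int Even = A[2 * i];    // group member at index 0
    int Odd = A[2 * i + 1]; // group member at index 1
    Out[i] = Even + Odd;
  }
}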
/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
/// blocks, the successors have gained new predecessors. Update the PHI
/// instructions in them accordingly.
void
TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                  SmallSetVector<MachineBasicBlock*,8> &Succs) {
  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
       SE = Succs.end(); SI != SE; ++SI) {
    MachineBasicBlock *SuccBB = *SI;
    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
         II != EE; ++II) {
      if (!II->isPHI())
        break;
      unsigned Idx = 0;
      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
        MachineOperand &MO = II->getOperand(i+1);
        if (MO.getMBB() == FromBB) {
          Idx = i;
          break;
        }
      }

      assert(Idx != 0);
      MachineOperand &MO0 = II->getOperand(Idx);
      unsigned Reg = MO0.getReg();
      if (isDead) {
        // Folded into the previous BB.
        // There could be duplicate phi source entries. FIXME: Should sdisel
        // or an earlier pass have fixed this?
        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
          MachineOperand &MO = II->getOperand(i+1);
          if (MO.getMBB() == FromBB) {
            II->RemoveOperand(i+1);
            II->RemoveOperand(i);
          }
        }
      } else
        Idx = 0;

      // If Idx is set, the operands at Idx and Idx+1 must be removed.
      // We reuse the location to avoid expensive RemoveOperand calls.
      DenseMap<unsigned, AvailableValsTy>::iterator LI =
          SSAUpdateVals.find(Reg);
      if (LI != SSAUpdateVals.end()) {
        // This register is defined in the tail block.
        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = LI->second[j].first;
          unsigned SrcReg = LI->second[j].second;
          if (Idx != 0) {
            II->getOperand(Idx).setReg(SrcReg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            II->addOperand(MachineOperand::CreateReg(SrcReg, false));
            II->addOperand(MachineOperand::CreateMBB(SrcBB));
          }
        }
      } else {
        // Live in tail block, must also be live in predecessors.
        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = TDBBs[j];
          if (Idx != 0) {
            II->getOperand(Idx).setReg(Reg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            II->addOperand(MachineOperand::CreateReg(Reg, false));
            II->addOperand(MachineOperand::CreateMBB(SrcBB));
          }
        }
      }
      if (Idx != 0) {
        II->RemoveOperand(Idx+1);
        II->RemoveOperand(Idx);
      }
    }
  }
}
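// The paired-operand scan above relies on the MachineInstr PHI layout:
// operand 0 is the def, followed by (register, predecessor-MBB) pairs, so
// the block for the pair starting at operand i sits at operand i+1. A
// standalone sketch of that layout and scan, with strings standing in for
// operands (illustrative only, not a real MachineInstr):
#include <cstdio>
#include <string>
#include <vector>

void demoPhiOperandScan() {
  // Operands of '%res = PHI %a, <bb1>, %b, <bb2>'.
  std::vector<std::string> Ops = {"%res", "%a", "bb1", "%b", "bb2"};
  const std::string FromBB = "bb2";
  for (size_t i = 1; i + 1 < Ops.size(); i += 2)
    if (Ops[i + 1] == FromBB) // the MBB operand follows the register
      std::printf("incoming %s from %s at operand %zu\n", Ops[i].c_str(),
                  FromBB.c_str(), i);
}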
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
  DenseMap<unsigned, MachineInstr*> AvailCopyMap;   // Def -> available copies map
  DenseMap<unsigned, MachineInstr*> CopyMap;        // Def -> copies map
  SourceMap SrcMap;                                 // Src -> Def map

  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isCopy()) {
      unsigned Def = MI->getOperand(0).getReg();
      unsigned Src = MI->getOperand(1).getReg();

      if (TargetRegisterInfo::isVirtualRegister(Def) ||
          TargetRegisterInfo::isVirtualRegister(Src))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
      if (CI != AvailCopyMap.end()) {
        MachineInstr *CopyMI = CI->second;
        if (!MRI->isReserved(Def) &&
            (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
            isNopCopy(CopyMI, Def, Src, TRI)) {
          // The two copies cancel out and the source of the first copy
          // hasn't been overridden, eliminate the second one. e.g.
          //  %ECX<def> = COPY %EAX<kill>
          //  ... nothing clobbered EAX.
          //  %EAX<def> = COPY %ECX
          // =>
          //  %ECX<def> = COPY %EAX
          //
          // Also avoid eliminating a copy from reserved registers unless the
          // definition is proven not clobbered. e.g.
          //  %RSP<def> = COPY %RAX
          //  CALL
          //  %RAX<def> = COPY %RSP

          // Clear any kills of Def between CopyMI and MI. This extends the
          // live range.
          for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
            I->clearRegisterKills(Def, TRI);

          removeCopy(MI);
          Changed = true;
          ++NumDeletes;
          continue;
        }
      }

      // If Src is defined by a previous copy, it cannot be eliminated.
      for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
        CI = CopyMap.find(*AI);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }

      // Copy is now a candidate for deletion.
      MaybeDeadCopies.insert(MI);

      // If 'Src' was previously the source of another copy, then this earlier
      // copy's source is no longer available. e.g.
      // %xmm9<def> = copy %xmm2
      // ...
      // %xmm2<def> = copy %xmm0
      // ...
      // %xmm2<def> = copy %xmm9
      SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);

      // Remember Def is defined by the copy.
      // ... Make sure to clear the def maps of aliases first.
      for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) {
        CopyMap.erase(*AI);
        AvailCopyMap.erase(*AI);
      }
      CopyMap[Def] = MI;
      AvailCopyMap[Def] = MI;
      for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) {
        CopyMap[*SR] = MI;
        AvailCopyMap[*SR] = MI;
      }

      // Remember source that's copied to Def. Once it's clobbered, then
      // it's no longer available for copy propagation.
      if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) ==
          SrcMap[Src].end()) {
        SrcMap[Src].push_back(Def);
      }

      continue;
    }

    // Not a copy.
    SmallVector<unsigned, 2> Defs;
    int RegMaskOpNum = -1;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isRegMask())
        RegMaskOpNum = i;
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;

      if (TargetRegisterInfo::isVirtualRegister(Reg))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      if (MO.isDef()) {
        Defs.push_back(Reg);
        continue;
      }

      // If 'Reg' is defined by a copy, the copy is no longer a candidate
      // for elimination.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
        DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }
    }

    // The instruction has a register mask operand which means that it clobbers
    // a large set of registers. It is possible to use the register mask to
    // prune the available copies, but treat it like a basic block boundary for
    // now.
    if (RegMaskOpNum >= 0) {
      // Erase any MaybeDeadCopies whose destination register is clobbered.
      const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum);
      for (SmallSetVector<MachineInstr*, 8>::iterator
           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
           DI != DE; ++DI) {
        unsigned Reg = (*DI)->getOperand(0).getReg();
        if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
          continue;
        removeCopy(*DI);
        Changed = true;
        ++NumDeletes;
      }

      // Clear all data structures as if we were beginning a new basic block.
      MaybeDeadCopies.clear();
      AvailCopyMap.clear();
      CopyMap.clear();
      SrcMap.clear();
      continue;
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Reg = Defs[i];

      // No longer defined by a copy.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
        CopyMap.erase(*AI);
        AvailCopyMap.erase(*AI);
      }

      // If 'Reg' was previously the source of a copy, it is no longer
      // available for copy propagation.
      SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
    }
  }

  // If MBB doesn't have successors, delete the copies whose defs are not used.
  // If MBB does have successors, then conservatively assume the defs are
  // live-out since we don't want to trust live-in lists.
  if (MBB.succ_empty()) {
    for (SmallSetVector<MachineInstr*, 8>::iterator
         DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
         DI != DE; ++DI) {
      if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
        removeCopy(*DI);
        Changed = true;
        ++NumDeletes;
      }
    }
  }

  return Changed;
}
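// A minimal, self-contained model of the nop-copy elimination above, with
// strings standing in for physical registers and a plain map for
// AvailCopyMap; the structure and names are illustrative, not LLVM's. The
// second copy below cancels out against the first, as in the %ECX/%EAX
// example in the comments.
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct ToyCopy {
  std::string Def, Src;
};

void demoNopCopy() {
  std::vector<ToyCopy> Block = {{"ecx", "eax"}, {"eax", "ecx"}};
  std::map<std::string, std::string> AvailCopy; // Def -> Src of a live copy
  for (const ToyCopy &C : Block) {
    auto It = AvailCopy.find(C.Src);
    if (It != AvailCopy.end() && It->second == C.Def) {
      // 'C.Src' already holds a copy of 'C.Def', so this copy is a nop.
      std::cout << "redundant: " << C.Def << " = COPY " << C.Src << '\n';
      continue;
    }
    AvailCopy[C.Def] = C.Src; // a clobber of Def or Src would erase this
  }
}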
/// For every instruction from the worklist, check to see if it has any uses
/// that are outside the current loop. If so, insert LCSSA PHI nodes and
/// rewrite the uses.
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
                                    DominatorTree &DT, LoopInfo &LI) {
  SmallVector<Use *, 16> UsesToRewrite;
  SmallSetVector<PHINode *, 16> PHIsToRemove;
  PredIteratorCache PredCache;
  bool Changed = false;

  // Cache the Loop ExitBlocks across this loop. We expect to get a lot of
  // instructions within the same loops, computing the exit blocks is
  // expensive, and we're not mutating the loop structure.
  SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;

  while (!Worklist.empty()) {
    UsesToRewrite.clear();

    Instruction *I = Worklist.pop_back_val();
    BasicBlock *InstBB = I->getParent();
    Loop *L = LI.getLoopFor(InstBB);
    if (!LoopExitBlocks.count(L))
      L->getExitBlocks(LoopExitBlocks[L]);
    assert(LoopExitBlocks.count(L));
    const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];

    if (ExitBlocks.empty())
      continue;

    // Tokens cannot be used in PHI nodes, so we skip over them.
    // We can run into tokens which are live out of a loop with catchswitch
    // instructions in Windows EH if the catchswitch has one catchpad which
    // is inside the loop and another which is not.
    if (I->getType()->isTokenTy())
      continue;

    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UserBB = User->getParent();
      if (PHINode *PN = dyn_cast<PHINode>(User))
        UserBB = PN->getIncomingBlock(U);

      if (InstBB != UserBB && !L->contains(UserBB))
        UsesToRewrite.push_back(&U);
    }

    // If there are no uses outside the loop, exit with no change.
    if (UsesToRewrite.empty())
      continue;

    ++NumLCSSA; // We are applying the transformation.

    // Invoke instructions are special in that their result value is not
    // available along their unwind edge. The code below tests to see whether
    // DomBB dominates the value, so adjust DomBB to the normal destination
    // block, which is effectively where the value is first usable.
    BasicBlock *DomBB = InstBB;
    if (InvokeInst *Inv = dyn_cast<InvokeInst>(I))
      DomBB = Inv->getNormalDest();

    DomTreeNode *DomNode = DT.getNode(DomBB);

    SmallVector<PHINode *, 16> AddedPHIs;
    SmallVector<PHINode *, 8> PostProcessPHIs;

    SmallVector<PHINode *, 4> InsertedPHIs;
    SSAUpdater SSAUpdate(&InsertedPHIs);
    SSAUpdate.Initialize(I->getType(), I->getName());

    // Insert the LCSSA phi's into all of the exit blocks dominated by the
    // value, and add them to the Phi's map.
    for (BasicBlock *ExitBB : ExitBlocks) {
      if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
        continue;

      // If we already inserted something for this BB, don't reprocess it.
      if (SSAUpdate.HasValueForBlock(ExitBB))
        continue;

      PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
                                    I->getName() + ".lcssa", &ExitBB->front());

      // Add inputs from inside the loop for this PHI.
      for (BasicBlock *Pred : PredCache.get(ExitBB)) {
        PN->addIncoming(I, Pred);

        // If the exit block has a predecessor not within the loop, arrange for
        // the incoming value use corresponding to that predecessor to be
        // rewritten in terms of a different LCSSA PHI.
        if (!L->contains(Pred))
          UsesToRewrite.push_back(
              &PN->getOperandUse(PN->getOperandNumForIncomingValue(
                  PN->getNumIncomingValues() - 1)));
      }

      AddedPHIs.push_back(PN);

      // Remember that this phi makes the value alive in this block.
      SSAUpdate.AddAvailableValue(ExitBB, PN);

      // LoopSimplify might fail to simplify some loops (e.g. when indirect
      // branches are involved). In such situations, it might happen that an
      // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
      // create PHIs in such an exit block, we are also inserting PHIs into
      // L2's header. This could break LCSSA form for L2 because these
      // inserted PHIs can also have uses outside of L2. Remember all PHIs in
      // such situations so we can revisit them later on. FIXME: Remove this
      // if indirectbr support in LoopSimplify gets improved.
      if (auto *OtherLoop = LI.getLoopFor(ExitBB))
        if (!L->contains(OtherLoop))
          PostProcessPHIs.push_back(PN);
    }

    // Rewrite all uses outside the loop in terms of the new PHIs we just
    // inserted.
    for (Use *UseToRewrite : UsesToRewrite) {
      // If this use is in an exit block, rewrite to use the newly inserted
      // PHI. This is required for correctness because SSAUpdate doesn't
      // handle uses in the same block. It assumes the PHI we inserted is at
      // the end of the block.
      Instruction *User = cast<Instruction>(UseToRewrite->getUser());
      BasicBlock *UserBB = User->getParent();
      if (PHINode *PN = dyn_cast<PHINode>(User))
        UserBB = PN->getIncomingBlock(*UseToRewrite);

      if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
        // Tell the VHs that the uses changed. This updates SCEV's caches.
        if (UseToRewrite->get()->hasValueHandle())
          ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
        UseToRewrite->set(&UserBB->front());
        continue;
      }

      // Otherwise, do full PHI insertion.
      SSAUpdate.RewriteUse(*UseToRewrite);
    }

    // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
    // to post-process them to keep LCSSA form.
    for (PHINode *InsertedPN : InsertedPHIs) {
      if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
        if (!L->contains(OtherLoop))
          PostProcessPHIs.push_back(InsertedPN);
    }

    // Post process PHI instructions that were inserted into another disjoint
    // loop and update their exits properly.
    for (auto *PostProcessPN : PostProcessPHIs) {
      if (PostProcessPN->use_empty())
        continue;

      // Reprocess each PHI instruction.
      Worklist.push_back(PostProcessPN);
    }

    // Keep track of PHI nodes that we want to remove because they did not have
    // any uses rewritten.
    for (PHINode *PN : AddedPHIs)
      if (PN->use_empty())
        PHIsToRemove.insert(PN);

    Changed = true;
  }

  // Remove PHI nodes that did not have any uses rewritten.
  for (PHINode *PN : PHIsToRemove) {
    assert(PN->use_empty() && "Trying to remove a phi with uses.");
    PN->eraseFromParent();
  }

  return Changed;
}
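// A C-level analogue of the situation LCSSA handles (hypothetical example,
// not LLVM code): 'x' is defined inside the loop but used after it, so in
// LCSSA form the IR would route the live-out value through an 'x.lcssa' PHI
// placed in the loop's exit block.
int lastPositive(const int *A, int N) {
  int x = 0;
  for (int i = 0; i < N; ++i)
    if (A[i] > 0)
      x = A[i]; // value live out of the loop
  return x;     // use outside the loop; rewritten to use %x.lcssa
}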
void AAEvaluator::runInternal(Function &F, AAResults &AA) {
  const DataLayout &DL = F.getParent()->getDataLayout();

  ++FunctionCount;

  SetVector<Value *> Pointers;
  SmallSetVector<CallBase *, 16> Calls;
  SetVector<Value *> Loads;
  SetVector<Value *> Stores;

  for (auto &I : F.args())
    if (I.getType()->isPointerTy()) // Add all pointer arguments.
      Pointers.insert(&I);

  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
    if (I->getType()->isPointerTy()) // Add all pointer instructions.
      Pointers.insert(&*I);
    if (EvalAAMD && isa<LoadInst>(&*I))
      Loads.insert(&*I);
    if (EvalAAMD && isa<StoreInst>(&*I))
      Stores.insert(&*I);
    Instruction &Inst = *I;
    if (auto *Call = dyn_cast<CallBase>(&Inst)) {
      Value *Callee = Call->getCalledValue();
      // Skip actual functions for direct function calls.
      if (!isa<Function>(Callee) && isInterestingPointer(Callee))
        Pointers.insert(Callee);
      // Consider formals.
      for (Use &DataOp : Call->data_ops())
        if (isInterestingPointer(DataOp))
          Pointers.insert(DataOp);
      Calls.insert(Call);
    } else {
      // Consider all operands.
      for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
           OI != OE; ++OI)
        if (isInterestingPointer(*OI))
          Pointers.insert(*OI);
    }
  }

  if (PrintAll || PrintNoAlias || PrintMayAlias || PrintPartialAlias ||
      PrintMustAlias || PrintNoModRef || PrintMod || PrintRef || PrintModRef)
    errs() << "Function: " << F.getName() << ": " << Pointers.size()
           << " pointers, " << Calls.size() << " call sites\n";

  // iterate over the worklist, and run the full (n^2)/2 disambiguations
  for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
       I1 != E; ++I1) {
    auto I1Size = LocationSize::unknown();
    Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
    if (I1ElTy->isSized())
      I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));

    for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
      auto I2Size = LocationSize::unknown();
      Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
      if (I2ElTy->isSized())
        I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));

      AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size);
      switch (AR) {
      case NoAlias:
        PrintResults(AR, PrintNoAlias, *I1, *I2, F.getParent());
        ++NoAliasCount;
        break;
      case MayAlias:
        PrintResults(AR, PrintMayAlias, *I1, *I2, F.getParent());
        ++MayAliasCount;
        break;
      case PartialAlias:
        PrintResults(AR, PrintPartialAlias, *I1, *I2, F.getParent());
        ++PartialAliasCount;
        break;
      case MustAlias:
        PrintResults(AR, PrintMustAlias, *I1, *I2, F.getParent());
        ++MustAliasCount;
        break;
      }
    }
  }

  if (EvalAAMD) {
    // iterate over all pairs of load, store
    for (Value *Load : Loads) {
      for (Value *Store : Stores) {
        AliasResult AR = AA.alias(MemoryLocation::get(cast<LoadInst>(Load)),
                                  MemoryLocation::get(cast<StoreInst>(Store)));
        switch (AR) {
        case NoAlias:
          PrintLoadStoreResults(AR, PrintNoAlias, Load, Store, F.getParent());
          ++NoAliasCount;
          break;
        case MayAlias:
          PrintLoadStoreResults(AR, PrintMayAlias, Load, Store, F.getParent());
          ++MayAliasCount;
          break;
        case PartialAlias:
          PrintLoadStoreResults(AR, PrintPartialAlias, Load, Store,
                                F.getParent());
          ++PartialAliasCount;
          break;
        case MustAlias:
          PrintLoadStoreResults(AR, PrintMustAlias, Load, Store,
                                F.getParent());
          ++MustAliasCount;
          break;
        }
      }
    }

    // iterate over all pairs of store, store
    for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end();
         I1 != E; ++I1) {
      for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) {
        AliasResult AR = AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)),
                                  MemoryLocation::get(cast<StoreInst>(*I2)));
        switch (AR) {
        case NoAlias:
          PrintLoadStoreResults(AR, PrintNoAlias, *I1, *I2, F.getParent());
          ++NoAliasCount;
          break;
        case MayAlias:
          PrintLoadStoreResults(AR, PrintMayAlias, *I1, *I2, F.getParent());
          ++MayAliasCount;
          break;
        case PartialAlias:
          PrintLoadStoreResults(AR, PrintPartialAlias, *I1, *I2,
                                F.getParent());
          ++PartialAliasCount;
          break;
        case MustAlias:
          PrintLoadStoreResults(AR, PrintMustAlias, *I1, *I2, F.getParent());
          ++MustAliasCount;
          break;
        }
      }
    }
  }

  // Mod/ref alias analysis: compare all pairs of calls and values
  for (CallBase *Call : Calls) {
    for (auto Pointer : Pointers) {
      auto Size = LocationSize::unknown();
      Type *ElTy = cast<PointerType>(Pointer->getType())->getElementType();
      if (ElTy->isSized())
        Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));

      switch (AA.getModRefInfo(Call, Pointer, Size)) {
      case ModRefInfo::NoModRef:
        PrintModRefResults("NoModRef", PrintNoModRef, Call, Pointer,
                           F.getParent());
        ++NoModRefCount;
        break;
      case ModRefInfo::Mod:
        PrintModRefResults("Just Mod", PrintMod, Call, Pointer, F.getParent());
        ++ModCount;
        break;
      case ModRefInfo::Ref:
        PrintModRefResults("Just Ref", PrintRef, Call, Pointer, F.getParent());
        ++RefCount;
        break;
      case ModRefInfo::ModRef:
        PrintModRefResults("Both ModRef", PrintModRef, Call, Pointer,
                           F.getParent());
        ++ModRefCount;
        break;
      case ModRefInfo::Must:
        PrintModRefResults("Must", PrintMust, Call, Pointer, F.getParent());
        ++MustCount;
        break;
      case ModRefInfo::MustMod:
        PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, Call, Pointer,
                           F.getParent());
        ++MustModCount;
        break;
      case ModRefInfo::MustRef:
        PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, Call, Pointer,
                           F.getParent());
        ++MustRefCount;
        break;
      case ModRefInfo::MustModRef:
        PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, Call,
                           Pointer, F.getParent());
        ++MustModRefCount;
        break;
      }
    }
  }

  // Mod/ref alias analysis: compare all pairs of calls
  for (CallBase *CallA : Calls) {
    for (CallBase *CallB : Calls) {
      if (CallA == CallB)
        continue;
      switch (AA.getModRefInfo(CallA, CallB)) {
      case ModRefInfo::NoModRef:
        PrintModRefResults("NoModRef", PrintNoModRef, CallA, CallB,
                           F.getParent());
        ++NoModRefCount;
        break;
      case ModRefInfo::Mod:
        PrintModRefResults("Just Mod", PrintMod, CallA, CallB, F.getParent());
        ++ModCount;
        break;
      case ModRefInfo::Ref:
        PrintModRefResults("Just Ref", PrintRef, CallA, CallB, F.getParent());
        ++RefCount;
        break;
      case ModRefInfo::ModRef:
        PrintModRefResults("Both ModRef", PrintModRef, CallA, CallB,
                           F.getParent());
        ++ModRefCount;
        break;
      case ModRefInfo::Must:
        PrintModRefResults("Must", PrintMust, CallA, CallB, F.getParent());
        ++MustCount;
        break;
      case ModRefInfo::MustMod:
        PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, CallA, CallB,
                           F.getParent());
        ++MustModCount;
        break;
      case ModRefInfo::MustRef:
        PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, CallA, CallB,
                           F.getParent());
        ++MustRefCount;
        break;
      case ModRefInfo::MustModRef:
        PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, CallA,
                           CallB, F.getParent());
        ++MustModRefCount;
        break;
      }
    }
  }
}
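// The triangular loops above (I2 ranging over everything before I1) visit
// each unordered pair exactly once, giving the (n^2)/2 disambiguations the
// comment mentions: n pointers produce n*(n-1)/2 alias queries. A tiny
// standalone sketch of the same pattern (illustrative only):
#include <cstdio>
#include <vector>

void demoPairwiseQueries() {
  std::vector<const char *> Ptrs = {"p1", "p2", "p3", "p4"};
  int Queries = 0;
  for (auto I1 = Ptrs.begin(), E = Ptrs.end(); I1 != E; ++I1)
    for (auto I2 = Ptrs.begin(); I2 != I1; ++I2) { // each pair exactly once
      std::printf("alias(%s, %s)?\n", *I2, *I1);
      ++Queries;
    }
  std::printf("%d queries for %zu pointers\n", Queries, Ptrs.size()); // 6
}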
LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
    LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N,
    CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR) {
  using Node = LazyCallGraph::Node;
  using Edge = LazyCallGraph::Edge;
  using SCC = LazyCallGraph::SCC;
  using RefSCC = LazyCallGraph::RefSCC;

  RefSCC &InitialRC = InitialC.getOuterRefSCC();
  SCC *C = &InitialC;
  RefSCC *RC = &InitialRC;
  Function &F = N.getFunction();

  // Walk the function body and build up the set of retained, promoted, and
  // demoted edges.
  SmallVector<Constant *, 16> Worklist;
  SmallPtrSet<Constant *, 16> Visited;
  SmallPtrSet<Node *, 16> RetainedEdges;
  SmallSetVector<Node *, 4> PromotedRefTargets;
  SmallSetVector<Node *, 4> DemotedCallTargets;

  // First walk the function and handle all called functions. We do this first
  // because if there is a single call edge, whether there are ref edges is
  // irrelevant.
  for (Instruction &I : instructions(F))
    if (auto CS = CallSite(&I))
      if (Function *Callee = CS.getCalledFunction())
        if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
          Node &CalleeN = *G.lookup(*Callee);
          Edge *E = N->lookup(CalleeN);
          // FIXME: We should really handle adding new calls. While it will
          // make downstream usage more complex, there is no fundamental
          // limitation and it will allow passes within the CGSCC to be a bit
          // more flexible in what transforms they can do. Until then, we
          // verify that new calls haven't been introduced.
          assert(E && "No function transformations should introduce *new* "
                      "call edges! Any new calls should be modeled as "
                      "promoted existing ref edges!");
          bool Inserted = RetainedEdges.insert(&CalleeN).second;
          (void)Inserted;
          assert(Inserted && "We should never visit a function twice.");
          if (!E->isCall())
            PromotedRefTargets.insert(&CalleeN);
        }

  // Now walk all references.
  for (Instruction &I : instructions(F))
    for (Value *Op : I.operand_values())
      if (auto *C = dyn_cast<Constant>(Op))
        if (Visited.insert(C).second)
          Worklist.push_back(C);

  auto VisitRef = [&](Function &Referee) {
    Node &RefereeN = *G.lookup(Referee);
    Edge *E = N->lookup(RefereeN);
    // FIXME: Similarly to new calls, we also currently preclude
    // introducing new references. See above for details.
    assert(E && "No function transformations should introduce *new* ref "
                "edges! Any new ref edges would require IPO which "
                "function passes aren't allowed to do!");
    bool Inserted = RetainedEdges.insert(&RefereeN).second;
    (void)Inserted;
    assert(Inserted && "We should never visit a function twice.");
    if (E->isCall())
      DemotedCallTargets.insert(&RefereeN);
  };
  LazyCallGraph::visitReferences(Worklist, Visited, VisitRef);

  // Include synthetic reference edges to known, defined lib functions.
  for (auto *F : G.getLibFunctions())
    // While the list of lib functions doesn't have repeats, don't re-visit
    // anything handled above.
    if (!Visited.count(F))
      VisitRef(*F);

  // First remove all of the edges that are no longer present in this function.
  // The first step makes these edges uniformly ref edges and accumulates them
  // into a separate data structure so removal doesn't invalidate anything.
  SmallVector<Node *, 4> DeadTargets;
  for (Edge &E : *N) {
    if (RetainedEdges.count(&E.getNode()))
      continue;

    SCC &TargetC = *G.lookupSCC(E.getNode());
    RefSCC &TargetRC = TargetC.getOuterRefSCC();
    if (&TargetRC == RC && E.isCall()) {
      if (C != &TargetC) {
        // For separate SCCs this is trivial.
        RC->switchTrivialInternalEdgeToRef(N, E.getNode());
      } else {
        // Now update the call graph.
        C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, E.getNode()),
                                   G, N, C, AM, UR);
      }
    }

    // Now that this is ready for actual removal, put it into our list.
    DeadTargets.push_back(&E.getNode());
  }
  // Remove the easy cases quickly and actually pull them out of our list.
  DeadTargets.erase(
      llvm::remove_if(DeadTargets,
                      [&](Node *TargetN) {
                        SCC &TargetC = *G.lookupSCC(*TargetN);
                        RefSCC &TargetRC = TargetC.getOuterRefSCC();

                        // We can't trivially remove internal targets, so skip
                        // those.
                        if (&TargetRC == RC)
                          return false;

                        RC->removeOutgoingEdge(N, *TargetN);
                        LLVM_DEBUG(dbgs() << "Deleting outgoing edge from '"
                                          << N << "' to '" << TargetN
                                          << "'\n");
                        return true;
                      }),
      DeadTargets.end());

  // Now do a batch removal of the internal ref edges left.
  auto NewRefSCCs = RC->removeInternalRefEdge(N, DeadTargets);
  if (!NewRefSCCs.empty()) {
    // The old RefSCC is dead, mark it as such.
    UR.InvalidatedRefSCCs.insert(RC);

    // Note that we don't bother to invalidate analyses as ref-edge
    // connectivity is not really observable in any way and is intended
    // exclusively to be used for ordering of transforms rather than for
    // analysis conclusions.

    // Update RC to the "bottom".
    assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!");
    RC = &C->getOuterRefSCC();
    assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!");

    // The RC worklist is in reverse postorder, so we enqueue the new ones in
    // RPO except for the one which contains the source node as that is the
    // "bottom" we will continue processing in the bottom-up walk.
    assert(NewRefSCCs.front() == RC &&
           "New current RefSCC not first in the returned list!");
    for (RefSCC *NewRC : llvm::reverse(
             make_range(std::next(NewRefSCCs.begin()), NewRefSCCs.end()))) {
      assert(NewRC != RC && "Should not encounter the current RefSCC further "
                            "in the postorder list of new RefSCCs.");
      UR.RCWorklist.insert(NewRC);
      LLVM_DEBUG(dbgs() << "Enqueuing a new RefSCC in the update worklist: "
                        << *NewRC << "\n");
    }
  }

  // Next demote all the call edges that are now ref edges. This helps make
  // the SCCs small which should minimize the work below as we don't want to
  // form cycles that this would break.
  for (Node *RefTarget : DemotedCallTargets) {
    SCC &TargetC = *G.lookupSCC(*RefTarget);
    RefSCC &TargetRC = TargetC.getOuterRefSCC();

    // The easy case is when the target RefSCC is not this RefSCC. This is
    // only supported when the target RefSCC is a child of this RefSCC.
    if (&TargetRC != RC) {
      assert(RC->isAncestorOf(TargetRC) &&
             "Cannot potentially form RefSCC cycles here!");
      RC->switchOutgoingEdgeToRef(N, *RefTarget);
      LLVM_DEBUG(dbgs() << "Switch outgoing call edge to a ref edge from '"
                        << N << "' to '" << *RefTarget << "'\n");
      continue;
    }

    // We are switching an internal call edge to a ref edge. This may split up
    // some SCCs.
    if (C != &TargetC) {
      // For separate SCCs this is trivial.
      RC->switchTrivialInternalEdgeToRef(N, *RefTarget);
      continue;
    }

    // Now update the call graph.
    C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G,
                               N, C, AM, UR);
  }

  // Now promote ref edges into call edges.
  for (Node *CallTarget : PromotedRefTargets) {
    SCC &TargetC = *G.lookupSCC(*CallTarget);
    RefSCC &TargetRC = TargetC.getOuterRefSCC();

    // The easy case is when the target RefSCC is not this RefSCC. This is
    // only supported when the target RefSCC is a child of this RefSCC.
    if (&TargetRC != RC) {
      assert(RC->isAncestorOf(TargetRC) &&
             "Cannot potentially form RefSCC cycles here!");
      RC->switchOutgoingEdgeToCall(N, *CallTarget);
      LLVM_DEBUG(dbgs() << "Switch outgoing ref edge to a call edge from '"
                        << N << "' to '" << *CallTarget << "'\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << "Switch an internal ref edge to a call edge from '"
                      << N << "' to '" << *CallTarget << "'\n");

    // Otherwise we are switching an internal ref edge to a call edge. This
    // may merge away some SCCs, and we add those to the UpdateResult. We also
    // need to make sure to update the worklist in the event SCCs have moved
    // before the current one in the post-order sequence.
    bool HasFunctionAnalysisProxy = false;
    auto InitialSCCIndex = RC->find(*C) - RC->begin();
    bool FormedCycle = RC->switchInternalEdgeToCall(
        N, *CallTarget, [&](ArrayRef<SCC *> MergedSCCs) {
          for (SCC *MergedC : MergedSCCs) {
            assert(MergedC != &TargetC && "Cannot merge away the target SCC!");

            HasFunctionAnalysisProxy |=
                AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(
                    *MergedC) != nullptr;

            // Mark that this SCC will no longer be valid.
            UR.InvalidatedSCCs.insert(MergedC);

            // FIXME: We should really do a 'clear' here to forcibly release
            // memory, but we don't have a good way of doing that and
            // preserving the function analyses.
            auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
            PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
            AM.invalidate(*MergedC, PA);
          }
        });

    // If we formed a cycle by creating this call, we need to update more data
    // structures.
    if (FormedCycle) {
      C = &TargetC;
      assert(G.lookupSCC(N) == C && "Failed to update current SCC!");

      // If one of the invalidated SCCs had a cached proxy to a function
      // analysis manager, we need to create a proxy in the new current SCC as
      // the invalidated SCCs had their functions moved.
      if (HasFunctionAnalysisProxy)
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G);

      // Any analyses cached for this SCC are no longer precise as the shape
      // has changed by introducing this cycle. However, we have taken care to
      // update the proxies so it remains valid.
      auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
      PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
      AM.invalidate(*C, PA);
    }
    auto NewSCCIndex = RC->find(*C) - RC->begin();

    // If we have actually moved an SCC to be topologically "below" the current
    // one due to merging, we will need to revisit the current SCC after
    // visiting those moved SCCs.
    //
    // It is critical that we *do not* revisit the current SCC unless we
    // actually move SCCs in the process of merging because otherwise we may
    // form a cycle where an SCC is split apart, merged, split, merged and so
    // on infinitely.
    if (InitialSCCIndex < NewSCCIndex) {
      // Put our current SCC back onto the worklist as we'll visit other SCCs
      // that are now definitively ordered prior to the current one in the
      // post-order sequence, and may end up observing more precise context to
      // optimize the current SCC.
      UR.CWorklist.insert(C);
      LLVM_DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist: " << *C
                        << "\n");
      // Enqueue in reverse order as we pop off the back of the worklist.
      for (SCC &MovedC :
           llvm::reverse(make_range(RC->begin() + InitialSCCIndex,
                                    RC->begin() + NewSCCIndex))) {
        UR.CWorklist.insert(&MovedC);
        LLVM_DEBUG(dbgs() << "Enqueuing a newly earlier in post-order SCC: "
                          << MovedC << "\n");
      }
    }
  }

  assert(!UR.InvalidatedSCCs.count(C) && "Invalidated the current SCC!");
  assert(!UR.InvalidatedRefSCCs.count(RC) &&
         "Invalidated the current RefSCC!");
  assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!");

  // Record the current RefSCC and SCC for higher layers of the CGSCC pass
  // manager now that all the updates have been applied.
  if (RC != &InitialRC)
    UR.UpdatedRC = RC;
  if (C != &InitialC)
    UR.UpdatedC = C;

  return *C;
}
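// For intuition about the call-vs-ref distinction behind the promotions and
// demotions above, here is a hypothetical C++ example (not LLVM code):
// 'caller' gets a call edge to 'callee' from the direct call, but only a ref
// edge to 'helper', whose address is taken without a direct call. If a later
// pass devirtualized 'Fn()' into 'helper()', the existing ref edge would be
// promoted to a call edge, which is what PromotedRefTargets models.
static int callee() { return 1; }
static int helper() { return 2; }
int caller() {
  int (*Fn)() = helper;   // ref edge only: address taken, no direct call
  return callee() + Fn(); // direct call -> call edge to 'callee'
}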
bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
  // Check if the current basic block has a single predecessor.
  if (MBB->pred_size() != 1)
    return false;

  MachineBasicBlock *PredMBB = *MBB->pred_begin();
  MachineBasicBlock::iterator CompBr = PredMBB->getLastNonDebugInstr();
  if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2)
    return false;

  ++CompBr;
  do {
    --CompBr;
    if (guaranteesZeroRegInBlock(*CompBr, MBB))
      break;
  } while (CompBr != PredMBB->begin() && CompBr->isTerminator());

  // We've not found a CBZ/CBNZ, time to bail out.
  if (!guaranteesZeroRegInBlock(*CompBr, MBB))
    return false;

  unsigned TargetReg = CompBr->getOperand(0).getReg();
  if (!TargetReg)
    return false;
  assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) &&
         "Expect physical register");

  // Remember all registers aliasing with TargetReg.
  SmallSetVector<unsigned, 8> TargetRegs;
  for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI)
    TargetRegs.insert(*AI);

  bool Changed = false;
  MachineBasicBlock::iterator LastChange = MBB->begin();
  unsigned SmallestDef = TargetReg;
  // Remove redundant Copy instructions unless TargetReg is modified.
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
    MachineInstr *MI = &*I;
    ++I;
    if (MI->isCopy() && MI->getOperand(0).isReg() &&
        MI->getOperand(1).isReg()) {
      unsigned DefReg = MI->getOperand(0).getReg();
      unsigned SrcReg = MI->getOperand(1).getReg();

      if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
          !MRI->isReserved(DefReg) &&
          (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
        DEBUG(dbgs() << "Remove redundant Copy : ");
        DEBUG((MI)->print(dbgs()));

        MI->eraseFromParent();
        Changed = true;
        LastChange = I;
        NumCopiesRemoved++;
        SmallestDef =
            TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef;
        continue;
      }
    }

    if (MI->modifiesRegister(TargetReg, TRI))
      break;
  }

  if (!Changed)
    return false;

  // Otherwise, we have to fixup the use-def chain, starting with the
  // CBZ/CBNZ. Conservatively mark as much as we can as live.
  CompBr->clearRegisterKills(SmallestDef, TRI);

  if (std::none_of(TargetRegs.begin(), TargetRegs.end(),
                   [&](unsigned Reg) { return MBB->isLiveIn(Reg); }))
    MBB->addLiveIn(TargetReg);

  // Clear any kills of TargetReg between CompBr and the last removed COPY.
  for (MachineInstr &MMI :
       make_range(MBB->begin()->getIterator(), LastChange->getIterator()))
    MMI.clearRegisterKills(SmallestDef, TRI);

  return true;
}
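// A hypothetical source pattern behind the copies this pass targets (not
// from LLVM's tests): on the path where 'x' is zero, the CBZ guarding the
// successor block already guarantees the register holds zero, so a copy from
// WZR materializing zero there would be redundant.
int selectOnZero(int x) {
  if (x != 0)
    return 42;
  return x; // x is known zero here; 'mov w0, wzr' would be redundant
}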
/// After FromBB is tail duplicated into its predecessor blocks, the successors
/// have gained new predecessors. Update the PHI instructions in them
/// accordingly.
void TailDuplicatePass::UpdateSuccessorsPHIs(
    MachineBasicBlock *FromBB, bool isDead,
    SmallVectorImpl<MachineBasicBlock *> &TDBBs,
    SmallSetVector<MachineBasicBlock*, 8> &Succs) {
  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
       SE = Succs.end(); SI != SE; ++SI) {
    MachineBasicBlock *SuccBB = *SI;
    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
         II != EE; ++II) {
      if (!II->isPHI())
        break;
      MachineInstrBuilder MIB(*FromBB->getParent(), II);
      unsigned Idx = 0;
      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
        MachineOperand &MO = II->getOperand(i+1);
        if (MO.getMBB() == FromBB) {
          Idx = i;
          break;
        }
      }

      assert(Idx != 0);
      MachineOperand &MO0 = II->getOperand(Idx);
      unsigned Reg = MO0.getReg();
      if (isDead) {
        // Folded into the previous BB.
        // There could be duplicate phi source entries. FIXME: Should sdisel
        // or an earlier pass have fixed this?
        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
          MachineOperand &MO = II->getOperand(i+1);
          if (MO.getMBB() == FromBB) {
            II->RemoveOperand(i+1);
            II->RemoveOperand(i);
          }
        }
      } else
        Idx = 0;

      // If Idx is set, the operands at Idx and Idx+1 must be removed.
      // We reuse the location to avoid expensive RemoveOperand calls.
      DenseMap<unsigned, AvailableValsTy>::iterator LI =
          SSAUpdateVals.find(Reg);
      if (LI != SSAUpdateVals.end()) {
        // This register is defined in the tail block.
        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = LI->second[j].first;
          // If we didn't duplicate a bb into a particular predecessor, we
          // might still have added an entry to SSAUpdateVals to correctly
          // recompute SSA. In that case, avoid adding a dummy extra argument
          // to this PHI.
          if (!SrcBB->isSuccessor(SuccBB))
            continue;

          unsigned SrcReg = LI->second[j].second;
          if (Idx != 0) {
            II->getOperand(Idx).setReg(SrcReg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            MIB.addReg(SrcReg).addMBB(SrcBB);
          }
        }
      } else {
        // Live in tail block, must also be live in predecessors.
        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = TDBBs[j];
          if (Idx != 0) {
            II->getOperand(Idx).setReg(Reg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            MIB.addReg(Reg).addMBB(SrcBB);
          }
        }
      }
      if (Idx != 0) {
        II->RemoveOperand(Idx+1);
        II->RemoveOperand(Idx);
      }
    }
  }
}
/// linkModuleFlagsMetadata - Merge the linker flags in Src into the Dest
/// module.
bool ModuleLinker::linkModuleFlagsMetadata() {
  const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
  if (!SrcModFlags) return false;

  NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata();

  // If the destination module doesn't have module flags yet, then just copy
  // over the source module's flags.
  if (DstModFlags->getNumOperands() == 0) {
    for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
      DstModFlags->addOperand(SrcModFlags->getOperand(I));
    return false;
  }

  bool HasErr = false;

  // Otherwise, we have to merge them based on their behaviors. First,
  // categorize all of the nodes in the modules' module flags. If an error or
  // warning occurs, then emit the appropriate message(s).
  DenseMap<MDString*, MDNode*> ErrorNode;
  DenseMap<MDString*, MDNode*> WarningNode;
  DenseMap<MDString*, MDNode*> OverrideNode;
  DenseMap<MDString*, SmallSetVector<MDNode*, 8> > RequireNodes;
  SmallSetVector<MDString*, 16> SeenIDs;

  HasErr |= categorizeModuleFlagNodes(SrcModFlags, ErrorNode, WarningNode,
                                      OverrideNode, RequireNodes, SeenIDs);
  HasErr |= categorizeModuleFlagNodes(DstModFlags, ErrorNode, WarningNode,
                                      OverrideNode, RequireNodes, SeenIDs);

  // Check that there isn't both an error and warning node for a flag.
  for (SmallSetVector<MDString*, 16>::iterator
         I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
    MDString *ID = *I;
    if (ErrorNode[ID] && WarningNode[ID])
      HasErr = emitError("linking module flags '" + ID->getString() +
                         "': IDs have conflicting behaviors");
  }

  // Early exit if we had an error.
  if (HasErr) return true;

  // Get the destination's module flags ready for new operands.
  DstModFlags->dropAllReferences();

  // Add all of the module flags to the destination module.
  DenseMap<MDString*, SmallVector<MDNode*, 4> > AddedNodes;
  for (SmallSetVector<MDString*, 16>::iterator
         I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
    MDString *ID = *I;
    if (OverrideNode[ID]) {
      DstModFlags->addOperand(OverrideNode[ID]);
      AddedNodes[ID].push_back(OverrideNode[ID]);
    } else if (ErrorNode[ID]) {
      DstModFlags->addOperand(ErrorNode[ID]);
      AddedNodes[ID].push_back(ErrorNode[ID]);
    } else if (WarningNode[ID]) {
      DstModFlags->addOperand(WarningNode[ID]);
      AddedNodes[ID].push_back(WarningNode[ID]);
    }

    for (SmallSetVector<MDNode*, 8>::iterator
           II = RequireNodes[ID].begin(), IE = RequireNodes[ID].end();
         II != IE; ++II)
      DstModFlags->addOperand(*II);
  }

  // Now check that all of the requirements have been satisfied.
  for (SmallSetVector<MDString*, 16>::iterator
         I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
    MDString *ID = *I;
    SmallSetVector<MDNode*, 8> &Set = RequireNodes[ID];

    for (SmallSetVector<MDNode*, 8>::iterator
           II = Set.begin(), IE = Set.end(); II != IE; ++II) {
      MDNode *Node = *II;
      assert(isa<MDNode>(Node->getOperand(2)) &&
             "Module flag's third operand must be an MDNode!");
      MDNode *Val = cast<MDNode>(Node->getOperand(2));

      MDString *ReqID = cast<MDString>(Val->getOperand(0));
      Value *ReqVal = Val->getOperand(1);

      bool HasValue = false;
      for (SmallVectorImpl<MDNode*>::iterator
             RI = AddedNodes[ReqID].begin(), RE = AddedNodes[ReqID].end();
           RI != RE; ++RI) {
        MDNode *ReqNode = *RI;
        if (ReqNode->getOperand(2) == ReqVal) {
          HasValue = true;
          break;
        }
      }

      if (!HasValue)
        HasErr = emitError("linking module flags '" + ReqID->getString() +
                           "': does not have the required value");
    }
  }

  return HasErr;
}
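// A toy model of the Require check above (illustrative only; real module
// flags are MDNodes, not strings): each "require" entry names another flag
// ID and the value the merged module must carry for it, and a mismatch
// produces the same diagnostic text as the code above.
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

void demoRequireCheck() {
  std::map<std::string, int> Merged = {{"wchar_size", 4}};
  std::vector<std::pair<std::string, int>> Requires = {{"wchar_size", 2}};
  for (const auto &R : Requires) {
    auto It = Merged.find(R.first);
    if (It == Merged.end() || It->second != R.second)
      std::cout << "linking module flags '" << R.first
                << "': does not have the required value\n";
  }
}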
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
  SmallVector<ReturnInst *, 16> Returns;
  SmallVector<InvokeInst *, 16> Invokes;
  SmallSetVector<LandingPadInst *, 16> LPads;

  // Look through the terminators of the basic blocks to find invokes.
  for (BasicBlock &BB : F)
    if (auto *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
      if (Function *Callee = II->getCalledFunction())
        if (Callee->getIntrinsicID() == Intrinsic::donothing) {
          // Remove the NOP invoke.
          BranchInst::Create(II->getNormalDest(), II);
          II->eraseFromParent();
          continue;
        }

      Invokes.push_back(II);
      LPads.insert(II->getUnwindDest()->getLandingPadInst());
    } else if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
      Returns.push_back(RI);
    }

  if (Invokes.empty())
    return false;

  NumInvokes += Invokes.size();

  lowerIncomingArguments(F);
  lowerAcrossUnwindEdges(F, Invokes);

  Value *FuncCtx =
      setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
  BasicBlock *EntryBB = &F.front();
  IRBuilder<> Builder(EntryBB->getTerminator());

  // Get a reference to the jump buffer.
  Value *JBufPtr =
      Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep");

  // Save the frame pointer.
  Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0,
                                               "jbuf_fp_gep");

  Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
  Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);

  // Save the stack pointer.
  Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2,
                                               "jbuf_sp_gep");

  Val = Builder.CreateCall(StackAddrFn, {}, "sp");
  Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);

  // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf.
  Builder.CreateCall(BuiltinSetupDispatchFn, {});

  // Store a pointer to the function context so that the back-end will know
  // where to look for it.
  Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
  Builder.CreateCall(FuncCtxFn, FuncCtxArg);

  // At this point, we are all set up, update the invoke instructions to mark
  // their call_site values.
  for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
    insertCallSiteStore(Invokes[I], I + 1);

    ConstantInt *CallSiteNum =
        ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);

    // Record the call site value for the back end so it stays associated with
    // the invoke.
    CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
  }

  // Mark call instructions that aren't nounwind as no-action (call_site ==
  // -1). Skip the entry block, as prior to then, no function context has been
  // created for this function and any unexpected exceptions thrown will go
  // directly to the caller's context, which is what we want anyway, so no need
  // to do anything here.
  for (BasicBlock &BB : F) {
    if (&BB == &F.front())
      continue;
    for (Instruction &I : BB)
      if (I.mayThrow())
        insertCallSiteStore(&I, -1);
  }

  // Register the function context and make sure it's known to not throw.
  CallInst *Register =
      CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator());
  Register->setDoesNotThrow();

  // Following any allocas not in the entry block, update the saved SP in the
  // jmpbuf to the new value.
  for (BasicBlock &BB : F) {
    if (&BB == &F.front())
      continue;
    for (Instruction &I : BB) {
      if (auto *CI = dyn_cast<CallInst>(&I)) {
        if (CI->getCalledFunction() != StackRestoreFn)
          continue;
      } else if (!isa<AllocaInst>(&I)) {
        continue;
      }
      Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
      StackAddr->insertAfter(&I);
      Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
      StoreStackAddr->insertAfter(StackAddr);
    }
  }

  // Finally, for any returns from this function, if this function contains an
  // invoke, add a call to unregister the function context.
  for (ReturnInst *Return : Returns)
    CallInst::Create(UnregisterFn, FuncCtx, "", Return);

  return true;
}
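// A rough C analogue of the SjLj scheme, for intuition only: the function
// context set up above plays the role of a jmp_buf registered on entry, and
// the landing pad corresponds to the non-zero setjmp return.
#include <csetjmp>
#include <cstdio>

static std::jmp_buf Ctx;
static void mayThrow() { std::longjmp(Ctx, 1); } // stand-in for a throw

void demoSjLj() {
  if (setjmp(Ctx) == 0) { // normal path: context is "registered"
    mayThrow();
  } else {                // "landing pad": reached via the longjmp
    std::puts("exception caught");
  }
}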
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
  DenseMap<unsigned, MachineInstr*> AvailCopyMap;   // Def -> available copies map
  DenseMap<unsigned, MachineInstr*> CopyMap;        // Def -> copies map
  DenseMap<unsigned, unsigned> SrcMap;              // Src -> Def map

  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isCopy()) {
      unsigned Def = MI->getOperand(0).getReg();
      unsigned Src = MI->getOperand(1).getReg();

      if (TargetRegisterInfo::isVirtualRegister(Def) ||
          TargetRegisterInfo::isVirtualRegister(Src))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
      if (CI != AvailCopyMap.end()) {
        MachineInstr *CopyMI = CI->second;
        unsigned SrcSrc = CopyMI->getOperand(1).getReg();
        if (!ReservedRegs.test(Def) &&
            (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
            (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) {
          // The two copies cancel out and the source of the first copy
          // hasn't been overridden, eliminate the second one. e.g.
          //  %ECX<def> = COPY %EAX<kill>
          //  ... nothing clobbered EAX.
          //  %EAX<def> = COPY %ECX
          // =>
          //  %ECX<def> = COPY %EAX
          //
          // Also avoid eliminating a copy from reserved registers unless the
          // definition is proven not clobbered. e.g.
          //  %RSP<def> = COPY %RAX
          //  CALL
          //  %RAX<def> = COPY %RSP
          CopyMI->getOperand(1).setIsKill(false);
          MI->eraseFromParent();
          Changed = true;
          ++NumDeletes;
          continue;
        }
      }

      // If Src is defined by a previous copy, it cannot be eliminated.
      CI = CopyMap.find(Src);
      if (CI != CopyMap.end())
        MaybeDeadCopies.remove(CI->second);
      for (const unsigned *AS = TRI->getAliasSet(Src); *AS; ++AS) {
        CI = CopyMap.find(*AS);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }

      // Copy is now a candidate for deletion.
      MaybeDeadCopies.insert(MI);

      // If 'Src' was previously the source of another copy, then this earlier
      // copy's source is no longer available. e.g.
      // %xmm9<def> = copy %xmm2
      // ...
      // %xmm2<def> = copy %xmm0
      // ...
      // %xmm2<def> = copy %xmm9
      SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);

      // Remember Def is defined by the copy.
      CopyMap[Def] = MI;
      AvailCopyMap[Def] = MI;
      for (const unsigned *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
        CopyMap[*SR] = MI;
        AvailCopyMap[*SR] = MI;
      }

      // Remember source that's copied to Def. Once it's clobbered, then
      // it's no longer available for copy propagation.
      SrcMap[Src] = Def;

      continue;
    }

    // Not a copy.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;

      if (TargetRegisterInfo::isVirtualRegister(Reg))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      if (MO.isDef()) {
        Defs.push_back(Reg);
        continue;
      }

      // If 'Reg' is defined by a copy, the copy is no longer a candidate
      // for elimination.
      DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
      if (CI != CopyMap.end())
        MaybeDeadCopies.remove(CI->second);
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
        CI = CopyMap.find(*AS);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Reg = Defs[i];

      // No longer defined by a copy.
      CopyMap.erase(Reg);
      AvailCopyMap.erase(Reg);
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
        CopyMap.erase(*AS);
        AvailCopyMap.erase(*AS);
      }

      // If 'Reg' was previously the source of a copy, it is no longer
      // available for copy propagation.
      SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
    }
  }

  // If MBB doesn't have successors, delete the copies whose defs are not used.
  // If MBB does have successors, then conservatively assume the defs are
  // live-out since we don't want to trust live-in lists.
  if (MBB.succ_empty()) {
    for (SmallSetVector<MachineInstr*, 8>::iterator
         DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
         DI != DE; ++DI) {
      if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
        (*DI)->eraseFromParent();
        Changed = true;
        ++NumDeletes;
      }
    }
  }

  return Changed;
}
/// Remove dead stores to stack-allocated locations in the function end block. /// Ex: /// %A = alloca i32 /// ... /// store i32 1, i32* %A /// ret void static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, MemoryDependenceResults *MD, const TargetLibraryInfo *TLI, InstOverlapIntervalsTy &IOL, DenseMap<Instruction*, size_t> *InstrOrdering) { bool MadeChange = false; // Keep track of all of the stack objects that are dead at the end of the // function. SmallSetVector<Value*, 16> DeadStackObjects; // Find all of the alloca'd pointers in the entry block. BasicBlock &Entry = BB.getParent()->front(); for (Instruction &I : Entry) { if (isa<AllocaInst>(&I)) DeadStackObjects.insert(&I); // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) DeadStackObjects.insert(&I); } // Treat byval or inalloca arguments the same, stores to them are dead at the // end of the function. for (Argument &AI : BB.getParent()->args()) if (AI.hasByValOrInAllocaAttr()) DeadStackObjects.insert(&AI); const DataLayout &DL = BB.getModule()->getDataLayout(); // Scan the basic block backwards for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ --BBI; // If we find a store, check to see if it points into a dead stack value. if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { // See through pointer-to-pointer bitcasts SmallVector<Value *, 4> Pointers; GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL); // Stores to stack values are valid candidates for removal. bool AllDead = true; for (Value *Pointer : Pointers) if (!DeadStackObjects.count(Pointer)) { AllDead = false; break; } if (AllDead) { Instruction *Dead = &*BBI; DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " << *Dead << "\n Objects: "; for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(), E = Pointers.end(); I != E; ++I) { dbgs() << **I; if (std::next(I) != E) dbgs() << ", "; } dbgs() << '\n'); // DCE instructions only used to calculate that store. deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; } } // Remove any dead non-memory-mutating instructions. if (isInstructionTriviallyDead(&*BBI, TLI)) { DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: " << *&*BBI << '\n'); deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; } if (isa<AllocaInst>(BBI)) { // Remove allocas from the list of dead stack objects; there can't be // any references before the definition. DeadStackObjects.remove(&*BBI); continue; } if (auto CS = CallSite(&*BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. if (isAllocLikeFn(&*BBI, TLI)) DeadStackObjects.remove(&*BBI); // If this call does not access memory, it can't be loading any of our // pointers. if (AA->doesNotAccessMemory(CS)) continue; // If the call might load from any of our allocas, then any store above // the call is live. DeadStackObjects.remove_if([&](Value *I) { // See if the call site touches the value. ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI)); return A == MRI_ModRef || A == MRI_Ref; }); // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. 
if (DeadStackObjects.empty()) break; continue; } // We can remove the dead stores, irrespective of the fence and its ordering // (release/acquire/seq_cst). Fences only constraints the ordering of // already visible stores, it does not make a store visible to other // threads. So, skipping over a fence does not change a store from being // dead. if (isa<FenceInst>(*BBI)) continue; MemoryLocation LoadedLoc; // If we encounter a use of the pointer, it is no longer considered dead if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { if (!L->isUnordered()) // Be conservative with atomic/volatile load break; LoadedLoc = MemoryLocation::get(L); } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { LoadedLoc = MemoryLocation::get(V); } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { LoadedLoc = MemoryLocation::getForSource(MTI); } else if (!BBI->mayReadFromMemory()) { // Instruction doesn't read memory. Note that stores that weren't removed // above will hit this case. continue; } else { // Unknown inst; assume it clobbers everything. break; } // Remove any allocas from the DeadPointer set that are loaded, as this // makes any stores above the access live. removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI); // If all of the allocas were clobbered by the access then we're not going // to find anything else to process. if (DeadStackObjects.empty()) break; }
/// Sort the blocks in RPO, taking special care to make sure that loops are /// contiguous even in the case of split backedges. /// /// TODO: Determine whether RPO is actually worthwhile, or whether we should /// move to just a stable-topological-sort-based approach that would preserve /// more of the original order. static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { // Note that we do our own RPO rather than using // "llvm/ADT/PostOrderIterator.h" because we want control over the order that // successors are visited in (see above). Also, we can sort the blocks in the // MachineFunction as we go. SmallPtrSet<MachineBasicBlock *, 16> Visited; SmallVector<POStackEntry, 16> Stack; MachineBasicBlock *EntryBlock = &*MF.begin(); Visited.insert(EntryBlock); Stack.push_back(POStackEntry(EntryBlock, MF, MLI)); for (;;) { POStackEntry &Entry = Stack.back(); SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs; if (!Succs.empty()) { MachineBasicBlock *Succ = Succs.pop_back_val(); if (Visited.insert(Succ).second) Stack.push_back(POStackEntry(Succ, MF, MLI)); continue; } // Put the block in its position in the MachineFunction. MachineBasicBlock &MBB = *Entry.MBB; MBB.moveBefore(&*MF.begin()); // Branch instructions may utilize a fallthrough, so update them if a // fallthrough has been added or removed. if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() && !MBB.back().isBarrier()) report_fatal_error( "Non-branch terminator with fallthrough cannot yet be rewritten"); if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch()) MBB.updateTerminator(); Stack.pop_back(); if (Stack.empty()) break; } // Now that we've sorted the blocks in RPO, renumber them. MF.RenumberBlocks(); #ifndef NDEBUG SmallSetVector<MachineLoop *, 8> OnStack; // Insert a sentinel representing the degenerate loop that starts at the // function entry block and includes the entire function as a "loop" that // executes once. OnStack.insert(nullptr); for (auto &MBB : MF) { assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative."); MachineLoop *Loop = MLI.getLoopFor(&MBB); if (Loop && &MBB == Loop->getHeader()) { // Loop header. The loop predecessor should be sorted above, and the other // predecessors should be backedges below. for (auto Pred : MBB.predecessors()) assert( (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) && "Loop header predecessors must be loop predecessors or backedges"); assert(OnStack.insert(Loop) && "Loops should be declared at most once."); } else { // Not a loop header. All predecessors should be sorted above. for (auto Pred : MBB.predecessors()) assert(Pred->getNumber() < MBB.getNumber() && "Non-loop-header predecessors should be topologically sorted"); assert(OnStack.count(MLI.getLoopFor(&MBB)) && "Blocks must be nested in their loops"); } while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back())) OnStack.pop_back(); } assert(OnStack.pop_back_val() == nullptr && "The function entry block shouldn't actually be a loop header"); assert(OnStack.empty() && "Control flow stack pushes and pops should be balanced."); #endif }
/// Tests whether this function is known to not return null. /// /// Requires that the function returns a pointer. /// /// Returns true if it believes the function will not return a null, and sets /// \p Speculative based on whether the returned conclusion is a speculative /// conclusion due to SCC calls. static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, const TargetLibraryInfo &TLI, bool &Speculative) { assert(F->getReturnType()->isPointerTy() && "nonnull only meaningful on pointer types"); Speculative = false; SmallSetVector<Value *, 8> FlowsToReturn; for (BasicBlock &BB : *F) if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) FlowsToReturn.insert(Ret->getReturnValue()); for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { Value *RetVal = FlowsToReturn[i]; // If this value is locally known to be non-null, we're good if (isKnownNonNull(RetVal, &TLI)) continue; // Otherwise, we need to look upwards since we can't make any local // conclusions. Instruction *RVI = dyn_cast<Instruction>(RetVal); if (!RVI) return false; switch (RVI->getOpcode()) { // Extend the analysis by looking upwards. case Instruction::BitCast: case Instruction::GetElementPtr: case Instruction::AddrSpaceCast: FlowsToReturn.insert(RVI->getOperand(0)); continue; case Instruction::Select: { SelectInst *SI = cast<SelectInst>(RVI); FlowsToReturn.insert(SI->getTrueValue()); FlowsToReturn.insert(SI->getFalseValue()); continue; } case Instruction::PHI: { PHINode *PN = cast<PHINode>(RVI); for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) FlowsToReturn.insert(PN->getIncomingValue(i)); continue; } case Instruction::Call: case Instruction::Invoke: { CallSite CS(RVI); Function *Callee = CS.getCalledFunction(); // A call to a node within the SCC is assumed to return null until // proven otherwise if (Callee && SCCNodes.count(Callee)) { Speculative = true; continue; } return false; } default: return false; // Unknown source, may be null }; llvm_unreachable("should have either continued or returned"); } return true; }
/// Merge the linker flags in Src into the Dest module. Error IRLinker::linkModuleFlagsMetadata() { // If the source module has no module flags, we are done. const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata(); if (!SrcModFlags) return Error::success(); // If the destination module doesn't have module flags yet, then just copy // over the source module's flags. NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata(); if (DstModFlags->getNumOperands() == 0) { for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) DstModFlags->addOperand(SrcModFlags->getOperand(I)); return Error::success(); } // First build a map of the existing module flags and requirements. DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags; SmallSetVector<MDNode *, 16> Requirements; for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) { MDNode *Op = DstModFlags->getOperand(I); ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0)); MDString *ID = cast<MDString>(Op->getOperand(1)); if (Behavior->getZExtValue() == Module::Require) { Requirements.insert(cast<MDNode>(Op->getOperand(2))); } else { Flags[ID] = std::make_pair(Op, I); } } // Merge in the flags from the source module, and also collect its set of // requirements. for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) { MDNode *SrcOp = SrcModFlags->getOperand(I); ConstantInt *SrcBehavior = mdconst::extract<ConstantInt>(SrcOp->getOperand(0)); MDString *ID = cast<MDString>(SrcOp->getOperand(1)); MDNode *DstOp; unsigned DstIndex; std::tie(DstOp, DstIndex) = Flags.lookup(ID); unsigned SrcBehaviorValue = SrcBehavior->getZExtValue(); // If this is a requirement, add it and continue. if (SrcBehaviorValue == Module::Require) { // If the destination module does not already have this requirement, add // it. if (Requirements.insert(cast<MDNode>(SrcOp->getOperand(2)))) { DstModFlags->addOperand(SrcOp); } continue; } // If there is no existing flag with this ID, just add it. if (!DstOp) { Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands()); DstModFlags->addOperand(SrcOp); continue; } // Otherwise, perform a merge. ConstantInt *DstBehavior = mdconst::extract<ConstantInt>(DstOp->getOperand(0)); unsigned DstBehaviorValue = DstBehavior->getZExtValue(); // If either flag has override behavior, handle it first. if (DstBehaviorValue == Module::Override) { // Diagnose inconsistent flags which both have override behavior. if (SrcBehaviorValue == Module::Override && SrcOp->getOperand(2) != DstOp->getOperand(2)) return stringErr("linking module flags '" + ID->getString() + "': IDs have conflicting override values"); continue; } else if (SrcBehaviorValue == Module::Override) { // Update the destination flag to that of the source. DstModFlags->setOperand(DstIndex, SrcOp); Flags[ID].first = SrcOp; continue; } // Diagnose inconsistent merge behavior types. if (SrcBehaviorValue != DstBehaviorValue) return stringErr("linking module flags '" + ID->getString() + "': IDs have conflicting behaviors"); auto replaceDstValue = [&](MDNode *New) { Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New}; MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps); DstModFlags->setOperand(DstIndex, Flag); Flags[ID].first = Flag; }; // Perform the merge for standard behavior types. switch (SrcBehaviorValue) { case Module::Require: case Module::Override: llvm_unreachable("not possible"); case Module::Error: { // Emit an error if the values differ. 
if (SrcOp->getOperand(2) != DstOp->getOperand(2)) return stringErr("linking module flags '" + ID->getString() + "': IDs have conflicting values"); continue; } case Module::Warning: { // Emit a warning if the values differ. if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { emitWarning("linking module flags '" + ID->getString() + "': IDs have conflicting values"); } continue; } case Module::Append: { MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2)); MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2)); SmallVector<Metadata *, 8> MDs; MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands()); MDs.append(DstValue->op_begin(), DstValue->op_end()); MDs.append(SrcValue->op_begin(), SrcValue->op_end()); replaceDstValue(MDNode::get(DstM.getContext(), MDs)); break; } case Module::AppendUnique: { SmallSetVector<Metadata *, 16> Elts; MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2)); MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2)); Elts.insert(DstValue->op_begin(), DstValue->op_end()); Elts.insert(SrcValue->op_begin(), SrcValue->op_end()); replaceDstValue(MDNode::get(DstM.getContext(), makeArrayRef(Elts.begin(), Elts.end()))); break; } } } // Check all of the requirements. for (unsigned I = 0, E = Requirements.size(); I != E; ++I) { MDNode *Requirement = Requirements[I]; MDString *Flag = cast<MDString>(Requirement->getOperand(0)); Metadata *ReqValue = Requirement->getOperand(1); MDNode *Op = Flags[Flag].first; if (!Op || Op->getOperand(2) != ReqValue) return stringErr("linking module flags '" + Flag->getString() + "': does not have the required value"); } return Error::success(); }
void MCObjectDisassembler::buildCFG(MCModule *Module) { typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy; BBInfoByAddrTy BBInfos; AddressSetTy Splits; AddressSetTy Calls; error_code ec; for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols(); SI != SE; SI.increment(ec)) { if (ec) break; SymbolRef::Type SymType; SI->getType(SymType); if (SymType == SymbolRef::ST_Function) { uint64_t SymAddr; SI->getAddress(SymAddr); SymAddr = getEffectiveLoadAddr(SymAddr); Calls.push_back(SymAddr); Splits.push_back(SymAddr); } } assert(Module->func_begin() == Module->func_end() && "Module already has a CFG!"); // First, determine the basic block boundaries and call targets. for (MCModule::atom_iterator AI = Module->atom_begin(), AE = Module->atom_end(); AI != AE; ++AI) { MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); if (!TA) continue; Calls.push_back(TA->getBeginAddr()); BBInfos[TA->getBeginAddr()].Atom = TA; for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); II != IE; ++II) { if (MIA.isTerminator(II->Inst)) Splits.push_back(II->Address + II->Size); uint64_t Target; if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) { if (MIA.isCall(II->Inst)) Calls.push_back(Target); Splits.push_back(Target); } } } RemoveDupsFromAddressVector(Splits); RemoveDupsFromAddressVector(Calls); // Split text atoms into basic block atoms. for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end(); SI != SE; ++SI) { MCAtom *A = Module->findAtomContaining(*SI); if (!A) continue; MCTextAtom *TA = cast<MCTextAtom>(A); if (TA->getBeginAddr() == *SI) continue; MCTextAtom *NewAtom = TA->split(*SI); BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom; StringRef BBName = TA->getName(); BBName = BBName.substr(0, BBName.find_last_of(':')); NewAtom->setName((BBName + ":" + utohexstr(*SI)).str()); } // Compute succs/preds. for (MCModule::atom_iterator AI = Module->atom_begin(), AE = Module->atom_end(); AI != AE; ++AI) { MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); if (!TA) continue; BBInfo &CurBB = BBInfos[TA->getBeginAddr()]; const MCDecodedInst &LI = TA->back(); if (MIA.isBranch(LI.Inst)) { uint64_t Target; if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target)) CurBB.addSucc(BBInfos[Target]); if (MIA.isConditionalBranch(LI.Inst)) CurBB.addSucc(BBInfos[LI.Address + LI.Size]); } else if (!MIA.isTerminator(LI.Inst)) CurBB.addSucc(BBInfos[LI.Address + LI.Size]); } // Create functions and basic blocks. for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end(); CI != CE; ++CI) { BBInfo &BBI = BBInfos[*CI]; if (!BBI.Atom) continue; MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName()); // Create MCBBs. SmallSetVector<BBInfo*, 16> Worklist; Worklist.insert(&BBI); for (size_t wi = 0; wi < Worklist.size(); ++wi) { BBInfo *BBI = Worklist[wi]; if (!BBI->Atom) continue; BBI->BB = &MCFN.createBlock(*BBI->Atom); // Add all predecessors and successors to the worklist. for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE; ++SI) Worklist.insert(*SI); for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE; ++PI) Worklist.insert(*PI); } // Set preds/succs. 
for (size_t wi = 0; wi < Worklist.size(); ++wi) { BBInfo *BBI = Worklist[wi]; MCBasicBlock *MCBB = BBI->BB; if (!MCBB) continue; for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE; ++SI) if ((*SI)->BB) MCBB->addSuccessor((*SI)->BB); for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE; ++PI) if ((*PI)->BB) MCBB->addPredecessor((*PI)->BB); } } }
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR) { const ModuleAnalysisManager &MAM = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager(); bool Changed = false; assert(InitialC.size() > 0 && "Cannot handle an empty SCC!"); Module &M = *InitialC.begin()->getFunction().getParent(); ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M); if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { ImportedFunctionsStats = llvm::make_unique<ImportedFunctionsInliningStatistics>(); ImportedFunctionsStats->setModuleInfo(M); } // We use a single common worklist for calls across the entire SCC. We // process these in-order and append new calls introduced during inlining to // the end. // // Note that this particular order of processing is actually critical to // avoid very bad behaviors. Consider *highly connected* call graphs where // each function contains a small amonut of code and a couple of calls to // other functions. Because the LLVM inliner is fundamentally a bottom-up // inliner, it can handle gracefully the fact that these all appear to be // reasonable inlining candidates as it will flatten things until they become // too big to inline, and then move on and flatten another batch. // // However, when processing call edges *within* an SCC we cannot rely on this // bottom-up behavior. As a consequence, with heavily connected *SCCs* of // functions we can end up incrementally inlining N calls into each of // N functions because each incremental inlining decision looks good and we // don't have a topological ordering to prevent explosions. // // To compensate for this, we don't process transitive edges made immediate // by inlining until we've done one pass of inlining across the entire SCC. // Large, highly connected SCCs still lead to some amount of code bloat in // this model, but it is uniformly spread across all the functions in the SCC // and eventually they all become too large to inline, rather than // incrementally maknig a single function grow in a super linear fashion. SmallVector<std::pair<CallSite, int>, 16> Calls; FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG) .getManager(); // Populate the initial list of calls in this SCC. for (auto &N : InitialC) { auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction()); // We want to generally process call sites top-down in order for // simplifications stemming from replacing the call with the returned value // after inlining to be visible to subsequent inlining decisions. // FIXME: Using instructions sequence is a really bad way to do this. // Instead we should do an actual RPO walk of the function body. for (Instruction &I : instructions(N.getFunction())) if (auto CS = CallSite(&I)) if (Function *Callee = CS.getCalledFunction()) { if (!Callee->isDeclaration()) Calls.push_back({CS, -1}); else if (!isa<IntrinsicInst>(I)) { using namespace ore; ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) << NV("Callee", Callee) << " will not be inlined into " << NV("Caller", CS.getCaller()) << " because its definition is unavailable" << setIsVerbose(); }); } } } if (Calls.empty()) return PreservedAnalyses::all(); // Capture updatable variables for the current SCC and RefSCC. 
auto *C = &InitialC; auto *RC = &C->getOuterRefSCC(); // When inlining a callee produces new call sites, we want to keep track of // the fact that they were inlined from the callee. This allows us to avoid // infinite inlining in some obscure cases. To represent this, we use an // index into the InlineHistory vector. SmallVector<std::pair<Function *, int>, 16> InlineHistory; // Track a set vector of inlined callees so that we can augment the caller // with all of their edges in the call graph before pruning out the ones that // got simplified away. SmallSetVector<Function *, 4> InlinedCallees; // Track the dead functions to delete once finished with inlining calls. We // defer deleting these to make it easier to handle the call graph updates. SmallVector<Function *, 4> DeadFunctions; // Loop forward over all of the calls. Note that we cannot cache the size as // inlining can introduce new calls that need to be processed. for (int i = 0; i < (int)Calls.size(); ++i) { // We expect the calls to typically be batched with sequences of calls that // have the same caller, so we first set up some shared infrastructure for // this caller. We also do any pruning we can at this layer on the caller // alone. Function &F = *Calls[i].first.getCaller(); LazyCallGraph::Node &N = *CG.lookup(F); if (CG.lookupSCC(N) != C) continue; if (F.hasFnAttribute(Attribute::OptimizeNone)) continue; LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"); // Get a FunctionAnalysisManager via a proxy for this particular node. We // do this each time we visit a node as the SCC may have changed and as // we're going to mutate this particular function we want to make sure the // proxy is in place to forward any invalidation events. We can use the // manager we get here for looking up results for functions other than this // node however because those functions aren't going to be mutated by this // pass. FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG) .getManager(); // Get the remarks emission analysis for the caller. auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult<AssumptionAnalysis>(F); }; auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { return FAM.getResult<BlockFrequencyAnalysis>(F); }; auto GetInlineCost = [&](CallSite CS) { Function &Callee = *CS.getCalledFunction(); auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee); return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, PSI, &ORE); }; // Now process as many calls as we have within this caller in the sequnece. // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. bool DidInline = false; for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) { int InlineHistoryID; CallSite CS; std::tie(CS, InlineHistoryID) = Calls[i]; Function &Callee = *CS.getCalledFunction(); if (InlineHistoryID != -1 && InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) continue; // Check if this inlining may repeat breaking an SCC apart that has // already been split once before. In that case, inlining here may // trigger infinite inlining, much like is prevented within the inliner // itself by the InlineHistory above, but spread across CGSCC iterations // and thus hidden from the full inline history. 
if (CG.lookupSCC(*CG.lookup(Callee)) == C && UR.InlinedInternalEdges.count({&N, C})) { LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node " "previously split out of this SCC by inlining: " << F.getName() << " -> " << Callee.getName() << "\n"); continue; } Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE); // Check whether we want to inline this callsite. if (!OIC) continue; // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( /*cg=*/nullptr, &GetAssumptionCache, PSI, &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())), &FAM.getResult<BlockFrequencyAnalysis>(Callee)); // Get DebugLoc to report. CS will be invalid after Inliner. DebugLoc DLoc = CS->getDebugLoc(); BasicBlock *Block = CS.getParent(); using namespace ore; if (!InlineFunction(CS, IFI)) { ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) << NV("Callee", &Callee) << " will not be inlined into " << NV("Caller", &F); }); continue; } DidInline = true; InlinedCallees.insert(&Callee); ORE.emit([&]() { bool AlwaysInline = OIC->isAlways(); StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block); R << NV("Callee", &Callee) << " inlined into "; R << NV("Caller", &F); if (AlwaysInline) R << " with cost=always"; else { R << " with cost=" << NV("Cost", OIC->getCost()); R << " (threshold=" << NV("Threshold", OIC->getThreshold()); R << ")"; } return R; }); // Add any new callsites to defined functions to the worklist. if (!IFI.InlinedCallSites.empty()) { int NewHistoryID = InlineHistory.size(); InlineHistory.push_back({&Callee, InlineHistoryID}); for (CallSite &CS : reverse(IFI.InlinedCallSites)) if (Function *NewCallee = CS.getCalledFunction()) if (!NewCallee->isDeclaration()) Calls.push_back({CS, NewHistoryID}); } if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) ImportedFunctionsStats->recordInline(F, Callee); // Merge the attributes based on the inlining. AttributeFuncs::mergeAttributesForInlining(F, Callee); // For local functions, check whether this makes the callee trivially // dead. In that case, we can drop the body of the function eagerly // which may reduce the number of callers of other functions to one, // changing inline cost thresholds. if (Callee.hasLocalLinkage()) { // To check this we also need to nuke any dead constant uses (perhaps // made dead by this operation on other functions). Callee.removeDeadConstantUsers(); if (Callee.use_empty() && !CG.isLibFunction(Callee)) { Calls.erase( std::remove_if(Calls.begin() + i + 1, Calls.end(), [&Callee](const std::pair<CallSite, int> &Call) { return Call.first.getCaller() == &Callee; }), Calls.end()); // Clear the body and queue the function itself for deletion when we // finish inlining and call graph updates. // Note that after this point, it is an error to do anything other // than use the callee's address or delete it. Callee.dropAllReferences(); assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && "Cannot put cause a function to become dead twice!"); DeadFunctions.push_back(&Callee); } } } // Back the call index up by one to put us in a good position to go around // the outer loop. --i; if (!DidInline) continue; Changed = true; // Add all the inlined callees' edges as ref edges to the caller. These are // by definition trivial edges as we always have *some* transitive ref edge // chain. 
While in some cases these edges are direct calls inside the // callee, they have to be modeled in the inliner as reference edges as // there may be a reference edge anywhere along the chain from the current // caller to the callee that causes the whole thing to appear like // a (transitive) reference edge that will require promotion to a call edge // below. for (Function *InlinedCallee : InlinedCallees) { LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee); for (LazyCallGraph::Edge &E : *CalleeN) RC->insertTrivialRefEdge(N, E.getNode()); } // At this point, since we have made changes we have at least removed // a call instruction. However, in the process we do some incremental // simplification of the surrounding code. This simplification can // essentially do all of the same things as a function pass and we can // re-use the exact same logic for updating the call graph to reflect the // change. LazyCallGraph::SCC *OldC = C; C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR); LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n"); RC = &C->getOuterRefSCC(); // If this causes an SCC to split apart into multiple smaller SCCs, there // is a subtle risk we need to prepare for. Other transformations may // expose an "infinite inlining" opportunity later, and because of the SCC // mutation, we will revisit this function and potentially re-inline. If we // do, and that re-inlining also has the potentially to mutate the SCC // structure, the infinite inlining problem can manifest through infinite // SCC splits and merges. To avoid this, we capture the originating caller // node and the SCC containing the call edge. This is a slight over // approximation of the possible inlining decisions that must be avoided, // but is relatively efficient to store. // FIXME: This seems like a very heavyweight way of retaining the inline // history, we should look for a more efficient way of tracking it. if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) { return CG.lookupSCC(*CG.lookup(*Callee)) == OldC; })) { LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, " "retaining this to avoid infinite inlining.\n"); UR.InlinedInternalEdges.insert({&N, OldC}); } InlinedCallees.clear(); } // Now that we've finished inlining all of the calls across this SCC, delete // all of the trivially dead functions, updating the call graph and the CGSCC // pass manager in the process. // // Note that this walks a pointer set which has non-deterministic order but // that is OK as all we do is delete things and add pointers to unordered // sets. for (Function *DeadF : DeadFunctions) { // Get the necessary information out of the call graph and nuke the // function there. Also, cclear out any cached analyses. auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG) .getManager(); FAM.clear(*DeadF, DeadF->getName()); AM.clear(DeadC, DeadC.getName()); auto &DeadRC = DeadC.getOuterRefSCC(); CG.removeDeadFunction(*DeadF); // Mark the relevant parts of the call graph as invalid so we don't visit // them. UR.InvalidatedSCCs.insert(&DeadC); UR.InvalidatedRefSCCs.insert(&DeadRC); // And delete the actual function from the module. M.getFunctionList().erase(DeadF); } if (!Changed) return PreservedAnalyses::all(); // Even if we change the IR, we update the core CGSCC data structures and so // can preserve the proxy to the function analysis manager. 
PreservedAnalyses PA; PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); return PA; }
/// propagateSiblingValue - Propagate the value in SVI to dependents if it is /// known. Otherwise remember the dependency for later. /// /// @param SVIIter SibValues entry to propagate. /// @param VNI Dependent value, or NULL to propagate to all saved dependents. void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, VNInfo *VNI) { SibValueMap::value_type *SVI = &*SVIIter; // When VNI is non-NULL, add it to SVI's deps, and only propagate to that. TinyPtrVector<VNInfo*> FirstDeps; if (VNI) { FirstDeps.push_back(VNI); SVI->second.Deps.push_back(VNI); } // Has the value been completely determined yet? If not, defer propagation. if (!SVI->second.hasDef()) return; // Work list of values to propagate. SmallSetVector<SibValueMap::value_type *, 8> WorkList; WorkList.insert(SVI); do { SVI = WorkList.pop_back_val(); TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps; VNI = nullptr; SibValueInfo &SV = SVI->second; if (!SV.SpillMBB) SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def); DEBUG(dbgs() << " prop to " << Deps->size() << ": " << SVI->first->id << '@' << SVI->first->def << ":\t" << SV); assert(SV.hasDef() && "Propagating undefined value"); // Should this value be propagated as a preferred spill candidate? We don't // propagate values of registers that are about to spill. bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg); unsigned SpillDepth = ~0u; for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(), DepE = Deps->end(); DepI != DepE; ++DepI) { SibValueMap::iterator DepSVI = SibValues.find(*DepI); assert(DepSVI != SibValues.end() && "Dependent value not in SibValues"); SibValueInfo &DepSV = DepSVI->second; if (!DepSV.SpillMBB) DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def); bool Changed = false; // Propagate defining instruction. if (!DepSV.hasDef()) { Changed = true; DepSV.DefMI = SV.DefMI; DepSV.DefByOrigPHI = SV.DefByOrigPHI; } // Propagate AllDefsAreReloads. For PHI values, this computes an AND of // all predecessors. if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) { Changed = true; DepSV.AllDefsAreReloads = false; } // Propagate best spill value. if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) { if (SV.SpillMBB == DepSV.SpillMBB) { // DepSV is in the same block. Hoist when dominated. if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) { // This is an alternative def earlier in the same MBB. // Hoist the spill as far as possible in SpillMBB. This can ease // register pressure: // // x = def // y = use x // s = copy x // // Hoisting the spill of s to immediately after the def removes the // interference between x and y: // // x = def // spill x // y = use x<kill> // // This hoist only helps when the DepSV copy kills its source. Changed = true; DepSV.SpillReg = SV.SpillReg; DepSV.SpillVNI = SV.SpillVNI; DepSV.SpillMBB = SV.SpillMBB; } } else { // DepSV is in a different block. if (SpillDepth == ~0u) SpillDepth = Loops.getLoopDepth(SV.SpillMBB); // Also hoist spills to blocks with smaller loop depth, but make sure // that the new value dominates. Non-phi dependents are always // dominated, phis need checking. const BranchProbability MarginProb(4, 5); // 80% // Hoist a spill to outer loop if there are multiple dependents (it // can be beneficial if more than one dependents are hoisted) or // if DepSV (the hoisting source) is hotter than SV (the hoisting // destination) (we add a 80% margin to bias a little towards // loop depth). 
bool HoistCondition = (MBFI.getBlockFreq(DepSV.SpillMBB) >= (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) || Deps->size() > 1; if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) && HoistCondition && (!DepSVI->first->isPHIDef() || MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) { Changed = true; DepSV.SpillReg = SV.SpillReg; DepSV.SpillVNI = SV.SpillVNI; DepSV.SpillMBB = SV.SpillMBB; } } } if (!Changed) continue; // Something changed in DepSVI. Propagate to dependents. WorkList.insert(&*DepSVI); DEBUG(dbgs() << " update " << DepSVI->first->id << '@' << DepSVI->first->def << " to:\t" << DepSV); } } while (!WorkList.empty()); }
MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { if (!A || !B) return nullptr; if (A == B) return A; // For struct-path aware TBAA, we use the access type of the tag. bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B); if (StructPath) { A = cast_or_null<MDNode>(A->getOperand(1)); if (!A) return nullptr; B = cast_or_null<MDNode>(B->getOperand(1)); if (!B) return nullptr; } SmallSetVector<MDNode *, 4> PathA; MDNode *T = A; while (T) { if (PathA.count(T)) report_fatal_error("Cycle found in TBAA metadata."); PathA.insert(T); T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : nullptr; } SmallSetVector<MDNode *, 4> PathB; T = B; while (T) { if (PathB.count(T)) report_fatal_error("Cycle found in TBAA metadata."); PathB.insert(T); T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : nullptr; } int IA = PathA.size() - 1; int IB = PathB.size() - 1; MDNode *Ret = nullptr; while (IA >= 0 && IB >= 0) { if (PathA[IA] == PathB[IB]) Ret = PathA[IA]; else break; --IA; --IB; } if (!StructPath) return Ret; if (!Ret) return nullptr; // We need to convert from a type node to a tag node. Type *Int64 = IntegerType::get(A->getContext(), 64); Metadata *Ops[3] = {Ret, Ret, ConstantAsMetadata::get(ConstantInt::get(Int64, 0))}; return MDNode::get(A->getContext(), Ops); }
/// handleEndBlock - Remove dead stores to stack-allocated locations in the /// function end block. Ex: /// %A = alloca i32 /// ... /// store i32 1, i32* %A /// ret void bool DSE::handleEndBlock(BasicBlock &BB) { bool MadeChange = false; // Keep track of all of the stack objects that are dead at the end of the // function. SmallSetVector<Value*, 16> DeadStackObjects; // Find all of the alloca'd pointers in the entry block. BasicBlock *Entry = BB.getParent()->begin(); for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) { if (isa<AllocaInst>(I)) DeadStackObjects.insert(I); // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true)) DeadStackObjects.insert(I); } // Treat byval or inalloca arguments the same, stores to them are dead at the // end of the function. for (Function::arg_iterator AI = BB.getParent()->arg_begin(), AE = BB.getParent()->arg_end(); AI != AE; ++AI) if (AI->hasByValOrInAllocaAttr()) DeadStackObjects.insert(AI); // Scan the basic block backwards for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ --BBI; // If we find a store, check to see if it points into a dead stack value. if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts SmallVector<Value *, 4> Pointers; GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers); // Stores to stack values are valid candidates for removal. bool AllDead = true; for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(), E = Pointers.end(); I != E; ++I) if (!DeadStackObjects.count(*I)) { AllDead = false; break; } if (AllDead) { Instruction *Dead = BBI++; DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " << *Dead << "\n Objects: "; for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(), E = Pointers.end(); I != E; ++I) { dbgs() << **I; if (std::next(I) != E) dbgs() << ", "; } dbgs() << '\n'); // DCE instructions only used to calculate that store. DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; } } // Remove any dead non-memory-mutating instructions. if (isInstructionTriviallyDead(BBI, TLI)) { Instruction *Inst = BBI++; DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; } if (isa<AllocaInst>(BBI)) { // Remove allocas from the list of dead stack objects; there can't be // any references before the definition. DeadStackObjects.remove(BBI); continue; } if (CallSite CS = cast<Value>(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. if (isAllocLikeFn(BBI, TLI)) DeadStackObjects.remove(BBI); // If this call does not access memory, it can't be loading any of our // pointers. if (AA->doesNotAccessMemory(CS)) continue; // If the call might load from any of our allocas, then any store above // the call is live. DeadStackObjects.remove_if([&](Value *I) { // See if the call site touches the value. AliasAnalysis::ModRefResult A = AA->getModRefInfo(CS, I, getPointerSize(I, *AA)); return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref; }); // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. 
if (DeadStackObjects.empty()) break; continue; } AliasAnalysis::Location LoadedLoc; // If we encounter a use of the pointer, it is no longer considered dead if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { if (!L->isUnordered()) // Be conservative with atomic/volatile load break; LoadedLoc = AA->getLocation(L); } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { LoadedLoc = AA->getLocation(V); } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { LoadedLoc = AA->getLocationForSource(MTI); } else if (!BBI->mayReadFromMemory()) { // Instruction doesn't read memory. Note that stores that weren't removed // above will hit this case. continue; } else { // Unknown inst; assume it clobbers everything. break; } // Remove any allocas from the DeadPointer set that are loaded, as this // makes any stores above the access live. RemoveAccessedObjects(LoadedLoc, DeadStackObjects); // If all of the allocas were clobbered by the access then we're not going // to find anything else to process. if (DeadStackObjects.empty()) break; }
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { SmallVector<ReturnInst*, 16> Returns; SmallVector<InvokeInst*, 16> Invokes; SmallSetVector<LandingPadInst*, 16> LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Invokes.push_back(II); LPads.insert(II->getUnwindDest()->getLandingPadInst()); } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Returns.push_back(RI); } if (Invokes.empty()) return false; NumInvokes += Invokes.size(); lowerIncomingArguments(F); lowerAcrossUnwindEdges(F, Invokes); Value *FuncCtx = setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); Type *Int32Ty = Type::getInt32Ty(F.getContext()); Value *Idxs[2] = { ConstantInt::get(Int32Ty, 0), 0 }; // Get a reference to the jump buffer. Idxs[1] = ConstantInt::get(Int32Ty, 5); Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep", EntryBB->getTerminator()); // Save the frame pointer. Idxs[1] = ConstantInt::get(Int32Ty, 0); Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep", EntryBB->getTerminator()); Value *Val = CallInst::Create(FrameAddrFn, ConstantInt::get(Int32Ty, 0), "fp", EntryBB->getTerminator()); new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); // Save the stack pointer. Idxs[1] = ConstantInt::get(Int32Ty, 2); Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep", EntryBB->getTerminator()); Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr, Type::getInt8PtrTy(F.getContext()), "", EntryBB->getTerminator()); CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator()); // Store a pointer to the function context so that the back-end will know // where to look for it. Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx, Type::getInt8PtrTy(F.getContext()), "", EntryBB->getTerminator()); CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator()); // At this point, we are all set up, update the invoke instructions to mark // their call_site values. for (unsigned I = 0, E = Invokes.size(); I != E; ++I) { insertCallSiteStore(Invokes[I], I + 1); ConstantInt *CallSiteNum = ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); // Record the call site value for the back end so it stays associated with // the invoke. CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]); } // Mark call instructions that aren't nounwind as no-action (call_site == // -1). Skip the entry block, as prior to then, no function context has been // created for this function and any unexpected exceptions thrown will go // directly to the caller's context, which is what we want anyway, so no need // to do anything here. 
for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (!CI->doesNotThrow()) insertCallSiteStore(CI, -1); } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { insertCallSiteStore(RI, -1); } // Register the function context and make sure it's known to not throw CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator()); Register->setDoesNotThrow(); // Following any allocas not in the entry block, update the saved SP in the // jmpbuf to the new value. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (BB == F.begin()) continue; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (CallInst *CI = dyn_cast<CallInst>(I)) { if (CI->getCalledFunction() != StackRestoreFn) continue; } else if (!isa<AllocaInst>(I)) { continue; } Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); StackAddr->insertAfter(I); Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); StoreStackAddr->insertAfter(StackAddr); } } // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. for (unsigned I = 0, E = Returns.size(); I != E; ++I) CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]); return true; }
/// This works like CloneAndPruneFunctionInto, except that it does not clone the /// entire function. Instead it starts at an instruction provided by the caller /// and copies (and prunes) only the code reachable from that instruction. void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; ValueMaterializer *Materializer = nullptr; #ifndef NDEBUG // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. if (!StartingInst) for (const Argument &II : OldFunc->args()) assert(VMap.count(&II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, NameSuffix, CodeInfo); const BasicBlock *StartingBB; if (StartingInst) StartingBB = StartingInst->getParent(); else { StartingBB = &OldFunc->getEntryBlock(); StartingInst = &StartingBB->front(); } // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, BB->begin(), CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (const BasicBlock &BI : *OldFunc) { Value *V = VMap.lookup(&BI); BasicBlock *NewBB = cast_or_null<BasicBlock>(V); if (!NewBB) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Handle PHI nodes specially, as we have to remove references to dead // blocks. for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { // PHI nodes may have been remapped to non-PHI nodes by the caller or // during the cloning process. if (const PHINode *PN = dyn_cast<PHINode>(I)) { if (isa<PHINode>(VMap[PN])) PHIToResolve.push_back(PN); else break; } else { break; } } // Finally, remap the terminator instructions, as those can't be remapped // until all BBs are mapped. RemapInstruction(NewBB->getTerminator(), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. 
for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { Value *V = VMap.lookup(PN->getIncomingBlock(pred)); if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred; // Revisit the next entry. --e; } } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (const auto &PCI : PredCount) { BasicBlock *Pred = PCI.first; for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[&*OldI] == PN && "VMap mismatch"); VMap[&*OldI] = NV; PN->eraseFromParent(); ++OldI; } } } // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. This will transparently update the // WeakVH in the VMap. Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). const DataLayout &DL = NewFunc->getParent()->getDataLayout(); SmallSetVector<const Value *, 8> Worklist; for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (isa<PHINode>(VMap[PHIToResolve[Idx]])) Worklist.insert(PHIToResolve[Idx]); // Note that we must test the size on each iteration, the worklist can grow. for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { const Value *OrigV = Worklist[Idx]; auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV)); if (!I) continue; // See if this instruction simplifies. 
Value *SimpleV = SimplifyInstruction(I, DL); if (!SimpleV) continue; // Stash away all the uses of the old instruction so we can check them for // recursive simplifications after a RAUW. This is cheaper than checking all // uses of To on the recursive step in most cases. for (const User *U : OrigV->users()) Worklist.insert(cast<Instruction>(U)); // Replace the instruction with its simplified value. I->replaceAllUsesWith(SimpleV); // If the original instruction had no side effects, remove it. if (isInstructionTriviallyDead(I)) I->eraseFromParent(); else VMap[OrigV] = I; } // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || I->getSinglePredecessor() == &*I)) { BasicBlock *DeadBB = &*I++; DeleteDeadBlock(DeadBB); continue; } // We need to simplify conditional branches and switches with a constant // operand. We try to prune these out when cloning, but if the // simplification required looking through PHI nodes, those are only // available after forming the full basic block. That may leave some here, // and we still want to prune the dead code as early as possible. ConstantFoldTerminator(&*I); BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor()) { ++I; continue; } // We shouldn't be able to get single-entry PHI nodes here, as instsimplify // above should have zapped all of them.. assert(!isa<PHINode>(Dest->begin())); // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(), E = NewFunc->end(); I != E; ++I) if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) Returns.push_back(RI); }
/// performStoreOnlyObjectElimination - Scan the graph of uses of the specified
/// object allocation. If the object does not escape and is only stored to
/// (this happens because GVN and other optimizations forward-substitute the
/// stored values, eliminating all loads from the object), then zap the object
/// and all accesses related to it.
static bool performStoreOnlyObjectElimination(CallInst &Allocation,
                                              BasicBlock::iterator &BBI) {
  DtorKind DtorInfo = analyzeDestructor(Allocation.getArgOperand(0));

  // We can't delete the object if its destructor has side effects.
  if (DtorInfo != DtorKind::NoSideEffects)
    return false;

  // Do a depth first search exploring all of the uses of the object pointer,
  // following through casts, pointer adjustments etc. If we find any loads or
  // any escape sites of the object, we give up. If we succeed in walking the
  // entire graph of uses, we can remove the resultant set.
  SmallSetVector<Instruction *, 16> InvolvedInstructions;
  SmallVector<Instruction *, 16> Worklist;
  Worklist.push_back(&Allocation);

  // Stores - Keep track of all of the store instructions we see.
  SmallVector<StoreInst *, 16> Stores;

  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();

    // Insert the instruction into our InvolvedInstructions set. If we have
    // already seen it, then don't reprocess all of the uses.
    if (!InvolvedInstructions.insert(I))
      continue;

    // Okay, this is the first time we've seen this instruction, proceed.
    switch (classifyInstruction(*I)) {
    // These instructions should not reach here based on the pass ordering,
    // i.e. LLVMARCOpt -> LLVMContractOpt.
    case RT_RetainN:
    case RT_UnknownRetainN:
    case RT_BridgeRetainN:
    case RT_ReleaseN:
    case RT_UnknownReleaseN:
    case RT_BridgeReleaseN:
      llvm_unreachable("These are only created by LLVMARCContract!");
    case RT_AllocObject:
      // If this is a different swift_allocObject than we started with, then
      // there is some computation feeding into a size or alignment
      // computation that we have to keep... unless we can delete *that*
      // entire object as well.
      break;

    case RT_NoMemoryAccessed:
      // If no memory is accessed, then something is being done with the
      // pointer: maybe it is bitcast or GEP'd. Since there are no side
      // effects, it is perfectly fine to delete this instruction if all uses
      // of the instruction are also eliminable.
      if (I->mayHaveSideEffects() || isa<TerminatorInst>(I))
        return false;
      break;

    case RT_Release:
    case RT_Retain:
    case RT_FixLifetime:
    case RT_CheckUnowned:
      // It is perfectly fine to eliminate various retains and releases of
      // this object: we are zapping all accesses or none.
      break;

    // If this is an unknown instruction, we have more interesting things to
    // consider.
    case RT_Unknown:
    case RT_ObjCRelease:
    case RT_ObjCRetain:
    case RT_UnknownRetain:
    case RT_UnknownRelease:
    case RT_BridgeRetain:
    case RT_BridgeRelease:
    case RT_RetainUnowned:
      // Otherwise, this really is some unhandled instruction. Bail out.
      return false;
    }

    // Okay, if we got here, the instruction can be eaten so long as all of
    // its uses can be. Scan through the uses and add them to the worklist for
    // recursive processing.
    for (auto UI = I->user_begin(), E = I->user_end(); UI != E; ++UI) {
      Instruction *User = cast<Instruction>(*UI);

      // Handle stores as a special case here: we want to make sure that the
      // object is being stored *to*, not itself being stored (which would be
      // an escape point). Since stores themselves don't have any uses, we can
      // short-cut the classification scheme above.
      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        // If this is a store *to* the object, we can zap it.
        if (UI.getUse().getOperandNo() ==
            StoreInst::getPointerOperandIndex()) {
          InvolvedInstructions.insert(SI);
          continue;
        }

        // Otherwise, using the object as a source (or size) is an escape.
        return false;
      }
      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
        // If this is a memset/memcpy/memmove *to* the object, we can zap it.
        if (UI.getUse().getOperandNo() == 0) {
          InvolvedInstructions.insert(MI);
          continue;
        }

        // Otherwise, using the object as a source (or size) is an escape.
        return false;
      }

      // Otherwise, normal instructions just go on the worklist for
      // processing.
      Worklist.push_back(User);
    }
  }

  // Okay, we succeeded! This means we can zap all of the instructions that
  // use the object. One thing we have to be careful of is to make sure that
  // we don't invalidate "BBI" (the iterator that the outer walk of the
  // optimization pass is using, which indicates the next instruction to
  // process). This would happen if we deleted the instruction it is pointing
  // to, so advance the iterator past any involved instructions first.
  while (InvolvedInstructions.count(&*BBI))
    ++BBI;

  // Zap all of the instructions.
  for (auto I : InvolvedInstructions) {
    if (!I->use_empty())
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
    I->eraseFromParent();
  }

  ++NumStoreOnlyObjectsEliminated;
  return true;
}
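// A minimal sketch of the operand-number test the store special case above
// relies on. A StoreInst has two operands: the stored value (operand 0) and
// the address being stored to (operand 1, StoreInst::getPointerOperandIndex()).
// Only a use in the address position is harmless for a store-only object; a
// use in the value position stores the object pointer itself somewhere, which
// is an escape. The helper name `isStoreToObject` is hypothetical.
#include "llvm/IR/Instructions.h"

// Returns true if the use U of an object pointer is a store *to* the object
// rather than a store *of* the object pointer.
static bool isStoreToObject(const llvm::Use &U) {
  if (!llvm::isa<llvm::StoreInst>(U.getUser()))
    return false;
  // Operand 1 of a store is the pointer operand, i.e. the destination.
  return U.getOperandNo() == llvm::StoreInst::getPointerOperandIndex();
}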
/// \brief Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities. This routine
/// estimates this optimization. It computes the cost of the unrolled loop
/// (UnrolledCost) and the dynamic cost of the original loop
/// (RolledDynamicCost). By dynamic cost we mean that we won't count costs of
/// blocks that are known not to be executed (i.e. if we have a branch in the
/// loop and we know that at the given iteration its condition would resolve
/// to true, we won't add up the cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop
/// is too big to analyze), the returned value is None.
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
                      int MaxUnrolledLoopSize) {
  // We want to be able to scale offsets by the trip count and add more
  // offsets to them without checking for overflows, and we already don't want
  // to analyze *massive* trip counts, so we force the max to be reasonably
  // small.
  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
         "The unroll iterations max is too large!");

  // Only analyze inner loops. We can't properly estimate the cost of nested
  // loops and we won't visit inner loops again anyway.
  if (!L->empty())
    return None;

  // Don't simulate loops with a big or unknown trip count.
  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
      TripCount > UnrollMaxIterationsCountToAnalyze)
    return None;

  SmallSetVector<BasicBlock *, 16> BBWorklist;
  SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist;
  DenseMap<Value *, Constant *> SimplifiedValues;
  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;

  // The estimated cost of the unrolled form of the loop. We try to estimate
  // this by simplifying as much as we can while computing the estimate.
  int UnrolledCost = 0;

  // We also track the estimated dynamic (that is, actually executed) cost in
  // the rolled form. This helps identify cases when the savings from
  // unrolling aren't just exposing dead control flow, but an actual reduction
  // in dynamic instructions due to the simplifications we expect to occur
  // after unrolling.
  int RolledDynamicCost = 0;

  // We track the simplification of each instruction in each iteration. We use
  // this to recursively merge costs into the unrolled cost on-demand so that
  // we don't count the cost of any dead code. This is essentially a map from
  // <instruction, int> to <bool, bool>, but stored as a densely packed
  // struct.
  DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;

  // A small worklist used to accumulate cost of instructions from each
  // observable and reached root in the loop.
  SmallVector<Instruction *, 16> CostWorklist;

  // PHI-used worklist used between iterations while accumulating cost.
  SmallVector<Instruction *, 4> PHIUsedList;

  // Helper function to accumulate cost for instructions in the loop.
  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
    assert(Iteration >= 0 && "Cannot have a negative iteration!");
    assert(CostWorklist.empty() && "Must start with an empty cost list");
    assert(PHIUsedList.empty() && "Must start with an empty phi used list");
    CostWorklist.push_back(&RootI);
    for (;; --Iteration) {
      do {
        Instruction *I = CostWorklist.pop_back_val();

        // InstCostMap only uses I and Iteration as a key; the other two
        // values don't matter here.
        auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
        if (CostIter == InstCostMap.end())
          // If an input to a PHI node comes from a dead path through the
          // loop, we may have no cost data for it here. What that actually
          // means is that it is free.
          continue;
        auto &Cost = *CostIter;
        if (Cost.IsCounted)
          // Already counted this instruction.
          continue;

        // Mark that we are counting the cost of this instruction now.
        Cost.IsCounted = true;

        // If this is a PHI node in the loop header, just add it to the PHI
        // set.
        if (auto *PhiI = dyn_cast<PHINode>(I))
          if (PhiI->getParent() == L->getHeader()) {
            assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
                                  "inherently simplify during unrolling.");
            if (Iteration == 0)
              continue;

            // Push the incoming value from the backedge into the PHI used
            // list if it is an in-loop instruction. We'll use this to
            // populate the cost worklist for the next iteration (as we count
            // backwards).
            if (auto *OpI = dyn_cast<Instruction>(
                    PhiI->getIncomingValueForBlock(L->getLoopLatch())))
              if (L->contains(OpI))
                PHIUsedList.push_back(OpI);
            continue;
          }

        // First accumulate the cost of this instruction.
        if (!Cost.IsFree) {
          UnrolledCost += TTI.getUserCost(I);
          DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
                       << "): ");
          DEBUG(I->dump());
        }

        // We must count the cost of every operand which is not free,
        // recursively. If we reach a loop PHI node, simply add it to the set
        // to be considered on the next iteration (backwards!).
        for (Value *Op : I->operands()) {
          // Check whether this operand is free due to being a constant or
          // outside the loop.
          auto *OpI = dyn_cast<Instruction>(Op);
          if (!OpI || !L->contains(OpI))
            continue;

          // Otherwise accumulate its cost.
          CostWorklist.push_back(OpI);
        }
      } while (!CostWorklist.empty());

      if (PHIUsedList.empty())
        // We've exhausted the search.
        break;

      assert(Iteration > 0 &&
             "Cannot track PHI-used values past the first iteration!");
      CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
      PHIUsedList.clear();
    }
  };

  // Ensure that we don't violate the loop structure invariants relied on by
  // this analysis.
  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
  assert(L->isLCSSAForm(DT) &&
         "Must have loops in LCSSA form to track live-out values.");

  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

  // Simulate execution of each iteration of the loop, counting the
  // instructions that would be simplified.
  // Since the same load can take different values on different iterations,
  // we literally have to go through all of the loop's iterations.
  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

    // Prepare for the iteration by collecting any simplified entry or
    // backedge inputs.
    for (Instruction &I : *L->getHeader()) {
      auto *PHI = dyn_cast<PHINode>(&I);
      if (!PHI)
        break;

      // The loop header PHI nodes must have exactly two inputs: one from the
      // loop preheader and one from the loop latch.
      assert(
          PHI->getNumIncomingValues() == 2 &&
          "Must have an incoming value only for the preheader and the latch.");

      Value *V = PHI->getIncomingValueForBlock(
          Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
      Constant *C = dyn_cast<Constant>(V);
      if (Iteration != 0 && !C)
        C = SimplifiedValues.lookup(V);
      if (C)
        SimplifiedInputValues.push_back({PHI, C});
    }

    // Now clear and re-populate the map for the next iteration.
    SimplifiedValues.clear();
    while (!SimplifiedInputValues.empty())
      SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());

    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);

    BBWorklist.clear();
    BBWorklist.insert(L->getHeader());
    // Note that we *must not* cache the size, this loop grows the worklist.
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      BasicBlock *BB = BBWorklist[Idx];

      // Visit all instructions in the given basic block and try to simplify
      // them. We don't change the actual IR, just count optimization
      // opportunities.
      for (Instruction &I : *BB) {
        // Track this instruction's expected baseline cost when executing the
        // rolled loop form.
        RolledDynamicCost += TTI.getUserCost(&I);

        // Visit the instruction to analyze its loop cost after unrolling,
        // and if the visitor returns true, mark the instruction as free after
        // unrolling and continue.
        bool IsFree = Analyzer.visit(I);
        bool Inserted = InstCostMap.insert({&I, (int)Iteration,
                                            (unsigned)IsFree,
                                            /*IsCounted*/ false}).second;
        (void)Inserted;
        assert(Inserted && "Cannot have a state for an unvisited instruction!");

        if (IsFree)
          continue;

        // If the instruction might have a side effect, recursively account
        // for the cost of it and all the instructions leading up to it.
        if (I.mayHaveSideEffects())
          AddCostRecursively(I, Iteration);

        // We can't properly model the cost of a call.
        // FIXME: With a proper cost model we should be able to do it.
        if (isa<CallInst>(&I))
          return None;

        // If the unrolled body turns out to be too big, bail out.
        if (UnrolledCost > MaxUnrolledLoopSize) {
          DEBUG(dbgs() << "  Exceeded threshold; exiting.\n"
                       << "  UnrolledCost: " << UnrolledCost
                       << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
                       << "\n");
          return None;
        }
      }

      TerminatorInst *TI = BB->getTerminator();

      // Add in the live successors by first checking whether we have a
      // terminator that may be simplified based on the values simplified by
      // this call.
      BasicBlock *KnownSucc = nullptr;
      if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
        if (BI->isConditional()) {
          if (Constant *SimpleCond =
                  SimplifiedValues.lookup(BI->getCondition())) {
            // Just take the first successor if the condition is undef.
            if (isa<UndefValue>(SimpleCond))
              KnownSucc = BI->getSuccessor(0);
            else if (ConstantInt *SimpleCondVal =
                         dyn_cast<ConstantInt>(SimpleCond))
              KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
          }
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
        if (Constant *SimpleCond =
                SimplifiedValues.lookup(SI->getCondition())) {
          // Just take the first successor if the condition is undef.
          if (isa<UndefValue>(SimpleCond))
            KnownSucc = SI->getSuccessor(0);
          else if (ConstantInt *SimpleCondVal =
                       dyn_cast<ConstantInt>(SimpleCond))
            KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
        }
      }
      if (KnownSucc) {
        if (L->contains(KnownSucc))
          BBWorklist.insert(KnownSucc);
        else
          ExitWorklist.insert({BB, KnownSucc});
        continue;
      }

      // Add BB's successors to the worklist.
      for (BasicBlock *Succ : successors(BB))
        if (L->contains(Succ))
          BBWorklist.insert(Succ);
        else
          ExitWorklist.insert({BB, Succ});
      AddCostRecursively(*TI, Iteration);
    }

    // If we found no optimization opportunities on the first iteration, we
    // won't find them on later ones either.
    if (UnrolledCost == RolledDynamicCost) {
      DEBUG(dbgs() << "  No opportunities found; exiting.\n"
                   << "  UnrolledCost: " << UnrolledCost << "\n");
      return None;
    }
  }

  while (!ExitWorklist.empty()) {
    BasicBlock *ExitingBB, *ExitBB;
    std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();

    for (Instruction &I : *ExitBB) {
      auto *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        break;

      Value *Op = PN->getIncomingValueForBlock(ExitingBB);
      if (auto *OpI = dyn_cast<Instruction>(Op))
        if (L->contains(OpI))
          AddCostRecursively(*OpI, TripCount - 1);
    }
  }

  DEBUG(dbgs() << "Analysis finished:\n"
               << "UnrolledCost: " << UnrolledCost << ", "
               << "RolledDynamicCost: " << RolledDynamicCost << "\n");
  return {{UnrolledCost, RolledDynamicCost}};
}
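// A standalone illustration (not from the pass) of the opportunity this
// analysis estimates. Once the loop below is fully unrolled, every Table[i]
// load has a constant index, so the loads fold to constants and the whole
// function reduces to 'return 10'; for such a loop, UnrolledCost ends up far
// below RolledDynamicCost.
static const int Table[4] = {1, 2, 3, 4};

int sumTable() {
  int Sum = 0;
  // TripCount == 4 is known at compile time, so the loop qualifies for
  // complete unrolling.
  for (int i = 0; i < 4; ++i)
    Sum += Table[i]; // After unrolling: Table[0] + Table[1] + ... == 10.
  return Sum;
}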
/// \brief Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities. This routine
/// estimates this optimization. It computes the cost of the unrolled loop
/// (UnrolledCost) and the dynamic cost of the original loop
/// (RolledDynamicCost). By dynamic cost we mean that we won't count costs of
/// blocks that are known not to be executed (i.e. if we have a branch in the
/// loop and we know that at the given iteration its condition would resolve
/// to true, we won't add up the cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop
/// is too big to analyze), the returned value is None.
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
                      int MaxUnrolledLoopSize) {
  // We want to be able to scale offsets by the trip count and add more
  // offsets to them without checking for overflows, and we already don't want
  // to analyze *massive* trip counts, so we force the max to be reasonably
  // small.
  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
         "The unroll iterations max is too large!");

  // Don't simulate loops with a big or unknown trip count.
  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
      TripCount > UnrollMaxIterationsCountToAnalyze)
    return None;

  SmallSetVector<BasicBlock *, 16> BBWorklist;
  DenseMap<Value *, Constant *> SimplifiedValues;
  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;

  // The estimated cost of the unrolled form of the loop. We try to estimate
  // this by simplifying as much as we can while computing the estimate.
  int UnrolledCost = 0;

  // We also track the estimated dynamic (that is, actually executed) cost in
  // the rolled form. This helps identify cases when the savings from
  // unrolling aren't just exposing dead control flow, but an actual reduction
  // in dynamic instructions due to the simplifications we expect to occur
  // after unrolling.
  int RolledDynamicCost = 0;

  // Ensure that we don't violate the loop structure invariants relied on by
  // this analysis.
  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
  assert(L->isLCSSAForm(DT) &&
         "Must have loops in LCSSA form to track live-out values.");

  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

  // Simulate execution of each iteration of the loop, counting the
  // instructions that would be simplified.
  // Since the same load can take different values on different iterations,
  // we literally have to go through all of the loop's iterations.
  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

    // Prepare for the iteration by collecting any simplified entry or
    // backedge inputs.
    for (Instruction &I : *L->getHeader()) {
      auto *PHI = dyn_cast<PHINode>(&I);
      if (!PHI)
        break;

      // The loop header PHI nodes must have exactly two inputs: one from the
      // loop preheader and one from the loop latch.
      assert(
          PHI->getNumIncomingValues() == 2 &&
          "Must have an incoming value only for the preheader and the latch.");

      Value *V = PHI->getIncomingValueForBlock(
          Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
      Constant *C = dyn_cast<Constant>(V);
      if (Iteration != 0 && !C)
        C = SimplifiedValues.lookup(V);
      if (C)
        SimplifiedInputValues.push_back({PHI, C});
    }

    // Now clear and re-populate the map for the next iteration.
    SimplifiedValues.clear();
    while (!SimplifiedInputValues.empty())
      SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());

    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE);

    BBWorklist.clear();
    BBWorklist.insert(L->getHeader());
    // Note that we *must not* cache the size, this loop grows the worklist.
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      BasicBlock *BB = BBWorklist[Idx];

      // Visit all instructions in the given basic block and try to simplify
      // them. We don't change the actual IR, just count optimization
      // opportunities.
      for (Instruction &I : *BB) {
        int InstCost = TTI.getUserCost(&I);

        // Visit the instruction to analyze its loop cost after unrolling,
        // and if the visitor returns false, include this instruction in the
        // unrolled cost.
        if (!Analyzer.visit(I))
          UnrolledCost += InstCost;
        else
          DEBUG(dbgs() << "  " << I
                       << " would be simplified if loop is unrolled.\n");

        // Also track this instruction's expected cost when executing the
        // rolled loop form.
        RolledDynamicCost += InstCost;

        // If the unrolled body turns out to be too big, bail out.
        if (UnrolledCost > MaxUnrolledLoopSize) {
          DEBUG(dbgs() << "  Exceeded threshold; exiting.\n"
                       << "  UnrolledCost: " << UnrolledCost
                       << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
                       << "\n");
          return None;
        }
      }

      TerminatorInst *TI = BB->getTerminator();

      // Add in the live successors by first checking whether we have a
      // terminator that may be simplified based on the values simplified by
      // this call.
      if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
        if (BI->isConditional()) {
          if (Constant *SimpleCond =
                  SimplifiedValues.lookup(BI->getCondition())) {
            BasicBlock *Succ = nullptr;
            // Just take the first successor if the condition is undef.
            if (isa<UndefValue>(SimpleCond))
              Succ = BI->getSuccessor(0);
            else
              Succ = BI->getSuccessor(
                  cast<ConstantInt>(SimpleCond)->isZero() ? 1 : 0);
            if (L->contains(Succ))
              BBWorklist.insert(Succ);
            continue;
          }
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
        if (Constant *SimpleCond =
                SimplifiedValues.lookup(SI->getCondition())) {
          BasicBlock *Succ = nullptr;
          // Just take the first successor if the condition is undef.
          if (isa<UndefValue>(SimpleCond))
            Succ = SI->getSuccessor(0);
          else
            Succ = SI->findCaseValue(cast<ConstantInt>(SimpleCond))
                       .getCaseSuccessor();
          if (L->contains(Succ))
            BBWorklist.insert(Succ);
          continue;
        }
      }

      // Add BB's successors to the worklist.
      for (BasicBlock *Succ : successors(BB))
        if (L->contains(Succ))
          BBWorklist.insert(Succ);
    }

    // If we found no optimization opportunities on the first iteration, we
    // won't find them on later ones either.
    if (UnrolledCost == RolledDynamicCost) {
      DEBUG(dbgs() << "  No opportunities found; exiting.\n"
                   << "  UnrolledCost: " << UnrolledCost << "\n");
      return None;
    }
  }

  DEBUG(dbgs() << "Analysis finished:\n"
               << "UnrolledCost: " << UnrolledCost << ", "
               << "RolledDynamicCost: " << RolledDynamicCost << "\n");
  return {{UnrolledCost, RolledDynamicCost}};
}
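// A hedged sketch of how a caller might act on the returned Optional. The
// helper name `shouldFullyUnroll` and the PercentOfRolledCost heuristic are
// illustrative assumptions, not the actual unroller's decision logic.
static bool shouldFullyUnroll(const Loop *L, unsigned TripCount,
                              DominatorTree &DT, ScalarEvolution &SE,
                              const TargetTransformInfo &TTI,
                              int MaxUnrolledLoopSize,
                              int PercentOfRolledCost) {
  Optional<EstimatedUnrollCost> Cost =
      analyzeLoopUnrollCost(L, TripCount, DT, SE, TTI, MaxUnrolledLoopSize);
  if (!Cost)
    return false; // Analysis bailed out: too big, or no expected benefit.
  // Unroll only when the estimated unrolled size is a small enough fraction
  // of the dynamic cost of executing the rolled loop to completion.
  return Cost->UnrolledCost * 100 <
         Cost->RolledDynamicCost * PercentOfRolledCost;
}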