/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return;

  Instruction *InsertPt = ExitBlock->getFirstNonPHI();
  BasicBlock::iterator I = Preheader->getTerminator();
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also
    // don't move instructions which might read memory, since the loop may
    // modify memory.  Note that it's okay if the instruction might have
    // undefined behavior: LoopSimplify guarantees that the preheader
    // dominates the exit block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Don't sink static AllocaInsts out of the entry block, which would
    // turn them into dynamic allocas!
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      if (AI->isStaticAlloca())
        continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
         UI != UE; ++UI) {
      BasicBlock *UseBB = cast<Instruction>(*UI)->getParent();
      if (PHINode *P = dyn_cast<PHINode>(*UI)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = I;
    bool Done = false;

    if (I != Preheader->begin())
      --I;
    else
      Done = true;

    ToMove->moveBefore(InsertPt);

    if (Done) break;
    InsertPt = ToMove;
  }
}
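// [Illustrative sketch, not LLVM source] The use-scan above factored into a
// standalone predicate, assuming the same old-style Value::use_iterator API
// used throughout this file.  "isUsedInOrBeforeLoop" is a hypothetical name.
static bool isUsedInOrBeforeLoop(Instruction *I, BasicBlock *Preheader,
                                 Loop *L) {
  for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
       UI != UE; ++UI) {
    // A PHI node uses its operand in the predecessor block feeding that
    // operand, not in the block holding the PHI itself.
    BasicBlock *UseBB = cast<Instruction>(*UI)->getParent();
    if (PHINode *P = dyn_cast<PHINode>(*UI))
      UseBB = P->getIncomingBlock(
          PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
    if (UseBB == Preheader || L->contains(UseBB))
      return true;
  }
  return false;
}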
/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use.  If we find an obviously non-foldable instruction, return
/// true.  Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(Instruction *I,
                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
                              const TargetLowering &TLI) {
  // If we already considered this instruction, we're done.
  if (!ConsideredInsts.insert(I))
    return false;

  // If this is an obviously unfoldable instruction, bail out.
  if (!MightBeFoldableInst(I))
    return true;

  // Loop over all the uses, recursively processing them.
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    User *U = *UI;

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      unsigned opNo = UI.getOperandNo();
      if (opNo == 0) return true; // Storing addr, not into addr.
      MemoryUses.push_back(std::make_pair(SI, opNo));
      continue;
    }

    if (CallInst *CI = dyn_cast<CallInst>(U)) {
      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
      if (!IA) return true;

      // If this is a memory operand, we're cool, otherwise bail out.
      if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
        return true;
      continue;
    }

    if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
                          TLI))
      return true;
  }

  return false;
}
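// [Illustrative sketch, not LLVM source] One way a caller might drive
// FindAllMemoryUses: decide whether every transitive use of an address
// computation is a foldable memory access.  "AllUsesAreFoldableMemoryOps"
// is a hypothetical helper name.
static bool AllUsesAreFoldableMemoryOps(Instruction *AddrInst,
                                        const TargetLowering &TLI) {
  SmallVector<std::pair<Instruction*, unsigned>, 16> MemoryUses;
  SmallPtrSet<Instruction*, 16> ConsideredInsts;
  // FindAllMemoryUses returns true as soon as it sees a non-foldable use.
  if (FindAllMemoryUses(AddrInst, MemoryUses, ConsideredInsts, TLI))
    return false;
  // All uses bottomed out in loads, stores, or inline-asm memory operands.
  return !MemoryUses.empty();
}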
void ARM64PromoteConstant::computeInsertionPoints(
    Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
  DEBUG(dbgs() << "** Compute insertion points **\n");
  for (Value::use_iterator UseIt = Val->use_begin(),
                           EndUseIt = Val->use_end();
       UseIt != EndUseIt; ++UseIt) {
    // If the user is not an Instruction, we cannot modify it.
    if (!isa<Instruction>(*UseIt))
      continue;

    // Filter out uses that should not be converted.
    if (!shouldConvertUse(Val, cast<Instruction>(*UseIt),
                          UseIt.getOperandNo()))
      continue;

    DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n");
    DEBUG(UseIt->print(dbgs()));
    DEBUG(dbgs() << '\n');

    Instruction *InsertionPoint = findInsertionPoint(UseIt);
    DEBUG(dbgs() << "Considered insertion point:\n");
    DEBUG(InsertionPoint->print(dbgs()));
    DEBUG(dbgs() << '\n');

    // Check if the current insertion point is useless, i.e., it is dominated
    // by another one.
    InsertionPoints &InsertPts =
        InsPtsPerFunc[InsertionPoint->getParent()->getParent()];
    if (isDominated(InsertionPoint, UseIt, InsertPts))
      continue;
    // This insertion point is useful; check if we can merge some insertion
    // point in a common dominator or if NewPt dominates an existing one.
    if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
      continue;

    DEBUG(dbgs() << "Keep considered insertion point\n");

    // It is definitely useful on its own.
    InsertPts[InsertionPoint].push_back(UseIt);
  }
}
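// [Illustrative sketch, not the actual ARM64PromoteConstant code] The
// dominance pruning that isDominated is assumed to perform: a candidate
// insertion point is useless if an already-recorded point dominates it,
// since the promoted constant would be available there anyway.  The helper
// name and the flat list of existing points are hypothetical.
static bool dominatedByExistingPoint(Instruction *NewPt,
                                     ArrayRef<Instruction *> ExistingPts,
                                     DominatorTree &DT) {
  for (unsigned i = 0, e = ExistingPts.size(); i != e; ++i)
    if (DT.dominates(ExistingPts[i], NewPt))
      return true; // NewPt is redundant.
  return false;
}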
/// AllUsesDominatedByBlock - Return true if all uses of the specified value
/// occur in blocks dominated by the specified block.
bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
                                      BasicBlock *BB) const {
  // Ignoring debug uses is necessary so debug info doesn't affect the code.
  // This may leave a referencing dbg_value in the original block, before
  // the definition of the vreg.  Dwarf generator handles this although the
  // user might not get the right info at runtime.
  for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
       I != E; ++I) {
    // Determine the block of the use.
    Instruction *UseInst = cast<Instruction>(*I);
    BasicBlock *UseBlock = UseInst->getParent();

    if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
      // PHI nodes use the operand in the predecessor block, not the block
      // with the PHI.
      unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo());
      UseBlock = PN->getIncomingBlock(Num);
    }

    // Check that it dominates.
    if (!DT->dominates(BB, UseBlock))
      return false;
  }

  return true;
}
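// [Illustrative sketch, not LLVM source] How this predicate typically guards
// the actual code motion: sink Inst to the top of SuccToSinkTo only if every
// use is dominated by that block.  "SinkIfSafe" is a hypothetical wrapper
// that assumes access to AllUsesDominatedByBlock.
bool Sinking::SinkIfSafe(Instruction *Inst, BasicBlock *SuccToSinkTo) {
  if (!AllUsesDominatedByBlock(Inst, SuccToSinkTo))
    return false;
  // Move past any PHI nodes at the top of the target block.
  Inst->moveBefore(SuccToSinkTo->getFirstNonPHI());
  return true;
}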
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
/// split the critical edge.  This will update DominatorTree and
/// DominanceFrontier information if it is available, thus calling this pass
/// will not invalidate either of them.  This returns the new block if the
/// edge was split, null otherwise.
///
/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to
/// the specified successor will be merged into the same critical edge block.
/// This is most commonly interesting with switch instructions, which may
/// have many edges to any one destination.  This ensures that all edges to
/// that dest go to one block instead of each going to a different block, but
/// isn't the standard definition of a "critical edge".
///
/// It is invalid to call this function on a critical edge that starts at an
/// IndirectBrInst.  Splitting these edges will almost always create an
/// invalid program because the address of the new block won't be the one
/// that is jumped to.
///
BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
                                    Pass *P, bool MergeIdenticalEdges) {
  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;

  assert(!isa<IndirectBrInst>(TI) &&
         "Cannot split critical edge from IndirectBrInst");

  BasicBlock *TIBB = TI->getParent();
  BasicBlock *DestBB = TI->getSuccessor(SuccNum);

  // Create a new basic block, linking it into the CFG.
  BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
                      TIBB->getName() + "." + DestBB->getName() +
                      "_crit_edge");
  // Create our unconditional branch.
  BranchInst::Create(DestBB, NewBB);

  // Branch to the new block, breaking the edge.
  TI->setSuccessor(SuccNum, NewBB);

  // Insert the block into the function... right after the block TI lives in.
  Function &F = *TIBB->getParent();
  Function::iterator FBBI = TIBB;
  F.getBasicBlockList().insert(++FBBI, NewBB);

  // If there are any PHI nodes in DestBB, we need to update them so that
  // they merge incoming values from NewBB instead of from TIBB.
  if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) {
    // This conceptually does:
    //  foreach (PHINode *PN in DestBB)
    //    PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB);
    // but is optimized for two cases.

    if (APHI->getNumIncomingValues() <= 8) {  // Small # preds case.
      unsigned BBIdx = 0;
      for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
        // We no longer enter through TIBB, now we come in through NewBB.
        // Revector exactly one entry in the PHI node that used to come from
        // TIBB to come from NewBB.
        PHINode *PN = cast<PHINode>(I);

        // Reuse the previous value of BBIdx if it lines up.  In cases where
        // we have multiple phi nodes with *lots* of predecessors, this is a
        // speed win because we don't have to scan the PHI looking for TIBB.
        // This happens because the BB list of PHI nodes are usually in the
        // same order.
        if (PN->getIncomingBlock(BBIdx) != TIBB)
          BBIdx = PN->getBasicBlockIndex(TIBB);
        PN->setIncomingBlock(BBIdx, NewBB);
      }
    } else {
      // However, the foreach loop is slow for blocks with lots of
      // predecessors because PHINode::getIncomingBlock is O(n) in # preds.
      // Instead, walk the use list of TIBB to find the PHI nodes.
      SmallPtrSet<PHINode*, 16> UpdatedPHIs;

      for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
           UI != E; ) {
        Value::use_iterator Use = UI++;
        if (PHINode *PN = dyn_cast<PHINode>(*Use)) {
          // Revector exactly one entry in each PHI that came from TIBB.
          if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
            PN->setOperand(Use.getOperandNo(), NewBB);
        }
      }
    }
  }

  // If there are any other edges from TIBB to DestBB, update those to go
  // through the split block, making those edges non-critical as well (and
  // reducing the number of phi entries in the DestBB if relevant).
  if (MergeIdenticalEdges) {
    for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
      if (TI->getSuccessor(i) != DestBB) continue;

      // Remove an entry for TIBB from DestBB phi nodes.
      DestBB->removePredecessor(TIBB);

      // We found another edge to DestBB, go to NewBB instead.
      TI->setSuccessor(i, NewBB);
    }
  }

  // If we don't have a pass object, we can't update anything...
  if (P == 0) return NewBB;

  DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
  DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>();
  LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
  ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();

  // If we have nothing to update, just return.
  if (DT == 0 && DF == 0 && LI == 0 && PI == 0)
    return NewBB;

  // Now update analysis information.  Since the only predecessor of NewBB is
  // the TIBB, TIBB clearly dominates NewBB.  TIBB usually doesn't dominate
  // anything, as there are other successors of DestBB.  However, if all
  // other predecessors of DestBB are already dominated by DestBB (e.g.
  // DestBB is a loop header) then NewBB dominates DestBB.
  SmallVector<BasicBlock*, 8> OtherPreds;

  // If there is a PHI in the block, loop over predecessors with it, which is
  // faster than iterating pred_begin/end.
  if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingBlock(i) != NewBB)
        OtherPreds.push_back(PN->getIncomingBlock(i));
  } else {
    for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
         I != E; ++I)
      if (*I != NewBB)
        OtherPreds.push_back(*I);
  }

  bool NewBBDominatesDestBB = true;

  // Should we update DominatorTree information?
  if (DT) {
    DomTreeNode *TINode = DT->getNode(TIBB);

    // The new block is not the immediate dominator for any other nodes, but
    // TINode is the immediate dominator for the new node.
    //
    if (TINode) {       // Don't break unreachable code!
      DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
      DomTreeNode *DestBBNode = 0;

      // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
      if (!OtherPreds.empty()) {
        DestBBNode = DT->getNode(DestBB);
        while (!OtherPreds.empty() && NewBBDominatesDestBB) {
          if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
            NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
          OtherPreds.pop_back();
        }
        OtherPreds.clear();
      }

      // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
      // doesn't dominate anything.
      if (NewBBDominatesDestBB) {
        if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
        DT->changeImmediateDominator(DestBBNode, NewBBNode);
      }
    }
  }

  // Should we update DominanceFrontier information?
  if (DF) {
    // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
    if (!OtherPreds.empty()) {
      // FIXME: IMPLEMENT THIS!
      llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset."
                       " not implemented yet!");
    }

    // Since the new block is dominated by its only predecessor TIBB,
    // it cannot be in any block's dominance frontier.  If NewBB dominates
    // DestBB, its dominance frontier is the same as DestBB's, otherwise it
    // is just {DestBB}.
    if (NewBBDominatesDestBB) {
      DominanceFrontier::iterator I = DF->find(DestBB);
      if (I != DF->end()) {
        DF->addBasicBlock(NewBB, I->second);

        if (I->second.count(DestBB)) {
          // However NewBB's frontier does not include DestBB.
          DominanceFrontier::iterator NF = DF->find(NewBB);
          DF->removeFromFrontier(NF, DestBB);
        }
      } else
        DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType());
    } else {
      DominanceFrontier::DomSetType NewDFSet;
      NewDFSet.insert(DestBB);
      DF->addBasicBlock(NewBB, NewDFSet);
    }
  }

  // Update LoopInfo if it is around.
  if (LI) {
    if (Loop *TIL = LI->getLoopFor(TIBB)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NewBB joins loop.
          DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer
          // loop.
          TIL->addBasicBlockToLoop(NewBB, LI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer
          // loop.
          DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be
          // the header of its loop (adding a branch into a loop elsewhere
          // would create an irreducible loop).
          assert(DestLoop->getHeader() == DestBB &&
                 "Should not create irreducible loops!");
          if (Loop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NewBB, LI->getBase());
        }
      }
      // If TIBB is in a loop and DestBB is outside of that loop, split the
      // other exit blocks of the loop that also have predecessors outside
      // the loop, to maintain a LoopSimplify guarantee.
      if (!TIL->contains(DestBB) &&
          P->mustPreserveAnalysisID(LoopSimplifyID)) {
        assert(!TIL->contains(NewBB) &&
               "Split point for loop exit is contained in loop!");

        // Update LCSSA form in the newly created exit block.
        if (P->mustPreserveAnalysisID(LCSSAID)) {
          SmallVector<BasicBlock *, 1> OrigPred;
          OrigPred.push_back(TIBB);
          CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
        }

        // For each unique exit block...
        SmallVector<BasicBlock *, 4> ExitBlocks;
        TIL->getExitBlocks(ExitBlocks);
        for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
          // Collect all the preds that are inside the loop, and note
          // whether there are any preds outside the loop.
          SmallVector<BasicBlock *, 4> Preds;
          bool HasPredOutsideOfLoop = false;
          BasicBlock *Exit = ExitBlocks[i];
          for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
               I != E; ++I)
            if (TIL->contains(*I))
              Preds.push_back(*I);
            else
              HasPredOutsideOfLoop = true;
          // If there are any preds not in the loop, we'll need to split
          // the edges.  The Preds.empty() check is needed because a block
          // may appear multiple times in the list.  We can't use
          // getUniqueExitBlocks above because that depends on LoopSimplify
          // form, which we're in the process of restoring!
          if (!Preds.empty() && HasPredOutsideOfLoop) {
            BasicBlock *NewExitBB =
              SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
                                     "split", P);
            if (P->mustPreserveAnalysisID(LCSSAID))
              CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
          }
        }
      }
      // LCSSA form was updated above for the case where LoopSimplify is
      // available, which means that all predecessors of loop exit blocks
      // are within the loop.  Without LoopSimplify form, it would be
      // necessary to insert a new phi.
      assert((!P->mustPreserveAnalysisID(LCSSAID) ||
              P->mustPreserveAnalysisID(LoopSimplifyID)) &&
             "SplitCriticalEdge doesn't know how to update LCSSA form "
             "without LoopSimplify!");
    }
  }

  // Update ProfileInfo if it is around.
  if (PI)
    PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);

  return NewBB;
}
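// [Illustrative sketch, not LLVM source] A typical driver, mirroring what
// the BreakCriticalEdges pass does: try to split every successor edge of
// every terminator.  Edges that aren't critical (or that start at an
// IndirectBrInst) are skipped.  "splitAllCriticalEdges" is a hypothetical
// name; P may be null when no analyses need to be preserved.
static unsigned splitAllCriticalEdges(Function &F, Pass *P) {
  unsigned NumSplit = 0;
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    TerminatorInst *TI = BB->getTerminator();
    if (TI->getNumSuccessors() < 2 || isa<IndirectBrInst>(TI))
      continue; // No critical edges possible, or edges we must not split.
    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
      if (SplitCriticalEdge(TI, i, P)) // Returns null if not critical.
        ++NumSplit;
  }
  return NumSplit;
}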
/// SurveyUse - This looks at a single use of an argument or return value
/// and determines if it should be alive or not.  Adds this use to
/// MaybeLiveUses if it causes the used value to become MaybeAlive.
///
/// RetValNum is the return value number to use when this use is used in a
/// return instruction.  This is used in the recursion; you should always
/// leave it at 0.
DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
                             unsigned RetValNum) {
  Value *V = *U;
  if (ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
    // The value is returned from a function.  It's only live when the
    // function's return value is live.  We use RetValNum here, for the case
    // that U is really a use of an insertvalue instruction that uses the
    // original Use.
    RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
    // We might be live, depending on the liveness of Use.
    return MarkIfNotLive(Use, MaybeLiveUses);
  }
  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
    if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex()
        && IV->hasIndices())
      // The use we are examining is inserted into an aggregate.  Our
      // liveness depends on all uses of that aggregate, but if it is used
      // as a return value, only the index at which we were inserted counts.
      RetValNum = *IV->idx_begin();

    // Note that if we are used as the aggregate operand to the insertvalue,
    // we don't change RetValNum, but do survey all our uses.

    Liveness Result = MaybeLive;
    for (Value::use_iterator I = IV->use_begin(), E = IV->use_end();
         I != E; ++I) {
      Result = SurveyUse(I, MaybeLiveUses, RetValNum);
      if (Result == Live)
        break;
    }
    return Result;
  }

  CallSite CS = CallSite::get(V);
  if (CS.getInstruction()) {
    Function *F = CS.getCalledFunction();
    if (F) {
      // Used in a direct call.

      // Find the argument number.  We know for sure that this use is an
      // argument, since if it was the function argument this would be an
      // indirect call, and we know we can't be looking at a value of the
      // label type (for the invoke instruction).
      unsigned ArgNo = CS.getArgumentNo(U.getOperandNo());

      if (ArgNo >= F->getFunctionType()->getNumParams())
        // The value is passed in through a vararg!  Must be live.
        return Live;

      assert(CS.getArgument(ArgNo)
             == CS.getInstruction()->getOperand(U.getOperandNo())
             && "Argument is not where we expected it");

      // Value passed to a normal call.  It's only live when the
      // corresponding argument to the called function turns out to be live.
      RetOrArg Use = CreateArg(F, ArgNo);
      return MarkIfNotLive(Use, MaybeLiveUses);
    }
  }
  // Used in any other way?  Value must be live.
  return Live;
}
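// [Illustrative sketch] The companion driver implied by the recursion above:
// survey every use of V, stopping early once any use proves it Live.  The
// real pass has an equivalent routine; treat this as a reconstruction under
// the same old-style use_iterator API.
DAE::Liveness DAE::SurveyUses(Value *V, UseVector &MaybeLiveUses) {
  // Assume MaybeLive until a use proves the value outright Live.
  Liveness Result = MaybeLive;
  for (Value::use_iterator I = V->use_begin(), E = V->use_end();
       I != E; ++I) {
    // Pass 0 for RetValNum, as the doc comment above prescribes.
    Result = SurveyUse(I, MaybeLiveUses, 0);
    if (Result == Live)
      break;
  }
  return Result;
}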
/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if
/// we see any stores or other unknown uses.  If we see pointer arithmetic,
/// keep track of whether it moves the pointer (with IsOffset) but otherwise
/// traverse the uses.  If we see a memcpy/memmove that targets an unoffseted
/// pointer to the alloca, and if the source pointer is a pointer to a
/// constant global, we can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete,
                               bool IsOffset = false) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller
  // quickly eliminate the markers.

  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ++UI) {
    User *U = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Simple (non-volatile, non-atomic) loads are always ok; reject
      // anything else.
      if (!LI->isSimple()) return false;
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
        return false;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If
      // it doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
                                          IsOffset ||
                                          !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    if (CallSite CS = U) {
      // If this is the function being called then we treat it like a load
      // and ignore it.
      if (CS.isCallee(UI))
        continue;

      // If this is a readonly/readnone call site, then we know it is just
      // a load (but one that potentially returns the value itself), so we
      // can ignore it if we know that the value isn't captured.
      unsigned ArgNo = CS.getArgumentNo(UI);
      if (CS.onlyReadsMemory() &&
          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
        continue;

      // If this is being passed as a byval argument, the caller is making
      // a copy, so it is only a read of the alloca.
      if (CS.isByValArgument(ArgNo))
        continue;
    }

    // Lifetime intrinsics can be handled by the caller.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        assert(II->use_empty() && "Lifetime markers have no result to use!");
        ToDelete.push_back(II);
        continue;
      }
    }

    // If this isn't a memcpy/memmove, reject it as something we can't
    // handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
    if (MI == 0)
      return false;

    // If the transfer is using the alloca as a source, then ignore it
    // since it is a load (unless the transfer is volatile).
    if (UI.getOperandNo() == 1) {
      if (MI->isVolatile()) return false;
      continue;
    }

    // If we already have seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we
    // can't safely handle this.
    if (IsOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 0) return false;

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!pointsToConstantGlobal(MI->getSource()))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}
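// [Illustrative sketch] The natural entry point for this analysis: given an
// alloca, return the single memcpy/memmove that initializes it from a
// constant global, or null.  The real InstCombine code has an equivalent
// wrapper; note that a true result with no copy seen still yields null.
static MemTransferInst *
findOnlyCopyFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = 0;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return 0;
}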