// This function calculates the number of iterations after which the given Phi // becomes an invariant. The pre-calculated values are memorized in the map. The // function (shortcut is I) is calculated according to the following definition: // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge]. // If %y is a loop invariant, then I(%x) = 1. // If %y is a Phi from the loop header, I(%x) = I(%y) + 1. // Otherwise, I(%x) is infinite. // TODO: Actually if %y is an expression that depends only on Phi %z and some // loop invariants, we can estimate I(%x) = I(%z) + 1. The example // looks like: // %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration. // %y = phi(0, 5), // %a = %y + 1. static unsigned calculateIterationsToInvariance( PHINode *Phi, Loop *L, BasicBlock *BackEdge, SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) { assert(Phi->getParent() == L->getHeader() && "Non-loop Phi should not be checked for turning into invariant."); assert(BackEdge == L->getLoopLatch() && "Wrong latch?"); // If we already know the answer, take it from the map. auto I = IterationsToInvariance.find(Phi); if (I != IterationsToInvariance.end()) return I->second; // Otherwise we need to analyze the input from the back edge. Value *Input = Phi->getIncomingValueForBlock(BackEdge); // Place infinity to map to avoid infinite recursion for cycled Phis. Such // cycles can never stop on an invariant. IterationsToInvariance[Phi] = InfiniteIterationsToInvariance; unsigned ToInvariance = InfiniteIterationsToInvariance; if (L->isLoopInvariant(Input)) ToInvariance = 1u; else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) { // Only consider Phis in header block. if (IncPhi->getParent() != L->getHeader()) return InfiniteIterationsToInvariance; // If the input becomes an invariant after X iterations, then our Phi // becomes an invariant after X + 1 iterations. unsigned InputToInvariance = calculateIterationsToInvariance( IncPhi, L, BackEdge, IterationsToInvariance); if (InputToInvariance != InfiniteIterationsToInvariance) ToInvariance = InputToInvariance + 1u; } // If we found that this Phi lies in an invariant chain, update the map. if (ToInvariance != InfiniteIterationsToInvariance) IterationsToInvariance[Phi] = ToInvariance; return ToInvariance; }
/// Remove dead functions that are not included in DNR (Do Not Remove) list. bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { SmallVector<CallGraphNode*, 16> FunctionsToRemove; SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats; SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive; auto RemoveCGN = [&](CallGraphNode *CGN) { // Remove any call graph edges from the function to its callees. CGN->removeAllCalledFunctions(); // Remove any edges from the external node to the function's call graph // node. These edges might have been made irrelegant due to // optimization of the program. CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); // Removing the node for callee from the call graph and delete it. FunctionsToRemove.push_back(CGN); }; // Scan for all of the functions, looking for ones that should now be removed // from the program. Insert the dead ones in the FunctionsToRemove set. for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) { CallGraphNode *CGN = I->second; Function *F = CGN->getFunction(); if (!F || F->isDeclaration()) continue; // Handle the case when this function is called and we only want to care // about always-inline functions. This is a bit of a hack to share code // between here and the InlineAlways pass. if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline)) continue; // If the only remaining users of the function are dead constants, remove // them. F->removeDeadConstantUsers(); if (!F->isDefTriviallyDead()) continue; // It is unsafe to drop a function with discardable linkage from a COMDAT // without also dropping the other members of the COMDAT. // The inliner doesn't visit non-function entities which are in COMDAT // groups so it is unsafe to do so *unless* the linkage is local. if (!F->hasLocalLinkage()) { if (const Comdat *C = F->getComdat()) { --ComdatEntriesAlive[C]; DeadFunctionsInComdats.push_back(CGN); continue; } } RemoveCGN(CGN); } if (!DeadFunctionsInComdats.empty()) { // Count up all the entities in COMDAT groups auto ComdatGroupReferenced = [&](const Comdat *C) { auto I = ComdatEntriesAlive.find(C); if (I != ComdatEntriesAlive.end()) ++(I->getSecond()); }; for (const Function &F : CG.getModule()) if (const Comdat *C = F.getComdat()) ComdatGroupReferenced(C); for (const GlobalVariable &GV : CG.getModule().globals()) if (const Comdat *C = GV.getComdat()) ComdatGroupReferenced(C); for (const GlobalAlias &GA : CG.getModule().aliases()) if (const Comdat *C = GA.getComdat()) ComdatGroupReferenced(C); for (CallGraphNode *CGN : DeadFunctionsInComdats) { Function *F = CGN->getFunction(); const Comdat *C = F->getComdat(); int NumAlive = ComdatEntriesAlive[C]; // We can remove functions in a COMDAT group if the entire group is dead. assert(NumAlive >= 0); if (NumAlive > 0) continue; RemoveCGN(CGN); } } if (FunctionsToRemove.empty()) return false; // Now that we know which functions to delete, do so. We didn't want to do // this inline, because that would invalidate our CallGraph::iterator // objects. :( // // Note that it doesn't matter that we are iterating over a non-stable order // here to do this, it doesn't matter which order the functions are deleted // in. array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), FunctionsToRemove.end()); for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(), E = FunctionsToRemove.end(); I != E; ++I) { delete CG.removeFunctionFromModule(*I); ++NumDeleted; } return true; }
// Sinks \p I from the loop \p L's preheader to its uses. Returns true if // sinking is successful. // \p LoopBlockNumber is used to sort the insertion blocks to ensure // determinism. static bool sinkInstruction(Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs, const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo &BFI) { // Compute the set of blocks in loop L which contain a use of I. SmallPtrSet<BasicBlock *, 2> BBs; for (auto &U : I.uses()) { Instruction *UI = cast<Instruction>(U.getUser()); // We cannot sink I to PHI-uses. if (dyn_cast<PHINode>(UI)) return false; // We cannot sink I if it has uses outside of the loop. if (!L.contains(LI.getLoopFor(UI->getParent()))) return false; BBs.insert(UI->getParent()); } // findBBsToSinkInto is O(BBs.size() * ColdLoopBBs.size()). We cap the max // BBs.size() to avoid expensive computation. // FIXME: Handle code size growth for min_size and opt_size. if (BBs.size() > MaxNumberOfUseBBsForSinking) return false; // Find the set of BBs that we should insert a copy of I. SmallPtrSet<BasicBlock *, 2> BBsToSinkInto = findBBsToSinkInto(L, BBs, ColdLoopBBs, DT, BFI); if (BBsToSinkInto.empty()) return false; // Copy the final BBs into a vector and sort them using the total ordering // of the loop block numbers as iterating the set doesn't give a useful // order. No need to stable sort as the block numbers are a total ordering. SmallVector<BasicBlock *, 2> SortedBBsToSinkInto; SortedBBsToSinkInto.insert(SortedBBsToSinkInto.begin(), BBsToSinkInto.begin(), BBsToSinkInto.end()); std::sort(SortedBBsToSinkInto.begin(), SortedBBsToSinkInto.end(), [&](BasicBlock *A, BasicBlock *B) { return *LoopBlockNumber.find(A) < *LoopBlockNumber.find(B); }); BasicBlock *MoveBB = *SortedBBsToSinkInto.begin(); // FIXME: Optimize the efficiency for cloned value replacement. The current // implementation is O(SortedBBsToSinkInto.size() * I.num_uses()). for (BasicBlock *N : SortedBBsToSinkInto) { if (N == MoveBB) continue; // Clone I and replace its uses. Instruction *IC = I.clone(); IC->setName(I.getName()); IC->insertBefore(&*N->getFirstInsertionPt()); // Replaces uses of I with IC in N for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;) { Use &U = *UI++; auto *I = cast<Instruction>(U.getUser()); if (I->getParent() == N) U.set(IC); } // Replaces uses of I with IC in blocks dominated by N replaceDominatedUsesWith(&I, IC, DT, N); DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName() << '\n'); NumLoopSunkCloned++; } DEBUG(dbgs() << "Sinking " << I << " To: " << MoveBB->getName() << '\n'); NumLoopSunk++; I.moveBefore(&*MoveBB->getFirstInsertionPt()); return true; }