PreservedAnalyses
PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
            CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
                                      CGSCCAnalysisManager &AM,
                                      LazyCallGraph &G, CGSCCUpdateResult &UR) {
  PreservedAnalyses PA = PreservedAnalyses::all();

  if (DebugLogging)
    dbgs() << "Starting CGSCC pass manager run.\n";

  // The SCC may be refined while we are running passes over it, so set up
  // a pointer that we can update.
  LazyCallGraph::SCC *C = &InitialC;

  for (auto &Pass : Passes) {
    if (DebugLogging)
      dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n";

    PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR);

    // Update the SCC if necessary.
    C = UR.UpdatedC ? UR.UpdatedC : C;

    // Check that we didn't miss any update scenario.
    assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
    assert(C->begin() != C->end() && "Cannot have an empty SCC!");

    // Update the analysis manager as each pass runs and potentially
    // invalidates analyses.
    AM.invalidate(*C, PassPA);

    // Finally, we intersect the final preserved analyses to compute the
    // aggregate preserved set for this pass manager.
    PA.intersect(std::move(PassPA));

    // FIXME: Historically, the pass managers all called the LLVM context's
    // yield function here. We don't have a generic way to acquire the
    // context and it isn't yet clear what the right pattern is for yielding
    // in the new pass manager so it is currently omitted.
    // ...getContext().yield();
  }

  // Invalidation was handled after each pass in the above loop for the
  // current SCC. Therefore, the remaining analysis results in the
  // AnalysisManager are preserved. We mark this with a set so that we don't
  // need to inspect each one individually.
  PA.preserve<AllAnalysesOn<LazyCallGraph::SCC>>();

  if (DebugLogging)
    dbgs() << "Finished CGSCC pass manager run.\n";

  return PA;
}
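For context, a minimal sketch of how this run() method is typically reached, assuming the standard new-pass-manager setup with the analysis managers already cross-registered elsewhere: CGSCC passes are gathered into a CGSCCPassManager, which is adapted into a module pass.

// Hedged sketch only; assumes M is a Module and MAM is a ModuleAnalysisManager
// that has been fully wired up with the usual proxy registrations.
CGSCCPassManager CGPM(/*DebugLogging=*/true);
CGPM.addPass(PostOrderFunctionAttrsPass());
ModulePassManager MPM(/*DebugLogging=*/true);
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
PreservedAnalyses PA = MPM.run(M, MAM);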
PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();

      FunctionAnalysisManager &FAM =
          AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
      // FIXME: This lambda must only be used with this function. We should
      // skip the lambda and just get the AA results directly.
      auto AARGetter = [&](Function &F) -> AAResults & {
        assert(&F == &OldF && "Called with an unexpected function!");
        return FAM.getResult<AAManager>(F);
      };

      Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates, it
      // merely swaps out the particular function mapped to a particular node
      // in the graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      OldF.eraseFromParent();
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}
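The shape above generalizes: below is a hedged sketch of the smallest CGSCC pass following the same pattern, walking each node in the SCC and reaching function analyses through the proxy. The name NoopSCCVisitorPass is hypothetical, for illustration only.

struct NoopSCCVisitorPass : PassInfoMixin<NoopSCCVisitorPass> {
  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                        LazyCallGraph &CG, CGSCCUpdateResult &UR) {
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
    for (LazyCallGraph::Node &N : C)
      (void)FAM.getResult<AAManager>(N.getFunction()); // query, don't mutate
    // Nothing was changed, so everything is preserved.
    return PreservedAnalyses::all();
  }
};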
FunctionAnalysisManagerCGSCCProxy::Result
FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C,
                                       CGSCCAnalysisManager &AM,
                                       LazyCallGraph &CG) {
  // Collect the FunctionAnalysisManager from the Module layer and use that
  // to build the proxy result.
  //
  // This allows us to rely on the FunctionAnalysisManagerModuleProxy to
  // invalidate the function analyses.
  auto &MAM = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG).getManager();
  Module &M = *C.begin()->getFunction().getParent();
  auto *FAMProxy = MAM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M);
  assert(FAMProxy && "The CGSCC pass manager requires that the FAM module "
                     "proxy is run on the module prior to entering the CGSCC "
                     "walk.");

  // Note that we special-case invalidation handling of this proxy in the
  // CGSCC analysis manager's Module proxy. This avoids the need to do
  // anything special here to recompute all of this if ever the FAM's module
  // proxy goes away.
  return Result(FAMProxy->getManager());
}
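The assert above only holds if the module analysis manager already carries a cached FunctionAnalysisManagerModuleProxy result when the CGSCC walk begins. PassBuilder normally does this wiring via crossRegisterProxies; done by hand it looks roughly like the following sketch, not an authoritative setup.

FunctionAnalysisManager FAM;
CGSCCAnalysisManager CGAM;
ModuleAnalysisManager MAM;
// Register the proxies in both directions so each layer can reach the other.
MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
MAM.registerPass([&] { return CGSCCAnalysisManagerModuleProxy(CGAM); });
CGAM.registerPass([&] { return ModuleAnalysisManagerCGSCCProxy(MAM); });
CGAM.registerPass([] { return FunctionAnalysisManagerCGSCCProxy(); });
FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); });
// Running a module pipeline over M now caches the FAM module proxy, which is
// what getCachedResult<FunctionAnalysisManagerModuleProxy>(M) finds above.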
PreservedAnalyses
PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
            CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
                                      CGSCCAnalysisManager &AM,
                                      LazyCallGraph &G, CGSCCUpdateResult &UR) {
  // Request PassInstrumentation from analysis manager, will use it to run
  // instrumenting callbacks for the passes later.
  PassInstrumentation PI =
      AM.getResult<PassInstrumentationAnalysis>(InitialC, G);

  PreservedAnalyses PA = PreservedAnalyses::all();

  if (DebugLogging)
    dbgs() << "Starting CGSCC pass manager run.\n";

  // The SCC may be refined while we are running passes over it, so set up
  // a pointer that we can update.
  LazyCallGraph::SCC *C = &InitialC;

  for (auto &Pass : Passes) {
    if (DebugLogging)
      dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n";

    // Check the PassInstrumentation's BeforePass callbacks before running the
    // pass, skip its execution completely if asked to (callback returns
    // false).
    if (!PI.runBeforePass(*Pass, *C))
      continue;

    PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR);

    if (UR.InvalidatedSCCs.count(C))
      PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass);
    else
      PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C);

    // Update the SCC if necessary.
    C = UR.UpdatedC ? UR.UpdatedC : C;

    // If the CGSCC pass wasn't able to provide a valid updated SCC, the
    // current SCC may simply need to be skipped if invalid.
    if (UR.InvalidatedSCCs.count(C)) {
      LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
      break;
    }

    // Check that we didn't miss any update scenario.
    assert(C->begin() != C->end() && "Cannot have an empty SCC!");

    // Update the analysis manager as each pass runs and potentially
    // invalidates analyses.
    AM.invalidate(*C, PassPA);

    // Finally, we intersect the final preserved analyses to compute the
    // aggregate preserved set for this pass manager.
    PA.intersect(std::move(PassPA));

    // FIXME: Historically, the pass managers all called the LLVM context's
    // yield function here. We don't have a generic way to acquire the
    // context and it isn't yet clear what the right pattern is for yielding
    // in the new pass manager so it is currently omitted.
    // ...getContext().yield();
  }

  // Invalidation was handled after each pass in the above loop for the
  // current SCC. Therefore, the remaining analysis results in the
  // AnalysisManager are preserved. We mark this with a set so that we don't
  // need to inspect each one individually.
  PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();

  if (DebugLogging)
    dbgs() << "Finished CGSCC pass manager run.\n";

  return PA;
}
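A hedged sketch of the instrumentation hook exercised above: clients register callbacks with PassInstrumentationCallbacks, and a BeforePass callback returning false makes runBeforePass() skip the pass. The filtering predicate here is purely illustrative, and MAM is assumed to be wired up as in the earlier sketch.

PassInstrumentationCallbacks PIC;
PIC.registerBeforePassCallback([](StringRef PassID, Any IR) {
  // Returning false asks the pass manager to skip this pass entirely.
  return !PassID.contains("NoOp"); // illustrative filter only
});
// The callbacks object is surfaced to passes as an analysis; registering it
// in the analysis managers lets AM.getResult<PassInstrumentationAnalysis>()
// above find it.
MAM.registerPass([&] { return PassInstrumentationAnalysis(&PIC); });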
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                   CGSCCUpdateResult &UR) {
  const ModuleAnalysisManager &MAM =
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
  bool Changed = false;

  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
  Module &M = *InitialC.begin()->getFunction().getParent();
  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);

  if (!ImportedFunctionsStats &&
      InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
    ImportedFunctionsStats =
        llvm::make_unique<ImportedFunctionsInliningStatistics>();
    ImportedFunctionsStats->setModuleInfo(M);
  }

  // We use a single common worklist for calls across the entire SCC. We
  // process these in-order and append new calls introduced during inlining to
  // the end.
  //
  // Note that this particular order of processing is actually critical to
  // avoid very bad behaviors. Consider *highly connected* call graphs where
  // each function contains a small amount of code and a couple of calls to
  // other functions. Because the LLVM inliner is fundamentally a bottom-up
  // inliner, it can handle gracefully the fact that these all appear to be
  // reasonable inlining candidates as it will flatten things until they
  // become too big to inline, and then move on and flatten another batch.
  //
  // However, when processing call edges *within* an SCC we cannot rely on
  // this bottom-up behavior. As a consequence, with heavily connected *SCCs*
  // of functions we can end up incrementally inlining N calls into each of
  // N functions because each incremental inlining decision looks good and we
  // don't have a topological ordering to prevent explosions.
  //
  // To compensate for this, we don't process transitive edges made immediate
  // by inlining until we've done one pass of inlining across the entire SCC.
  // Large, highly connected SCCs still lead to some amount of code bloat in
  // this model, but it is uniformly spread across all the functions in the
  // SCC and eventually they all become too large to inline, rather than
  // incrementally making a single function grow in a super linear fashion.
  SmallVector<std::pair<CallSite, int>, 16> Calls;

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
          .getManager();

  // Populate the initial list of calls in this SCC.
  for (auto &N : InitialC) {
    auto &ORE =
        FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
    // We want to generally process call sites top-down in order for
    // simplifications stemming from replacing the call with the returned
    // value after inlining to be visible to subsequent inlining decisions.
    // FIXME: Using instructions sequence is a really bad way to do this.
    // Instead we should do an actual RPO walk of the function body.
    for (Instruction &I : instructions(N.getFunction()))
      if (auto CS = CallSite(&I))
        if (Function *Callee = CS.getCalledFunction()) {
          if (!Callee->isDeclaration())
            Calls.push_back({CS, -1});
          else if (!isa<IntrinsicInst>(I)) {
            using namespace ore;
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
          }
        }
  }
  if (Calls.empty())
    return PreservedAnalyses::all();

  // Capture updatable variables for the current SCC and RefSCC.
  auto *C = &InitialC;
  auto *RC = &C->getOuterRefSCC();

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee. This allows us to avoid
  // infinite inlining in some obscure cases. To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 16> InlineHistory;

  // Track a set vector of inlined callees so that we can augment the caller
  // with all of their edges in the call graph before pruning out the ones
  // that got simplified away.
  SmallSetVector<Function *, 4> InlinedCallees;

  // Track the dead functions to delete once finished with inlining calls. We
  // defer deleting these to make it easier to handle the call graph updates.
  SmallVector<Function *, 4> DeadFunctions;

  // Loop forward over all of the calls. Note that we cannot cache the size as
  // inlining can introduce new calls that need to be processed.
  for (int i = 0; i < (int)Calls.size(); ++i) {
    // We expect the calls to typically be batched with sequences of calls
    // that have the same caller, so we first set up some shared
    // infrastructure for this caller. We also do any pruning we can at this
    // layer on the caller alone.
    Function &F = *Calls[i].first.getCaller();
    LazyCallGraph::Node &N = *CG.lookup(F);
    if (CG.lookupSCC(N) != C)
      continue;
    if (F.hasFnAttribute(Attribute::OptimizeNone))
      continue;

    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");

    // Get a FunctionAnalysisManager via a proxy for this particular node. We
    // do this each time we visit a node as the SCC may have changed and as
    // we're going to mutate this particular function we want to make sure
    // the proxy is in place to forward any invalidation events. We can use
    // the manager we get here for looking up results for functions other
    // than this node however because those functions aren't going to be
    // mutated by this pass.
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).getManager();

    // Get the remarks emission analysis for the caller.
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };
    auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
      return FAM.getResult<BlockFrequencyAnalysis>(F);
    };

    auto GetInlineCost = [&](CallSite CS) {
      Function &Callee = *CS.getCalledFunction();
      auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
                           PSI, &ORE);
    };

    // Now process as many calls as we have within this caller in the
    // sequence. We bail out as soon as the caller has to change so we can
    // update the call graph and prepare the context of that new caller.
    bool DidInline = false;
    for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
      int InlineHistoryID;
      CallSite CS;
      std::tie(CS, InlineHistoryID) = Calls[i];
      Function &Callee = *CS.getCalledFunction();

      if (InlineHistoryID != -1 &&
          InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
        continue;

      // Check if this inlining may repeat breaking an SCC apart that has
      // already been split once before. In that case, inlining here may
      // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
      // and thus hidden from the full inline history.
      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
          UR.InlinedInternalEdges.count({&N, C})) {
        LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                             "previously split out of this SCC by inlining: "
                          << F.getName() << " -> " << Callee.getName()
                          << "\n");
        continue;
      }

      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      // Check whether we want to inline this callsite.
      if (!OIC)
        continue;

      // Setup the data structure used to plumb customization into the
      // `InlineFunction` routine.
      InlineFunctionInfo IFI(
          /*cg=*/nullptr, &GetAssumptionCache, PSI,
          &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
          &FAM.getResult<BlockFrequencyAnalysis>(Callee));

      // Get DebugLoc to report. CS will be invalid after Inliner.
      DebugLoc DLoc = CS->getDebugLoc();
      BasicBlock *Block = CS.getParent();

      using namespace ore;

      if (!InlineFunction(CS, IFI)) {
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
                                          Block)
                 << NV("Callee", &Callee) << " will not be inlined into "
                 << NV("Caller", &F);
        });
        continue;
      }
      DidInline = true;
      InlinedCallees.insert(&Callee);

      ORE.emit([&]() {
        bool AlwaysInline = OIC->isAlways();
        StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
        OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
        R << NV("Callee", &Callee) << " inlined into ";
        R << NV("Caller", &F);
        if (AlwaysInline)
          R << " with cost=always";
        else {
          R << " with cost=" << NV("Cost", OIC->getCost());
          R << " (threshold=" << NV("Threshold", OIC->getThreshold());
          R << ")";
        }
        return R;
      });

      // Add any new callsites to defined functions to the worklist.
      if (!IFI.InlinedCallSites.empty()) {
        int NewHistoryID = InlineHistory.size();
        InlineHistory.push_back({&Callee, InlineHistoryID});
        for (CallSite &CS : reverse(IFI.InlinedCallSites))
          if (Function *NewCallee = CS.getCalledFunction())
            if (!NewCallee->isDeclaration())
              Calls.push_back({CS, NewHistoryID});
      }

      if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
        ImportedFunctionsStats->recordInline(F, Callee);

      // Merge the attributes based on the inlining.
      AttributeFuncs::mergeAttributesForInlining(F, Callee);

      // For local functions, check whether this makes the callee trivially
      // dead. In that case, we can drop the body of the function eagerly
      // which may reduce the number of callers of other functions to one,
      // changing inline cost thresholds.
      if (Callee.hasLocalLinkage()) {
        // To check this we also need to nuke any dead constant uses (perhaps
        // made dead by this operation on other functions).
        Callee.removeDeadConstantUsers();
        if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
          Calls.erase(
              std::remove_if(Calls.begin() + i + 1, Calls.end(),
                             [&Callee](const std::pair<CallSite, int> &Call) {
                               return Call.first.getCaller() == &Callee;
                             }),
              Calls.end());
          // Clear the body and queue the function itself for deletion when
          // we finish inlining and call graph updates.
          // Note that after this point, it is an error to do anything other
          // than use the callee's address or delete it.
          Callee.dropAllReferences();
          assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
                 "Cannot cause a function to become dead twice!");
          DeadFunctions.push_back(&Callee);
        }
      }
    }

    // Back the call index up by one to put us in a good position to go
    // around the outer loop.
    --i;

    if (!DidInline)
      continue;
    Changed = true;

    // Add all the inlined callees' edges as ref edges to the caller. These
    // are by definition trivial edges as we always have *some* transitive
    // ref edge chain.
    // While in some cases these edges are direct calls inside the callee,
    // they have to be modeled in the inliner as reference edges as there may
    // be a reference edge anywhere along the chain from the current caller
    // to the callee that causes the whole thing to appear like
    // a (transitive) reference edge that will require promotion to a call
    // edge below.
    for (Function *InlinedCallee : InlinedCallees) {
      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
      for (LazyCallGraph::Edge &E : *CalleeN)
        RC->insertTrivialRefEdge(N, E.getNode());
    }

    // At this point, since we have made changes we have at least removed
    // a call instruction. However, in the process we do some incremental
    // simplification of the surrounding code. This simplification can
    // essentially do all of the same things as a function pass and we can
    // re-use the exact same logic for updating the call graph to reflect the
    // change.
    LazyCallGraph::SCC *OldC = C;
    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
    LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
    RC = &C->getOuterRefSCC();

    // If this causes an SCC to split apart into multiple smaller SCCs, there
    // is a subtle risk we need to prepare for. Other transformations may
    // expose an "infinite inlining" opportunity later, and because of the
    // SCC mutation, we will revisit this function and potentially re-inline.
    // If we do, and that re-inlining also has the potential to mutate the
    // SCC structure, the infinite inlining problem can manifest through
    // infinite SCC splits and merges. To avoid this, we capture the
    // originating caller node and the SCC containing the call edge. This is
    // a slight overapproximation of the possible inlining decisions that
    // must be avoided, but is relatively efficient to store.
    // FIXME: This seems like a very heavyweight way of retaining the inline
    // history, we should look for a more efficient way of tracking it.
    if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
          return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
        })) {
      LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
                           "retaining this to avoid infinite inlining.\n");
      UR.InlinedInternalEdges.insert({&N, OldC});
    }
    InlinedCallees.clear();
  }

  // Now that we've finished inlining all of the calls across this SCC,
  // delete all of the trivially dead functions, updating the call graph and
  // the CGSCC pass manager in the process.
  //
  // Note that this walks a pointer set which has non-deterministic order but
  // that is OK as all we do is delete things and add pointers to unordered
  // sets.
  for (Function *DeadF : DeadFunctions) {
    // Get the necessary information out of the call graph and nuke the
    // function there. Also, clear out any cached analyses.
    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
            .getManager();
    FAM.clear(*DeadF, DeadF->getName());
    AM.clear(DeadC, DeadC.getName());
    auto &DeadRC = DeadC.getOuterRefSCC();
    CG.removeDeadFunction(*DeadF);

    // Mark the relevant parts of the call graph as invalid so we don't visit
    // them.
    UR.InvalidatedSCCs.insert(&DeadC);
    UR.InvalidatedRefSCCs.insert(&DeadRC);

    // And delete the actual function from the module.
    M.getFunctionList().erase(DeadF);
  }

  if (!Changed)
    return PreservedAnalyses::all();

  // Even if we change the IR, we update the core CGSCC data structures and
  // so can preserve the proxy to the function analysis manager.
  PreservedAnalyses PA;
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  return PA;
}
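InlineHistoryIncludes, referenced in the inner loop above, is a small file-local helper defined elsewhere in the file. A sketch consistent with its use here walks the chain of (callee, parent-id) pairs recorded in InlineHistory, looking for the candidate callee anywhere along the chain:

static bool InlineHistoryIncludes(
    Function *F, int InlineHistoryID,
    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
  while (InlineHistoryID != -1) {
    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
           "Invalid inline history ID");
    if (InlineHistory[InlineHistoryID].first == F)
      return true; // F was already inlined somewhere along this chain.
    InlineHistoryID = InlineHistory[InlineHistoryID].second;
  }
  return false;
}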
PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM) {
  Module &M = *C.begin()->getFunction().getParent();
  const ModuleAnalysisManager &MAM =
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C).getManager();
  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager();

  // FIXME: Need some way to make it more reasonable to assume that this is
  // always cached.
  TargetLibraryInfo &TLI = *MAM.getCachedResult<TargetLibraryAnalysis>(M);

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  // Fill SCCNodes with the elements of the SCC. Also track whether there are
  // any external or opt-none nodes that will prevent us from optimizing any
  // part of the SCC.
  SCCNodeSet SCCNodes;
  bool HasUnknownCall = false;
  for (LazyCallGraph::Node &N : C) {
    Function &F = N.getFunction();
    if (F.hasFnAttribute(Attribute::OptimizeNone)) {
      // Treat any function we're trying not to optimize as if it were an
      // indirect call and omit it from the node set used below.
      HasUnknownCall = true;
      continue;
    }
    // Track whether any functions in this SCC have an unknown call edge.
    // Note: if this is ever a performance hit, we can common it with
    // subsequent routines which also do scans over the instructions of the
    // function.
    if (!HasUnknownCall)
      for (Instruction &I : instructions(F))
        if (auto CS = CallSite(&I))
          if (!CS.getCalledFunction()) {
            HasUnknownCall = true;
            break;
          }

    SCCNodes.insert(&F);
  }

  bool Changed = false;
  Changed |= addReadAttrs(SCCNodes, AARGetter);
  Changed |= addArgumentAttrs(SCCNodes);

  // If we have no external nodes participating in the SCC, we can deduce
  // some more precise attributes as well.
  if (!HasUnknownCall) {
    Changed |= addNoAliasAttrs(SCCNodes);
    Changed |= addNonNullAttrs(SCCNodes, TLI);
    Changed |= removeConvergentAttrs(SCCNodes);
    Changed |= addNoRecurseAttrs(SCCNodes);
  }

  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
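To isolate the scanning logic above, here is a hedged sketch that factors the "unknown call" test into a standalone predicate; sccHasUnknownCall is a hypothetical name, not part of FunctionAttrs.cpp. It computes the same final HasUnknownCall flag, returning true as soon as an opt-none function or an indirect call is found:

static bool sccHasUnknownCall(LazyCallGraph::SCC &C) {
  for (LazyCallGraph::Node &N : C) {
    Function &F = N.getFunction();
    // Opt-none functions are treated like indirect calls above.
    if (F.hasFnAttribute(Attribute::OptimizeNone))
      return true;
    for (Instruction &I : instructions(F))
      if (auto CS = CallSite(&I))
        if (!CS.getCalledFunction())
          return true; // Indirect call: the callee is unknown.
  }
  return false;
}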