void FunctionSignatureSpecializationMangler::mangleConstantProp(LiteralInst *LI) { // Append the prefix for constant propagation 'p'. ArgOpBuffer << 'p'; // Then append the unique identifier of our literal. switch (LI->getKind()) { default: llvm_unreachable("unknown literal"); case SILInstructionKind::DynamicFunctionRefInst: { SILFunction *F = cast<DynamicFunctionRefInst>(LI)->getReferencedFunction(); ArgOpBuffer << 'f'; appendIdentifier(F->getName()); break; } case SILInstructionKind::FunctionRefInst: { SILFunction *F = cast<FunctionRefInst>(LI)->getReferencedFunction(); ArgOpBuffer << 'f'; appendIdentifier(F->getName()); break; } case SILInstructionKind::GlobalAddrInst: { SILGlobalVariable *G = cast<GlobalAddrInst>(LI)->getReferencedGlobal(); ArgOpBuffer << 'g'; appendIdentifier(G->getName()); break; } case SILInstructionKind::IntegerLiteralInst: { APInt apint = cast<IntegerLiteralInst>(LI)->getValue(); ArgOpBuffer << 'i' << apint; break; } case SILInstructionKind::FloatLiteralInst: { APInt apint = cast<FloatLiteralInst>(LI)->getBits(); ArgOpBuffer << 'd' << apint; break; } case SILInstructionKind::StringLiteralInst: { StringLiteralInst *SLI = cast<StringLiteralInst>(LI); StringRef V = SLI->getValue(); assert(V.size() <= 32 && "Cannot encode string of length > 32"); std::string VBuffer; if (!V.empty() && (isDigit(V[0]) || V[0] == '_')) { VBuffer = "_"; VBuffer.append(V.data(), V.size()); V = VBuffer; } appendIdentifier(V); ArgOpBuffer << 's'; switch (SLI->getEncoding()) { case StringLiteralInst::Encoding::Bytes: ArgOpBuffer << 'B'; break; case StringLiteralInst::Encoding::UTF8: ArgOpBuffer << 'b'; break; case StringLiteralInst::Encoding::UTF16: ArgOpBuffer << 'w'; break; case StringLiteralInst::Encoding::ObjCSelector: ArgOpBuffer << 'c'; break; } break; } } }
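// For illustration only: the encoding above concatenates a one-character tag with the
// literal's payload, after the 'p' marker for a constant-propagated argument. A minimal
// standalone sketch of two of the cases, using std::string in place of ArgOpBuffer and
// hypothetical helper names (this is not the mangler's actual API):
#include <cstdint>
#include <string>

// 'p' marks a constant-propagated argument; the next character tags the literal kind
// ('f' function ref, 'g' global, 'i' integer, 'd' float, 's' string), then the payload.
std::string mangleIntLiteralSuffix(int64_t value) {
  return "pi" + std::to_string(value);          // e.g. 42 -> "pi42"
}

std::string mangleFunctionRefSuffix(const std::string &symbolName) {
  // The real mangler runs the name through appendIdentifier; appended verbatim here.
  return "pf" + symbolName;
}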
static bool processFunctionWithLoopSupport( SILFunction &F, AliasAnalysis *AA, PostOrderAnalysis *POTA, LoopRegionFunctionInfo *LRFI, SILLoopInfo *LI, RCIdentityFunctionInfo *RCFI, ProgramTerminationFunctionInfo *PTFI) { // GlobalARCOpts seems to be taking up a lot of compile time when running on // globalinit_func. Since that is not *that* interesting from an ARC // perspective (i.e. no ref count operations in a loop), disable it on such // functions temporarily in order to unblock others. This should be removed. if (F.getName().startswith("globalinit_")) return false; DEBUG(llvm::dbgs() << "***** Processing " << F.getName() << " *****\n"); LoopARCPairingContext Context(F, AA, LRFI, LI, RCFI, PTFI); return Context.process(); }
std::string getNodeLabel(const OrderedCallGraph::Node *Node, const OrderedCallGraph *Graph) { SILFunction *F = Node->CGNode->getFunction(); std::string Label = F->getName(); wrap(Label, Node->NumCallSites); return Label; }
// Update UnhandledOnceCallee and InitializerCount by going through all "once" // calls. void SILGlobalOpt::collectOnceCall(BuiltinInst *BI) { if (UnhandledOnceCallee) return; const BuiltinInfo &Builtin = Module->getBuiltinInfo(BI->getName()); if (Builtin.ID != BuiltinValueKind::Once) return; SILFunction *Callee = getCalleeOfOnceCall(BI); if (!Callee) { LLVM_DEBUG(llvm::dbgs() << "GlobalOpt: unhandled once callee\n"); UnhandledOnceCallee = true; return; } if (!Callee->getName().startswith("globalinit_")) return; // We currently disable optimizing the initializer if a globalinit_func // is called by "once" from multiple locations. if (!BI->getFunction()->isGlobalInit()) // If a globalinit_func is called by "once" from a function that is not // an addressor, we set count to 2 to disable optimizing the initializer. InitializerCount[Callee] = 2; else InitializerCount[Callee]++; }
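// A sketch of the counting policy above, isolated from SIL (hypothetical names): a
// globalinit_ callee reached via "once" from a non-global-init caller is pinned at a count
// of 2, which later disqualifies it from the single-call-site optimization; otherwise each
// once-call site is simply counted.
#include <map>
#include <string>

void recordOnceCall(std::map<std::string, int> &initializerCount,
                    const std::string &callee, bool callerIsGlobalInit) {
  if (!callerIsGlobalInit)
    initializerCount[callee] = 2;   // a count > 1 disables optimizing the initializer
  else
    ++initializerCount[callee];
}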
std::string getNodeDescription(const OrderedCallGraph::Node *Node, const OrderedCallGraph *Graph) { SILFunction *F = Node->CGNode->getFunction(); std::string Label = demangle_wrappers:: demangleSymbolAsString(F->getName()); wrap(Label, Node->NumCallSites); return Label; }
static bool processFunctionWithoutLoopSupport(SILFunction &F, bool FreezePostDomReleases, AliasAnalysis *AA, PostOrderAnalysis *POTA, RCIdentityFunctionInfo *RCIA, EpilogueARCFunctionInfo *EAFI, ProgramTerminationFunctionInfo *PTFI) { // GlobalARCOpts seems to be taking up a lot of compile time when running on // globalinit_func. Since that is not *that* interesting from an ARC // perspective (i.e. no ref count operations in a loop), disable it on such // functions temporarily in order to unblock others. This should be removed. if (F.getName().startswith("globalinit_")) return false; LLVM_DEBUG(llvm::dbgs() << "***** Processing " << F.getName() << " *****\n"); bool Changed = false; BlockARCPairingContext Context(F, AA, POTA, RCIA, EAFI, PTFI); // Until we do not remove any instructions or have nested increments, // decrements... while (true) { // Compute matching sets of increments, decrements, and their insertion // points. // // We need to blot pointers we remove after processing an individual pointer // so we don't process pairs after we have paired them up. Thus we pass in a // lambda that performs the work for us. bool ShouldRunAgain = Context.run(FreezePostDomReleases); Changed |= Context.madeChange(); // If we did not remove any instructions or have any nested increments, do // not perform another iteration. if (!ShouldRunAgain) break; // Otherwise, perform another iteration. LLVM_DEBUG(llvm::dbgs() << "\n<<< Made a Change! " "Reprocessing Function! >>>\n"); } LLVM_DEBUG(llvm::dbgs() << "\n"); // Return true if we moved or deleted any instructions. return Changed; }
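// The driver above is a plain fixed-point loop: rerun the pairing context until it reports
// that another pass would find nothing new. Schematically, with a hypothetical Pass type
// exposing run() and madeChange():
template <typename Pass>
bool runToFixedPoint(Pass &pass) {
  bool changed = false;
  while (true) {
    bool shouldRunAgain = pass.run();   // compute and process matching sets once
    changed |= pass.madeChange();
    if (!shouldRunAgain)
      break;                            // nothing removed and no nested pairs left
  }
  return changed;
}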
/// \brief Attempt to inline all calls smaller than our threshold. /// returns True if a function was inlined. bool SILPerformanceInliner::inlineCallsIntoFunction(SILFunction *Caller, DominanceAnalysis *DA, SILLoopAnalysis *LA) { // Don't optimize functions that are marked with the opt.never attribute. if (!Caller->shouldOptimize()) return false; DEBUG(llvm::dbgs() << "Visiting Function: " << Caller->getName() << "\n"); // First step: collect all the functions we want to inline. We // don't change anything yet so that the dominator information // remains valid. SmallVector<FullApplySite, 8> AppliesToInline; collectAppliesToInline(Caller, AppliesToInline, DA, LA); if (AppliesToInline.empty()) return false; // Second step: do the actual inlining. for (auto AI : AppliesToInline) { SILFunction *Callee = AI.getCalleeFunction(); assert(Callee && "apply_inst does not have a direct callee anymore"); DEBUG(llvm::dbgs() << " Inline:" << *AI.getInstruction()); if (!Callee->shouldOptimize()) { DEBUG(llvm::dbgs() << " Cannot inline function " << Callee->getName() << " marked to be excluded from optimizations.\n"); continue; } SmallVector<SILValue, 8> Args; for (const auto &Arg : AI.getArguments()) Args.push_back(Arg); // Notice that we will skip all of the newly inlined ApplyInsts. That's // okay because we will visit them in our next invocation of the inliner. TypeSubstitutionMap ContextSubs; SILInliner Inliner(*Caller, *Callee, SILInliner::InlineKind::PerformanceInline, ContextSubs, AI.getSubstitutions()); auto Success = Inliner.inlineFunction(AI, Args); (void) Success; // We've already determined we should be able to inline this, so // we expect it to have happened. assert(Success && "Expected inliner to inline this function!"); recursivelyDeleteTriviallyDeadInstructions(AI.getInstruction(), true); NumFunctionsInlined++; } DEBUG(llvm::dbgs() << "\n"); return true; }
void FunctionSignatureSpecializationMangler::mangleConstantProp(LiteralInst *LI) { Mangler &M = getMangler(); // Append the prefix for constant propagation 'cp'. M.append("cp"); // Then append the unique identifier of our literal. switch (LI->getKind()) { default: llvm_unreachable("unknown literal"); case ValueKind::FunctionRefInst: { SILFunction *F = cast<FunctionRefInst>(LI)->getReferencedFunction(); M.append("fr"); M.mangleIdentifierSymbol(F->getName()); break; } case ValueKind::GlobalAddrInst: { SILGlobalVariable *G = cast<GlobalAddrInst>(LI)->getReferencedGlobal(); M.append("g"); M.mangleIdentifierSymbol(G->getName()); break; } case ValueKind::IntegerLiteralInst: { APInt apint = cast<IntegerLiteralInst>(LI)->getValue(); M.append("i"); M.mangleNatural(apint); break; } case ValueKind::FloatLiteralInst: { APInt apint = cast<FloatLiteralInst>(LI)->getBits(); M.append("fl"); M.mangleNatural(apint); break; } case ValueKind::StringLiteralInst: { StringLiteralInst *SLI = cast<StringLiteralInst>(LI); StringRef V = SLI->getValue(); assert(V.size() <= 32 && "Cannot encode string of length > 32"); llvm::SmallString<33> Str; Str += "u"; Str += V; M.append("se"); M.mangleNatural(APInt(32, unsigned(SLI->getEncoding()))); M.append("v"); M.mangleIdentifier(Str); break; } } }
/// \brief Inlines all mandatory inlined functions into the body of a function, /// first recursively inlining all mandatory apply instructions in those /// functions into their bodies if necessary. /// /// \param F the function to be processed /// \param AI nullptr if this is being called from the top level; the relevant /// ApplyInst requiring the recursive call when non-null /// \param FullyInlinedSet the set of all functions already known to be fully /// processed, to avoid processing them over again /// \param SetFactory an instance of ImmutableFunctionSet::Factory /// \param CurrentInliningSet the set of functions currently being inlined in /// the current call stack of recursive calls /// /// \returns true if successful, false if failed due to circular inlining. static bool runOnFunctionRecursively(SILFunction *F, FullApplySite AI, DenseFunctionSet &FullyInlinedSet, ImmutableFunctionSet::Factory &SetFactory, ImmutableFunctionSet CurrentInliningSet, ClassHierarchyAnalysis *CHA) { // Avoid reprocessing functions needlessly. if (FullyInlinedSet.count(F)) return true; // Prevent attempt to circularly inline. if (CurrentInliningSet.contains(F)) { // This cannot happen on a top-level call, so AI should be non-null. assert(AI && "Cannot have circular inline without apply"); SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::circular_transparent); return false; } // Add to the current inlining set (immutably, so we only affect the set // during this call and recursive subcalls). CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); SmallVector<std::pair<SILValue, ParameterConvention>, 16> CaptureArgs; SmallVector<SILValue, 32> FullArgs; for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) { for (auto II = BI->begin(), IE = BI->end(); II != IE; ++II) { FullApplySite InnerAI = FullApplySite::isa(&*II); if (!InnerAI) continue; auto *ApplyBlock = InnerAI.getParent(); // *NOTE* If devirtualization succeeds, sometimes II will not be InnerAI, // but a casted result of InnerAI or even a block argument due to // abstraction changes when calling the witness or class method. We still // know that InnerAI dominates II though. std::tie(InnerAI, II) = tryDevirtualizeApplyHelper(InnerAI, II, CHA); if (!InnerAI) continue; SILValue CalleeValue = InnerAI.getCallee(); bool IsThick; PartialApplyInst *PAI; SILFunction *CalleeFunction = getCalleeFunction( F, InnerAI, IsThick, CaptureArgs, FullArgs, PAI); if (!CalleeFunction) continue; // Then recursively process it first before trying to inline it. if (!runOnFunctionRecursively(CalleeFunction, InnerAI, FullyInlinedSet, SetFactory, CurrentInliningSet, CHA)) { // If we failed due to circular inlining, then emit some notes to // trace back the failure if we have more information. // FIXME: possibly it could be worth recovering and attempting other // inlines within this same recursive call rather than simply // propagating the failure. if (AI) { SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::note_while_inlining); } return false; } // Get our list of substitutions. auto Subs = (PAI ? 
PAI->getSubstitutionMap() : InnerAI.getSubstitutionMap()); SILOpenedArchetypesTracker OpenedArchetypesTracker(F); F->getModule().registerDeleteNotificationHandler( &OpenedArchetypesTracker); // The callee only needs to know about opened archetypes used in // the substitution list. OpenedArchetypesTracker.registerUsedOpenedArchetypes( InnerAI.getInstruction()); if (PAI) { OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI); } SILInliner Inliner(*F, *CalleeFunction, SILInliner::InlineKind::MandatoryInline, Subs, OpenedArchetypesTracker); if (!Inliner.canInlineFunction(InnerAI)) { // See comment above about casting when devirtualizing and how this // sometimes causes II and InnerAI to be different and even in different // blocks. II = InnerAI.getInstruction()->getIterator(); continue; } // Inline function at I, which also changes I to refer to the first // instruction inlined in the case that it succeeds. We purposely // process the inlined body after inlining, because the inlining may // have exposed new inlining opportunities beyond those present in // the inlined function when processed independently. LLVM_DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() << " into @" << InnerAI.getFunction()->getName() << "\n"); // If we intend to inline a thick function, then we need to balance the // reference counts for correctness. if (IsThick) { bool IsCalleeGuaranteed = PAI && PAI->getType().castTo<SILFunctionType>()->isCalleeGuaranteed(); fixupReferenceCounts(II, CalleeValue, CaptureArgs, IsCalleeGuaranteed); } // Decrement our iterator (carefully, to avoid going off the front) so it // is valid after inlining is done. Inlining deletes the apply, and can // introduce multiple new basic blocks. II = prev_or_default(II, ApplyBlock->begin(), ApplyBlock->end()); Inliner.inlineFunction(InnerAI, FullArgs); // We were able to inline successfully. Remove the apply. InnerAI.getInstruction()->eraseFromParent(); // Reestablish our iterator if it wrapped. if (II == ApplyBlock->end()) II = ApplyBlock->begin(); // Update the iterator when instructions are removed. DeleteInstructionsHandler DeletionHandler(II); // Now that the IR is correct, see if we can remove dead callee // computations (e.g. dead partial_apply closures). cleanupCalleeValue(CalleeValue, FullArgs); // Reposition iterators possibly invalidated by mutation. BI = SILFunction::iterator(ApplyBlock); IE = ApplyBlock->end(); assert(BI == SILFunction::iterator(II->getParent()) && "Mismatch between the instruction and basic block"); ++NumMandatoryInlines; } } // Keep track of full inlined functions so we don't waste time recursively // reprocessing them. FullyInlinedSet.insert(F); return true; }
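// The iterator handling above depends on a small helper, prev_or_default: step back one
// instruction so the iterator survives deletion of the apply, or fall back to the block's
// end sentinel when the apply is already first (the later "if it wrapped" check then resets
// it to begin()). The helper's definition is not shown above; a plausible shape is:
#include <iterator>

template <typename Iter>
Iter prev_or_default(Iter it, Iter begin, Iter fallback) {
  return (it == begin) ? fallback : std::prev(it);
}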
bool SILCombiner::doOneIteration(SILFunction &F, unsigned Iteration) { MadeChange = false; DEBUG(llvm::dbgs() << "\n\nSILCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); // Add reachable instructions to our worklist. addReachableCodeToWorklist(&*F.begin()); // Process until we run out of items in our worklist. while (!Worklist.isEmpty()) { SILInstruction *I = Worklist.removeOne(); // When we erase an instruction, we use the map in the worklist to check if // the instruction is in the worklist. If it is, we replace it with null // instead of shifting all members of the worklist towards the front. This // check makes sure that if we run into any such residual null pointers, we // skip them. if (I == nullptr) continue; // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I)) { DEBUG(llvm::dbgs() << "SC: DCE: " << *I << '\n'); eraseInstFromFunction(*I); ++NumDeadInst; MadeChange = true; continue; } // Check to see if we can instsimplify the instruction. if (SILValue Result = simplifyInstruction(I)) { ++NumSimplified; DEBUG(llvm::dbgs() << "SC: Simplify Old = " << *I << '\n' << " New = " << *Result << '\n'); // Everything uses the new instruction now. replaceInstUsesWith(*I, Result); // Push the new instruction and any users onto the worklist. Worklist.addUsersToWorklist(Result); eraseInstFromFunction(*I); MadeChange = true; continue; } // If we have reached this point, all attempts to do simple simplifications // have failed. Prepare to SILCombine. Builder.setInsertionPoint(I); #ifndef NDEBUG std::string OrigI; #endif DEBUG(llvm::raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); DEBUG(llvm::dbgs() << "SC: Visiting: " << OrigI << '\n'); if (SILInstruction *Result = visit(I)) { ++NumCombined; // Should we replace the old instruction with a new one? if (Result != I) { assert(&*std::prev(SILBasicBlock::iterator(I)) == Result && "Expected new instruction inserted before existing instruction!"); DEBUG(llvm::dbgs() << "SC: Old = " << *I << '\n' << " New = " << *Result << '\n'); // Everything uses the new instruction now. replaceInstUsesWith(*I, Result); // Push the new instruction and any users onto the worklist. Worklist.add(Result); Worklist.addUsersToWorklist(Result); eraseInstFromFunction(*I); } else { DEBUG(llvm::dbgs() << "SC: Mod = " << OrigI << '\n' << " New = " << *I << '\n'); // If the instruction was modified, it's possible that it is now dead. // if so, remove it. if (isInstructionTriviallyDead(I)) { eraseInstFromFunction(*I); } else { Worklist.add(I); Worklist.addUsersToWorklist(I); } } MadeChange = true; } // Our tracking list has been accumulating instructions created by the // SILBuilder during this iteration. Go through the tracking list and add // its contents to the worklist and then clear said list in preparation for // the next iteration. auto &TrackingList = *Builder.getTrackingList(); for (SILInstruction *I : TrackingList) { DEBUG(llvm::dbgs() << "SC: add " << *I << " from tracking list to worklist\n"); Worklist.add(I); } TrackingList.clear(); }
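// The worklist comment above describes a combiner-style worklist where erased instructions
// are nulled out in place (via a position map) instead of compacting the vector, and
// consumers skip the residual nulls. A self-contained sketch with plain pointers:
#include <cstddef>
#include <unordered_map>
#include <vector>

template <typename T>
class CombineWorklist {
  std::vector<T *> Slots;
  std::unordered_map<T *, size_t> Positions;

public:
  void add(T *Item) {
    if (Positions.count(Item))
      return;
    Positions[Item] = Slots.size();
    Slots.push_back(Item);
  }

  // Called when an instruction is erased from the function: leave a hole,
  // do not shift the remaining entries toward the front.
  void remove(T *Item) {
    auto It = Positions.find(Item);
    if (It == Positions.end())
      return;
    Slots[It->second] = nullptr;
    Positions.erase(It);
  }

  T *removeOne() {
    while (!Slots.empty()) {
      T *Item = Slots.back();
      Slots.pop_back();
      if (Item) {                 // skip residual null pointers
        Positions.erase(Item);
        return Item;
      }
    }
    return nullptr;
  }

  bool isEmpty() const { return Positions.empty(); }
};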
/// Remove retain/release pairs around builtin "unsafeGuaranteed" instruction /// sequences. static bool removeGuaranteedRetainReleasePairs(SILFunction &F, RCIdentityFunctionInfo &RCIA, PostDominanceAnalysis *PDA) { DEBUG(llvm::dbgs() << "Running on function " << F.getName() << "\n"); bool Changed = false; // Lazily compute post-dominance info only when we really need it. PostDominanceInfo *PDI = nullptr; for (auto &BB : F) { auto It = BB.begin(), End = BB.end(); llvm::DenseMap<SILValue, SILInstruction *> LastRetain; while (It != End) { auto *CurInst = &*It; ++It; // Memorize the last retain. if (isa<StrongRetainInst>(CurInst) || isa<RetainValueInst>(CurInst)) { LastRetain[RCIA.getRCIdentityRoot(CurInst->getOperand(0))] = CurInst; continue; } // Look for a builtin "unsafeGuaranteed" instruction. auto *UnsafeGuaranteedI = dyn_cast<BuiltinInst>(CurInst); if (!UnsafeGuaranteedI || !UnsafeGuaranteedI->getBuiltinKind() || *UnsafeGuaranteedI->getBuiltinKind() != BuiltinValueKind::UnsafeGuaranteed) continue; auto Opd = UnsafeGuaranteedI->getOperand(0); auto RCIdOpd = RCIA.getRCIdentityRoot(UnsafeGuaranteedI->getOperand(0)); if (!LastRetain.count(RCIdOpd)) { DEBUG(llvm::dbgs() << "LastRetain failed\n"); continue; } // This code is very conservative. Check that there is a matching retain // before the unsafeGuaranteed builtin with only retains in between. auto *LastRetainInst = LastRetain[RCIdOpd]; auto NextInstIter = std::next(SILBasicBlock::iterator(LastRetainInst)); while (NextInstIter != BB.end() && &*NextInstIter != CurInst && (isa<RetainValueInst>(*NextInstIter) || isa<StrongRetainInst>(*NextInstIter) || !NextInstIter->mayHaveSideEffects() || isa<DebugValueInst>(*NextInstIter) || isa<DebugValueAddrInst>(*NextInstIter))) ++NextInstIter; if (&*NextInstIter != CurInst) { DEBUG(llvm::dbgs() << "Last retain right before match failed\n"); continue; } DEBUG(llvm::dbgs() << "Saw " << *UnsafeGuaranteedI); DEBUG(llvm::dbgs() << " with operand " << *Opd); // Match the reference and token result. // %4 = builtin "unsafeGuaranteed"<Foo>(%0 : $Foo) // %5 = tuple_extract %4 : $(Foo, Builtin.Int8), 0 // %6 = tuple_extract %4 : $(Foo, Builtin.Int8), 1 SILInstruction *UnsafeGuaranteedValue; SILInstruction *UnsafeGuaranteedToken; std::tie(UnsafeGuaranteedValue, UnsafeGuaranteedToken) = getSingleUnsafeGuaranteedValueResult(UnsafeGuaranteedI); if (!UnsafeGuaranteedValue) { DEBUG(llvm::dbgs() << " no single unsafeGuaranteed value use\n"); continue; } // Look for a builtin "unsafeGuaranteedEnd" instruction that uses the // token. // builtin "unsafeGuaranteedEnd"(%6 : $Builtin.Int8) : $() auto *UnsafeGuaranteedEndI = getUnsafeGuaranteedEndUser(UnsafeGuaranteedToken); if (!UnsafeGuaranteedEndI) { DEBUG(llvm::dbgs() << " no single unsafeGuaranteedEnd use found\n"); continue; } if (!PDI) PDI = PDA->get(&F); // The unsafeGuaranteedEnd needs to post-dominate the unsafeGuaranteed builtin, since we need // to remove the release along all paths to exit. if (!PDI->properlyDominates(UnsafeGuaranteedEndI, UnsafeGuaranteedI)) continue; // Find the release to match with the unsafeGuaranteedValue. auto &UnsafeGuaranteedEndBB = *UnsafeGuaranteedEndI->getParent(); auto LastRelease = findReleaseToMatchUnsafeGuaranteedValue( UnsafeGuaranteedEndI, UnsafeGuaranteedI, UnsafeGuaranteedValue, UnsafeGuaranteedEndBB, RCIA); if (!LastRelease) { DEBUG(llvm::dbgs() << " no release before/after unsafeGuaranteedEnd found\n"); continue; } // Restart iteration before the earliest instruction we remove. 
bool RestartAtBeginningOfBlock = false; auto LastRetainIt = SILBasicBlock::iterator(LastRetainInst); if (LastRetainIt != BB.begin()) { It = std::prev(LastRetainIt); } else RestartAtBeginningOfBlock = true; // Okay, we found a post-dominating release. Let's remove the // retain/unsafeGuaranteed/release combo. // LastRetainInst->eraseFromParent(); LastRelease->eraseFromParent(); UnsafeGuaranteedEndI->eraseFromParent(); deleteAllDebugUses(UnsafeGuaranteedValue); deleteAllDebugUses(UnsafeGuaranteedToken); deleteAllDebugUses(UnsafeGuaranteedI); UnsafeGuaranteedValue->replaceAllUsesWith(Opd); UnsafeGuaranteedValue->eraseFromParent(); UnsafeGuaranteedToken->eraseFromParent(); UnsafeGuaranteedI->replaceAllUsesWith(Opd); UnsafeGuaranteedI->eraseFromParent(); if (RestartAtBeginningOfBlock) It = BB.begin(); Changed = true; } } return Changed; }
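// The restart bookkeeping above follows a common shape: remember the predecessor of the
// earliest instruction about to be erased so the scan resumes there, or record that the
// scan must restart at the top of the block when that instruction is the first one (the
// block's begin() is re-read after the erasures). In std::list terms:
#include <iterator>
#include <list>

template <typename T>
void planRestartBeforeErase(std::list<T> &Block,
                            typename std::list<T>::iterator EarliestToErase,
                            typename std::list<T>::iterator &ResumeIt,
                            bool &RestartAtBlockBegin) {
  if (EarliestToErase != Block.begin()) {
    ResumeIt = std::prev(EarliestToErase);
    RestartAtBlockBegin = false;
  } else {
    RestartAtBlockBegin = true;   // caller re-reads Block.begin() after erasing
  }
}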
/// \brief Inlines all mandatory inlined functions into the body of a function, /// first recursively inlining all mandatory apply instructions in those /// functions into their bodies if necessary. /// /// \param F the function to be processed /// \param AI nullptr if this is being called from the top level; the relevant /// ApplyInst requiring the recursive call when non-null /// \param FullyInlinedSet the set of all functions already known to be fully /// processed, to avoid processing them over again /// \param SetFactory an instance of ImmutableFunctionSet::Factory /// \param CurrentInliningSet the set of functions currently being inlined in /// the current call stack of recursive calls /// /// \returns true if successful, false if failed due to circular inlining. static bool runOnFunctionRecursively(SILFunction *F, FullApplySite AI, SILModule::LinkingMode Mode, DenseFunctionSet &FullyInlinedSet, ImmutableFunctionSet::Factory &SetFactory, ImmutableFunctionSet CurrentInliningSet, ClassHierarchyAnalysis *CHA) { // Avoid reprocessing functions needlessly. if (FullyInlinedSet.count(F)) return true; // Prevent attempt to circularly inline. if (CurrentInliningSet.contains(F)) { // This cannot happen on a top-level call, so AI should be non-null. assert(AI && "Cannot have circular inline without apply"); SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::circular_transparent); return false; } // Add to the current inlining set (immutably, so we only affect the set // during this call and recursive subcalls). CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); SmallVector<SILValue, 16> CaptureArgs; SmallVector<SILValue, 32> FullArgs; for (auto FI = F->begin(), FE = F->end(); FI != FE; ++FI) { for (auto I = FI->begin(), E = FI->end(); I != E; ++I) { FullApplySite InnerAI = FullApplySite::isa(&*I); if (!InnerAI) continue; auto *ApplyBlock = InnerAI.getParent(); auto NewInstPair = tryDevirtualizeApply(InnerAI, CHA); if (auto *NewInst = NewInstPair.first) { replaceDeadApply(InnerAI, NewInst); if (auto *II = dyn_cast<SILInstruction>(NewInst)) I = II->getIterator(); else I = NewInst->getParentBlock()->begin(); auto NewAI = FullApplySite::isa(NewInstPair.second.getInstruction()); if (!NewAI) continue; InnerAI = NewAI; } SILLocation Loc = InnerAI.getLoc(); SILValue CalleeValue = InnerAI.getCallee(); bool IsThick; PartialApplyInst *PAI; SILFunction *CalleeFunction = getCalleeFunction(InnerAI, IsThick, CaptureArgs, FullArgs, PAI, Mode); if (!CalleeFunction || CalleeFunction->isTransparent() == IsNotTransparent) continue; if (F->isFragile() && !CalleeFunction->hasValidLinkageForFragileRef()) { if (!CalleeFunction->hasValidLinkageForFragileInline()) { llvm::errs() << "caller: " << F->getName() << "\n"; llvm::errs() << "callee: " << CalleeFunction->getName() << "\n"; llvm_unreachable("Should never be inlining a resilient function into " "a fragile function"); } continue; } // Then recursively process it first before trying to inline it. if (!runOnFunctionRecursively(CalleeFunction, InnerAI, Mode, FullyInlinedSet, SetFactory, CurrentInliningSet, CHA)) { // If we failed due to circular inlining, then emit some notes to // trace back the failure if we have more information. // FIXME: possibly it could be worth recovering and attempting other // inlines within this same recursive call rather than simply // propagating the failure. 
if (AI) { SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::note_while_inlining); } return false; } // Inline function at I, which also changes I to refer to the first // instruction inlined in the case that it succeeds. We purposely // process the inlined body after inlining, because the inlining may // have exposed new inlining opportunities beyond those present in // the inlined function when processed independently. DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() << " into @" << InnerAI.getFunction()->getName() << "\n"); // If we intend to inline a thick function, then we need to balance the // reference counts for correctness. if (IsThick && I != ApplyBlock->begin()) { // We need to find an appropriate location for our fix up code // We used to do this after inlining Without any modifications // This caused us to add a release in a wrong place: // It would release a value *before* retaining it! // It is really problematic to do this after inlining - // Finding a valid insertion point is tricky: // Inlining might add new basic blocks and/or remove the apply // We want to add the fix up *just before* where the current apply is! // Unfortunately, we *can't* add the fix up code here: // Inlining might fail for any reason - // If that occurred we'd need to undo our fix up code. // Instead, we split the current basic block - // Making sure we have a basic block that starts with our apply. SILBuilderWithScope B(I); ApplyBlock = splitBasicBlockAndBranch(B, &*I, nullptr, nullptr); I = ApplyBlock->begin(); } // Decrement our iterator (carefully, to avoid going off the front) so it // is valid after inlining is done. Inlining deletes the apply, and can // introduce multiple new basic blocks. if (I != ApplyBlock->begin()) --I; else I = ApplyBlock->end(); std::vector<Substitution> ApplySubs(InnerAI.getSubstitutions()); if (PAI) { auto PAISubs = PAI->getSubstitutions(); ApplySubs.insert(ApplySubs.end(), PAISubs.begin(), PAISubs.end()); } SILOpenedArchetypesTracker OpenedArchetypesTracker(*F); F->getModule().registerDeleteNotificationHandler( &OpenedArchetypesTracker); // The callee only needs to know about opened archetypes used in // the substitution list. OpenedArchetypesTracker.registerUsedOpenedArchetypes(InnerAI.getInstruction()); if (PAI) { OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI); } SILInliner Inliner(*F, *CalleeFunction, SILInliner::InlineKind::MandatoryInline, ApplySubs, OpenedArchetypesTracker); if (!Inliner.inlineFunction(InnerAI, FullArgs)) { I = InnerAI.getInstruction()->getIterator(); continue; } // Inlining was successful. Remove the apply. InnerAI.getInstruction()->eraseFromParent(); // Reestablish our iterator if it wrapped. if (I == ApplyBlock->end()) I = ApplyBlock->begin(); // Update the iterator when instructions are removed. DeleteInstructionsHandler DeletionHandler(I); // If the inlined apply was a thick function, then we need to balance the // reference counts for correctness. if (IsThick) fixupReferenceCounts(I, Loc, CalleeValue, CaptureArgs); // Now that the IR is correct, see if we can remove dead callee // computations (e.g. dead partial_apply closures). cleanupCalleeValue(CalleeValue, CaptureArgs, FullArgs); // Reposition iterators possibly invalidated by mutation. 
FI = SILFunction::iterator(ApplyBlock); E = ApplyBlock->end(); assert(FI == SILFunction::iterator(I->getParent()) && "Mismatch between the instruction and basic block"); ++NumMandatoryInlines; } } // Keep track of full inlined functions so we don't waste time recursively // reprocessing them. FullyInlinedSet.insert(F); return true; }
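// The CurrentInliningSet handling above is what makes the circularity check work: each
// recursion level extends the set by value, so it describes exactly the call stack of
// in-flight inlines. A simplified sketch using std::set copies in place of the
// immutable-set factory, with hypothetical node types:
#include <set>
#include <string>
#include <vector>

struct Fn {
  std::string Name;
  std::vector<Fn *> TransparentCallees;   // stand-in for mandatory-inline call sites
};

bool inlineRecursively(Fn *F, std::set<std::string> InliningStack,
                       std::set<std::string> &FullyInlined) {
  if (FullyInlined.count(F->Name))
    return true;                           // already fully processed
  if (InliningStack.count(F->Name))
    return false;                          // circular transparent inlining
  InliningStack.insert(F->Name);           // visible only to this call stack
  for (Fn *Callee : F->TransparentCallees)
    if (!inlineRecursively(Callee, InliningStack, FullyInlined))
      return false;
  FullyInlined.insert(F->Name);
  return true;
}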
/// Returns the callee SILFunction called at a call site, in the case /// that the call is transparent (as in, both that the call is marked /// with the transparent flag and that callee function is actually transparently /// determinable from the SIL) or nullptr otherwise. This assumes that the SIL /// is already in SSA form. /// /// In the case that a non-null value is returned, FullArgs contains effective /// argument operands for the callee function. static SILFunction *getCalleeFunction( SILFunction *F, FullApplySite AI, bool &IsThick, SmallVectorImpl<std::pair<SILValue, ParameterConvention>> &CaptureArgs, SmallVectorImpl<SILValue> &FullArgs, PartialApplyInst *&PartialApply) { IsThick = false; PartialApply = nullptr; CaptureArgs.clear(); FullArgs.clear(); for (const auto &Arg : AI.getArguments()) FullArgs.push_back(Arg); SILValue CalleeValue = AI.getCallee(); if (auto *LI = dyn_cast<LoadInst>(CalleeValue)) { // Conservatively only see through alloc_box; we assume this pass is run // immediately after SILGen auto *PBI = dyn_cast<ProjectBoxInst>(LI->getOperand()); if (!PBI) return nullptr; auto *ABI = dyn_cast<AllocBoxInst>(PBI->getOperand()); if (!ABI) return nullptr; // Ensure there are no other uses of alloc_box than the project_box and // retains, releases. for (Operand *ABIUse : ABI->getUses()) if (ABIUse->getUser() != PBI && !isa<StrongRetainInst>(ABIUse->getUser()) && !isa<StrongReleaseInst>(ABIUse->getUser())) return nullptr; // Scan forward from the alloc box to find the first store, which // (conservatively) must be in the same basic block as the alloc box StoreInst *SI = nullptr; for (auto I = SILBasicBlock::iterator(ABI), E = I->getParent()->end(); I != E; ++I) { // If we find the load instruction first, then the load is loading from // a non-initialized alloc; this shouldn't really happen but I'm not // making any assumptions if (&*I == LI) return nullptr; if ((SI = dyn_cast<StoreInst>(I)) && SI->getDest() == PBI) { // We found a store that we know dominates the load; now ensure there // are no other uses of the project_box except loads. for (Operand *PBIUse : PBI->getUses()) if (PBIUse->getUser() != SI && !isa<LoadInst>(PBIUse->getUser())) return nullptr; // We can conservatively see through the store break; } } if (!SI) return nullptr; CalleeValue = SI->getSrc(); } // PartialApply/ThinToThick -> ConvertFunction patterns are generated // by @noescape closures. // // FIXME: We don't currently handle mismatched return types, however, this // would be a good optimization to handle and would be as simple as inserting // a cast. 
auto skipFuncConvert = [](SILValue CalleeValue) { // We can also allow a thin @escape to noescape conversion as such: // %1 = function_ref @thin_closure_impl : $@convention(thin) () -> () // %2 = convert_function %1 : // $@convention(thin) () -> () to $@convention(thin) @noescape () -> () // %3 = thin_to_thick_function %2 : // $@convention(thin) @noescape () -> () to // $@noescape @callee_guaranteed () -> () // %4 = apply %3() : $@noescape @callee_guaranteed () -> () if (auto *ThinToNoescapeCast = dyn_cast<ConvertFunctionInst>(CalleeValue)) { auto FromCalleeTy = ThinToNoescapeCast->getOperand()->getType().castTo<SILFunctionType>(); if (FromCalleeTy->getExtInfo().hasContext()) return CalleeValue; auto ToCalleeTy = ThinToNoescapeCast->getType().castTo<SILFunctionType>(); auto EscapingCalleeTy = ToCalleeTy->getWithExtInfo( ToCalleeTy->getExtInfo().withNoEscape(false)); if (FromCalleeTy != EscapingCalleeTy) return CalleeValue; return ThinToNoescapeCast->getOperand(); } auto *CFI = dyn_cast<ConvertEscapeToNoEscapeInst>(CalleeValue); if (!CFI) return CalleeValue; // TODO: Handle argument conversion. All the code in this file needs to be // cleaned up and generalized. The argument conversion handling in // optimizeApplyOfConvertFunctionInst should apply to any combine // involving an apply, not just a specific pattern. // // For now, just handle conversion that doesn't affect argument types, // return types, or throws. We could trivially handle any other // representation change, but the only one that doesn't affect the ABI and // matters here is @noescape, so just check for that. auto FromCalleeTy = CFI->getOperand()->getType().castTo<SILFunctionType>(); auto ToCalleeTy = CFI->getType().castTo<SILFunctionType>(); auto EscapingCalleeTy = ToCalleeTy->getWithExtInfo(ToCalleeTy->getExtInfo().withNoEscape(false)); if (FromCalleeTy != EscapingCalleeTy) return CalleeValue; return CFI->getOperand(); }; // Look through a escape to @noescape conversion. CalleeValue = skipFuncConvert(CalleeValue); // We are allowed to see through exactly one "partial apply" instruction or // one "thin to thick function" instructions, since those are the patterns // generated when using auto closures. if (auto *PAI = dyn_cast<PartialApplyInst>(CalleeValue)) { // Collect the applied arguments and their convention. collectPartiallyAppliedArguments(PAI, CaptureArgs, FullArgs); CalleeValue = PAI->getCallee(); IsThick = true; PartialApply = PAI; } else if (auto *TTTFI = dyn_cast<ThinToThickFunctionInst>(CalleeValue)) { CalleeValue = TTTFI->getOperand(); IsThick = true; } CalleeValue = skipFuncConvert(CalleeValue); auto *FRI = dyn_cast<FunctionRefInst>(CalleeValue); if (!FRI) return nullptr; SILFunction *CalleeFunction = FRI->getReferencedFunction(); switch (CalleeFunction->getRepresentation()) { case SILFunctionTypeRepresentation::Thick: case SILFunctionTypeRepresentation::Thin: case SILFunctionTypeRepresentation::Method: case SILFunctionTypeRepresentation::Closure: case SILFunctionTypeRepresentation::WitnessMethod: break; case SILFunctionTypeRepresentation::CFunctionPointer: case SILFunctionTypeRepresentation::ObjCMethod: case SILFunctionTypeRepresentation::Block: return nullptr; } // If the CalleeFunction is a not-transparent definition, we can not process // it. if (CalleeFunction->isTransparent() == IsNotTransparent) return nullptr; // If CalleeFunction is a declaration, see if we can load it. if (CalleeFunction->empty()) AI.getModule().loadFunction(CalleeFunction); // If we fail to load it, bail. 
if (CalleeFunction->empty()) return nullptr; if (F->isSerialized() && !CalleeFunction->hasValidLinkageForFragileInline()) { if (!CalleeFunction->hasValidLinkageForFragileRef()) { llvm::errs() << "caller: " << F->getName() << "\n"; llvm::errs() << "callee: " << CalleeFunction->getName() << "\n"; llvm_unreachable("Should never be inlining a resilient function into " "a fragile function"); } return nullptr; } return CalleeFunction; }
// Returns the callee of an apply_inst if it is basically inlineable. SILFunction *SILPerformanceInliner::getEligibleFunction(FullApplySite AI) { SILFunction *Callee = AI.getCalleeFunction(); if (!Callee) { DEBUG(llvm::dbgs() << " FAIL: Cannot find inlineable callee.\n"); return nullptr; } // Don't inline functions that are marked with the @_semantics or @effects // attribute if the inliner is asked not to inline them. if (Callee->hasSemanticsAttrs() || Callee->hasEffectsKind()) { if (WhatToInline == InlineSelection::NoSemanticsAndGlobalInit) { DEBUG(llvm::dbgs() << " FAIL: Function " << Callee->getName() << " has special semantics or effects attribute.\n"); return nullptr; } // The "availability" semantics attribute is treated like global-init. if (Callee->hasSemanticsAttrs() && WhatToInline != InlineSelection::Everything && Callee->hasSemanticsAttrThatStartsWith("availability")) { return nullptr; } } else if (Callee->isGlobalInit()) { if (WhatToInline != InlineSelection::Everything) { DEBUG(llvm::dbgs() << " FAIL: Function " << Callee->getName() << " has the global-init attribute.\n"); return nullptr; } } // We can't inline external declarations. if (Callee->empty() || Callee->isExternalDeclaration()) { DEBUG(llvm::dbgs() << " FAIL: Cannot inline external " << Callee->getName() << ".\n"); return nullptr; } // Explicitly disabled inlining. if (Callee->getInlineStrategy() == NoInline) { DEBUG(llvm::dbgs() << " FAIL: noinline attribute on " << Callee->getName() << ".\n"); return nullptr; } if (!Callee->shouldOptimize()) { DEBUG(llvm::dbgs() << " FAIL: optimizations disabled on " << Callee->getName() << ".\n"); return nullptr; } // We don't support this yet. if (AI.hasSubstitutions()) { DEBUG(llvm::dbgs() << " FAIL: Generic substitutions on " << Callee->getName() << ".\n"); return nullptr; } // We don't support inlining a function that binds dynamic self because we // have no mechanism to preserve the original function's local self metadata. if (computeMayBindDynamicSelf(Callee)) { DEBUG(llvm::dbgs() << " FAIL: Binding dynamic Self in " << Callee->getName() << ".\n"); return nullptr; } SILFunction *Caller = AI.getFunction(); // Detect inlining cycles. if (hasInliningCycle(Caller, Callee)) { DEBUG(llvm::dbgs() << " FAIL: Detected a recursion inlining " << Callee->getName() << ".\n"); return nullptr; } // A non-fragile function may not be inlined into a fragile function. if (Caller->isFragile() && !Callee->isFragile()) { DEBUG(llvm::dbgs() << " FAIL: Can't inline fragile " << Callee->getName() << ".\n"); return nullptr; } // Inlining self-recursive functions into other functions can result // in excessive code duplication since we run the inliner multiple // times in our pipeline if (calleeIsSelfRecursive(Callee)) { DEBUG(llvm::dbgs() << " FAIL: Callee is self-recursive in " << Callee->getName() << ".\n"); return nullptr; } DEBUG(llvm::dbgs() << " Eligible callee: " << Callee->getName() << "\n"); return Callee; }
/// \brief Attempt to inline all calls smaller than our threshold. /// returns True if a function was inlined. bool SILPerformanceInliner::inlineCallsIntoFunction(SILFunction *Caller, DominanceAnalysis *DA, SILLoopAnalysis *LA, llvm::SmallVectorImpl<FullApplySite> &NewApplies) { // Don't optimize functions that are marked with the opt.never attribute. if (!Caller->shouldOptimize()) return false; // Construct a log of all of the names of the functions that we've inlined // in the current iteration. SmallVector<StringRef, 16> InlinedFunctionNames; StringRef CallerName = Caller->getName(); DEBUG(llvm::dbgs() << "Visiting Function: " << CallerName << "\n"); assert(NewApplies.empty() && "Expected empty vector to store results in!"); // First step: collect all the functions we want to inline. We // don't change anything yet so that the dominator information // remains valid. SmallVector<FullApplySite, 8> AppliesToInline; collectAppliesToInline(Caller, AppliesToInline, DA, LA); if (AppliesToInline.empty()) return false; // Second step: do the actual inlining. for (auto AI : AppliesToInline) { SILFunction *Callee = AI.getCalleeFunction(); assert(Callee && "apply_inst does not have a direct callee anymore"); DEBUG(llvm::dbgs() << " Inline:" << *AI.getInstruction()); if (!Callee->shouldOptimize()) { DEBUG(llvm::dbgs() << " Cannot inline function " << Callee->getName() << " marked to be excluded from optimizations.\n"); continue; } SmallVector<SILValue, 8> Args; for (const auto &Arg : AI.getArguments()) Args.push_back(Arg); // As we inline and clone we need to collect new applies. auto Filter = [](SILInstruction *I) -> bool { return bool(FullApplySite::isa(I)); }; CloneCollector Collector(Filter); // Notice that we will skip all of the newly inlined ApplyInsts. That's // okay because we will visit them in our next invocation of the inliner. TypeSubstitutionMap ContextSubs; SILInliner Inliner(*Caller, *Callee, SILInliner::InlineKind::PerformanceInline, ContextSubs, AI.getSubstitutions(), Collector.getCallback()); // Record the name of the inlined function (for cycle detection). InlinedFunctionNames.push_back(Callee->getName()); auto Success = Inliner.inlineFunction(AI, Args); (void) Success; // We've already determined we should be able to inline this, so // we expect it to have happened. assert(Success && "Expected inliner to inline this function!"); llvm::SmallVector<FullApplySite, 4> AppliesFromInlinee; for (auto &P : Collector.getInstructionPairs()) AppliesFromInlinee.push_back(FullApplySite(P.first)); recursivelyDeleteTriviallyDeadInstructions(AI.getInstruction(), true); NewApplies.insert(NewApplies.end(), AppliesFromInlinee.begin(), AppliesFromInlinee.end()); DA->invalidate(Caller, SILAnalysis::InvalidationKind::Everything); NumFunctionsInlined++; } // Record the names of the functions that we inlined. // We'll use this list to detect cycles in future iterations of // the inliner. for (auto CalleeName : InlinedFunctionNames) { InlinedFunctions.insert(std::make_pair(CallerName, CalleeName)); } DEBUG(llvm::dbgs() << "\n"); return true; }
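// The collection step above registers a callback with the inliner's cloner and keeps only
// the cloned instructions that are full apply sites. Stripped of SIL types, the pattern
// looks roughly like this (the real CloneCollector records old/new instruction pairs):
#include <functional>
#include <utility>
#include <vector>

template <typename Inst>
class SimpleCloneCollector {
  std::function<bool(Inst *)> Filter;
  std::vector<Inst *> Collected;

public:
  explicit SimpleCloneCollector(std::function<bool(Inst *)> F)
      : Filter(std::move(F)) {}

  // Handed to the cloner; invoked once per newly created instruction.
  std::function<void(Inst *)> getCallback() {
    return [this](Inst *I) {
      if (Filter(I))
        Collected.push_back(I);
    };
  }

  const std::vector<Inst *> &getCollected() const { return Collected; }
};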
/// Return true if inlining this call site is profitable. bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI, unsigned loopDepthOfAI, DominanceAnalysis *DA, SILLoopAnalysis *LA, ConstantTracker &callerTracker, unsigned &NumCallerBlocks) { SILFunction *Callee = AI.getReferencedFunction(); if (Callee->getInlineStrategy() == AlwaysInline) return true; ConstantTracker constTracker(Callee, &callerTracker, AI); DominanceInfo *DT = DA->get(Callee); SILLoopInfo *LI = LA->get(Callee); DominanceOrder domOrder(&Callee->front(), DT, Callee->size()); // Calculate the inlining cost of the callee. unsigned CalleeCost = 0; unsigned Benefit = InlineCostThreshold > 0 ? InlineCostThreshold : RemovedCallBenefit; Benefit += loopDepthOfAI * LoopBenefitFactor; int testThreshold = TestThreshold; while (SILBasicBlock *block = domOrder.getNext()) { constTracker.beginBlock(); for (SILInstruction &I : *block) { constTracker.trackInst(&I); if (testThreshold >= 0) { // We are in test-mode: use a simplified cost model. CalleeCost += testCost(&I); } else { // Use the regular cost model. CalleeCost += unsigned(instructionInlineCost(I)); } if (ApplyInst *AI = dyn_cast<ApplyInst>(&I)) { // Check if the callee is passed as an argument. If so, increase the // threshold, because inlining will (probably) eliminate the closure. SILInstruction *def = constTracker.getDefInCaller(AI->getCallee()); if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def))) { unsigned loopDepth = LI->getLoopDepth(block); Benefit += ConstCalleeBenefit + loopDepth * LoopBenefitFactor; testThreshold *= 2; } } } // Don't count costs in blocks which are dead after inlining. SILBasicBlock *takenBlock = getTakenBlock(block->getTerminator(), constTracker); if (takenBlock) { Benefit += ConstTerminatorBenefit; domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) { return child->getSinglePredecessor() != block || child == takenBlock; }); } else { domOrder.pushChildren(block); } } unsigned Threshold = Benefit; // The default. if (testThreshold >= 0) { // We are in testing mode. Threshold = testThreshold; } else if (AI.getFunction()->isThunk()) { // Only inline trivial functions into thunks (which will not increase the // code size). Threshold = TrivialFunctionThreshold; } else { // The default case. // We reduce the benefit if the caller is too large. For this we use a // cubic function on the number of caller blocks. This starts to prevent // inlining at about 800 - 1000 caller blocks. unsigned blockMinus = (NumCallerBlocks * NumCallerBlocks) / BlockLimitDenominator * NumCallerBlocks / BlockLimitDenominator; if (Threshold > blockMinus + TrivialFunctionThreshold) Threshold -= blockMinus; else Threshold = TrivialFunctionThreshold; } if (CalleeCost > Threshold) { return false; } NumCallerBlocks += Callee->size(); DEBUG( dumpCaller(AI.getFunction()); llvm::dbgs() << " decision {" << CalleeCost << " < " << Threshold << ", ld=" << loopDepthOfAI << ", bb=" << NumCallerBlocks << "} " << Callee->getName() << '\n'; );
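// The caller-size damping above is deliberately cubic in the caller's block count: the
// benefit-derived threshold is reduced by roughly NumCallerBlocks^3 / BlockLimitDenominator^2,
// never dropping below the trivial-function threshold. As a standalone computation (the
// constants are passed as parameters, since their values are configuration-dependent):
unsigned adjustedInlineThreshold(unsigned Benefit, unsigned NumCallerBlocks,
                                 unsigned BlockLimitDenominator,
                                 unsigned TrivialFunctionThreshold) {
  unsigned BlockMinus = (NumCallerBlocks * NumCallerBlocks) / BlockLimitDenominator *
                        NumCallerBlocks / BlockLimitDenominator;
  if (Benefit > BlockMinus + TrivialFunctionThreshold)
    return Benefit - BlockMinus;
  return TrivialFunctionThreshold;   // floor: always allow trivial callees
}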
bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI, Weight CallerWeight, ConstantTracker &callerTracker, int &NumCallerBlocks, bool IsGeneric) { SILFunction *Callee = AI.getReferencedFunction(); SILLoopInfo *LI = LA->get(Callee); ShortestPathAnalysis *SPA = getSPA(Callee, LI); assert(SPA->isValid()); ConstantTracker constTracker(Callee, &callerTracker, AI); DominanceInfo *DT = DA->get(Callee); SILBasicBlock *CalleeEntry = &Callee->front(); DominanceOrder domOrder(CalleeEntry, DT, Callee->size()); // Calculate the inlining cost of the callee. int CalleeCost = 0; int Benefit = 0; // Start with a base benefit. int BaseBenefit = RemovedCallBenefit; const SILOptions &Opts = Callee->getModule().getOptions(); // For some reason -Ounchecked can accept a higher base benefit without // increasing the code size too much. if (Opts.Optimization == SILOptions::SILOptMode::OptimizeUnchecked) BaseBenefit *= 2; CallerWeight.updateBenefit(Benefit, BaseBenefit); // Go through all blocks of the function, accumulate the cost and find // benefits. while (SILBasicBlock *block = domOrder.getNext()) { constTracker.beginBlock(); Weight BlockW = SPA->getWeight(block, CallerWeight); for (SILInstruction &I : *block) { constTracker.trackInst(&I); CalleeCost += (int)instructionInlineCost(I); if (FullApplySite AI = FullApplySite::isa(&I)) { // Check if the callee is passed as an argument. If so, increase the // threshold, because inlining will (probably) eliminate the closure. SILInstruction *def = constTracker.getDefInCaller(AI.getCallee()); if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def))) BlockW.updateBenefit(Benefit, RemovedClosureBenefit); } else if (auto *LI = dyn_cast<LoadInst>(&I)) { // Check if it's a load from a stack location in the caller. Such a load // might be optimized away if inlined. if (constTracker.isStackAddrInCaller(LI->getOperand())) BlockW.updateBenefit(Benefit, RemovedLoadBenefit); } else if (auto *SI = dyn_cast<StoreInst>(&I)) { // Check if it's a store to a stack location in the caller. Such a load // might be optimized away if inlined. if (constTracker.isStackAddrInCaller(SI->getDest())) BlockW.updateBenefit(Benefit, RemovedStoreBenefit); } else if (isa<StrongReleaseInst>(&I) || isa<ReleaseValueInst>(&I)) { SILValue Op = stripCasts(I.getOperand(0)); if (SILArgument *Arg = dyn_cast<SILArgument>(Op)) { if (Arg->isFunctionArg() && Arg->getArgumentConvention() == SILArgumentConvention::Direct_Guaranteed) { BlockW.updateBenefit(Benefit, RefCountBenefit); } } } else if (auto *BI = dyn_cast<BuiltinInst>(&I)) { if (BI->getBuiltinInfo().ID == BuiltinValueKind::OnFastPath) BlockW.updateBenefit(Benefit, FastPathBuiltinBenefit); } } // Don't count costs in blocks which are dead after inlining. SILBasicBlock *takenBlock = constTracker.getTakenBlock(block->getTerminator()); if (takenBlock) { BlockW.updateBenefit(Benefit, RemovedTerminatorBenefit); domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) { return child->getSinglePredecessor() != block || child == takenBlock; }); } else { domOrder.pushChildren(block); } } if (AI.getFunction()->isThunk()) { // Only inline trivial functions into thunks (which will not increase the // code size). if (CalleeCost > TrivialFunctionThreshold) return false; DEBUG( dumpCaller(AI.getFunction()); llvm::dbgs() << " decision {" << CalleeCost << " into thunk} " << Callee->getName() << '\n'; ); return true; }
// Returns the callee of an apply_inst if it is basically inlineable. SILFunction *SILPerformanceInliner::getEligibleFunction(FullApplySite AI) { SILFunction *Callee = AI.getReferencedFunction(); if (!Callee) { return nullptr; } // Don't inline functions that are marked with the @_semantics or @effects // attribute if the inliner is asked not to inline them. if (Callee->hasSemanticsAttrs() || Callee->hasEffectsKind()) { if (WhatToInline == InlineSelection::NoSemanticsAndGlobalInit) { return nullptr; } // The "availability" semantics attribute is treated like global-init. if (Callee->hasSemanticsAttrs() && WhatToInline != InlineSelection::Everything && Callee->hasSemanticsAttrThatStartsWith("availability")) { return nullptr; } } else if (Callee->isGlobalInit()) { if (WhatToInline != InlineSelection::Everything) { return nullptr; } } // We can't inline external declarations. if (Callee->empty() || Callee->isExternalDeclaration()) { return nullptr; } // Explicitly disabled inlining. if (Callee->getInlineStrategy() == NoInline) { return nullptr; } if (!Callee->shouldOptimize()) { return nullptr; } // We don't support this yet. if (AI.hasSubstitutions()) return nullptr; SILFunction *Caller = AI.getFunction(); // We don't support inlining a function that binds dynamic self because we // have no mechanism to preserve the original function's local self metadata. if (mayBindDynamicSelf(Callee)) { // Check if passed Self is the same as the Self of the caller. // In this case, it is safe to inline because both functions // use the same Self. if (AI.hasSelfArgument() && Caller->hasSelfParam()) { auto CalleeSelf = stripCasts(AI.getSelfArgument()); auto CallerSelf = Caller->getSelfArgument(); if (CalleeSelf != SILValue(CallerSelf)) return nullptr; } else return nullptr; } // Detect self-recursive calls. if (Caller == Callee) { return nullptr; } // A non-fragile function may not be inlined into a fragile function. if (Caller->isFragile() && !Callee->hasValidLinkageForFragileInline()) { if (!Callee->hasValidLinkageForFragileRef()) { llvm::errs() << "caller: " << Caller->getName() << "\n"; llvm::errs() << "callee: " << Callee->getName() << "\n"; llvm_unreachable("Should never be inlining a resilient function into " "a fragile function"); } return nullptr; } // Inlining self-recursive functions into other functions can result // in excessive code duplication since we run the inliner multiple // times in our pipeline if (calleeIsSelfRecursive(Callee)) { return nullptr; } return Callee; }
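// The dynamic-Self restriction above has one carve-out: if the Self value passed at the
// apply is provably the caller's own Self, inlining is still allowed. Reduced to a
// predicate with opaque values (hypothetical types):
using OpaqueValue = const void *;

bool dynamicSelfInlineAllowed(bool CalleeBindsDynamicSelf, bool HaveSelfArguments,
                              OpaqueValue CalleeSelf, OpaqueValue CallerSelf) {
  if (!CalleeBindsDynamicSelf)
    return true;
  // Both caller and callee must use the very same Self value.
  return HaveSelfArguments && CalleeSelf == CallerSelf;
}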
CanSILFunctionType FunctionSignatureTransformDescriptor::createOptimizedSILFunctionType() { SILFunction *F = OriginalFunction; CanSILFunctionType FTy = F->getLoweredFunctionType(); auto ExpectedFTy = F->getLoweredType().castTo<SILFunctionType>(); auto HasGenericSignature = FTy->getGenericSignature() != nullptr; // The only way that we modify the arity of function parameters is here for // dead arguments. Doing anything else is unsafe since by definition non-dead // arguments will have SSA uses in the function. We would need to be smarter // in our moving to handle such cases. llvm::SmallVector<SILParameterInfo, 8> InterfaceParams; for (auto &ArgDesc : ArgumentDescList) { computeOptimizedArgInterface(ArgDesc, InterfaceParams); } // ResultDescs only covers the direct results; we currently can't ever // change an indirect result. Piece the modified direct result information // back into the all-results list. llvm::SmallVector<SILResultInfo, 8> InterfaceResults; for (SILResultInfo InterfaceResult : FTy->getResults()) { if (InterfaceResult.isFormalDirect()) { auto &RV = ResultDescList[0]; if (!RV.CalleeRetain.empty()) { ++NumOwnedConvertedToNotOwnedResult; InterfaceResults.push_back(SILResultInfo(InterfaceResult.getType(), ResultConvention::Unowned)); continue; } } InterfaceResults.push_back(InterfaceResult); } llvm::SmallVector<SILYieldInfo, 8> InterfaceYields; for (SILYieldInfo InterfaceYield : FTy->getYields()) { // For now, don't touch the yield types. InterfaceYields.push_back(InterfaceYield); } bool UsesGenerics = false; if (HasGenericSignature) { // Not all of the generic type parameters are used by the function // parameters. // Check which of the generic type parameters are not used and check if they // are used anywhere in the function body. If this is not the case, we can // remove the unused generic type parameters from the generic signature. // This makes the code both smaller and faster, because no implicit // parameters for type metadata and conformances need to be passed to the // callee at the LLVM IR level. // TODO: Implement a more precise analysis, so that we can eliminate only // those generic parameters which are not used. UsesGenerics = usesGenerics(F, InterfaceParams, InterfaceResults); // The set of used archetypes is complete now. if (!UsesGenerics) { // None of the generic type parameters are used. LLVM_DEBUG(llvm::dbgs() << "None of generic parameters are used by " << F->getName() << "\n"; llvm::dbgs() << "Interface params:\n"; for (auto Param : InterfaceParams) { Param.getType().dump(); } llvm::dbgs() << "Interface results:\n"; for (auto Result : InterfaceResults) { Result.getType().dump(); }); }
// Returns the callee of an apply_inst if it is basically inlinable. SILFunction *swift::getEligibleFunction(FullApplySite AI, InlineSelection WhatToInline) { SILFunction *Callee = AI.getReferencedFunction(); if (!Callee) { return nullptr; } // Not all apply sites can be inlined, even if they're direct. if (!SILInliner::canInline(AI)) return nullptr; ModuleDecl *SwiftModule = Callee->getModule().getSwiftModule(); bool IsInStdlib = (SwiftModule->isStdlibModule() || SwiftModule->isOnoneSupportModule()); // Don't inline functions that are marked with the @_semantics or @_effects // attribute if the inliner is asked not to inline them. if (Callee->hasSemanticsAttrs() || Callee->hasEffectsKind()) { if (WhatToInline == InlineSelection::NoSemanticsAndGlobalInit) { if (shouldSkipApplyDuringEarlyInlining(AI)) return nullptr; if (Callee->hasSemanticsAttr("inline_late")) return nullptr; } // The "availability" semantics attribute is treated like global-init. if (Callee->hasSemanticsAttrs() && WhatToInline != InlineSelection::Everything && (Callee->hasSemanticsAttrThatStartsWith("availability") || (Callee->hasSemanticsAttrThatStartsWith("inline_late")))) { return nullptr; } if (Callee->hasSemanticsAttrs() && WhatToInline == InlineSelection::Everything) { if (Callee->hasSemanticsAttrThatStartsWith("inline_late") && IsInStdlib) { return nullptr; } } } else if (Callee->isGlobalInit()) { if (WhatToInline != InlineSelection::Everything) { return nullptr; } } // We can't inline external declarations. if (Callee->empty() || Callee->isExternalDeclaration()) { return nullptr; } // Explicitly disabled inlining. if (Callee->getInlineStrategy() == NoInline) { return nullptr; } if (!Callee->shouldOptimize()) { return nullptr; } SILFunction *Caller = AI.getFunction(); // We don't support inlining a function that binds dynamic self because we // have no mechanism to preserve the original function's local self metadata. if (mayBindDynamicSelf(Callee)) { // Check if passed Self is the same as the Self of the caller. // In this case, it is safe to inline because both functions // use the same Self. if (AI.hasSelfArgument() && Caller->hasSelfParam()) { auto CalleeSelf = stripCasts(AI.getSelfArgument()); auto CallerSelf = Caller->getSelfArgument(); if (CalleeSelf != SILValue(CallerSelf)) return nullptr; } else return nullptr; } // Detect self-recursive calls. if (Caller == Callee) { return nullptr; } // A non-fragile function may not be inlined into a fragile function. if (Caller->isSerialized() && !Callee->hasValidLinkageForFragileInline()) { if (!Callee->hasValidLinkageForFragileRef()) { llvm::errs() << "caller: " << Caller->getName() << "\n"; llvm::errs() << "callee: " << Callee->getName() << "\n"; llvm_unreachable("Should never be inlining a resilient function into " "a fragile function"); } return nullptr; } // Inlining self-recursive functions into other functions can result // in excessive code duplication since we run the inliner multiple // times in our pipeline if (calleeIsSelfRecursive(Callee)) { return nullptr; } if (!EnableSILInliningOfGenerics && AI.hasSubstitutions()) { // Inlining of generics is not allowed unless it is an @inline(__always) // or transparent function. if (Callee->getInlineStrategy() != AlwaysInline && !Callee->isTransparent()) return nullptr; } // We cannot inline function with layout constraints on its generic types // if the corresponding substitution type does not have the same constraints. 
// The reason for this restriction is that we'd need to be able to express // in SIL something like casting a value of generic type T into a value of // generic type T: _LayoutConstraint, which is impossible currently. if (EnableSILInliningOfGenerics && AI.hasSubstitutions()) { if (!isCallerAndCalleeLayoutConstraintsCompatible(AI)) return nullptr; } // IRGen cannot handle partial_applies containing opened_existentials // in its substitutions list. if (calleeHasPartialApplyWithOpenedExistentials(AI)) { return nullptr; } return Callee; }
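getEligibleFunction is essentially a chain of guard clauses keyed off the InlineSelection policy. A minimal sketch of that early-return shape, assuming a hypothetical Fn summary struct rather than SILFunction (only a few of the real checks are modeled):

#include <string>

enum class InlineSelection { Everything, NoGlobalInit, NoSemanticsAndGlobalInit };

// Hypothetical callee summary; the real code queries SILFunction directly.
struct Fn {
  std::string name;
  bool hasSemanticsAttrs = false;
  bool isGlobalInit = false;
  bool isExternal = false;
  bool noinline = false;
};

// Return the callee if every guard passes, nullptr otherwise.
const Fn *eligibleCallee(const Fn *callee, InlineSelection what) {
  if (!callee)
    return nullptr;
  if (callee->hasSemanticsAttrs &&
      what == InlineSelection::NoSemanticsAndGlobalInit)
    return nullptr;                     // policy says: skip @_semantics callees
  if (callee->isGlobalInit && what != InlineSelection::Everything)
    return nullptr;                     // global initializers only when asked for
  if (callee->isExternal || callee->noinline)
    return nullptr;                     // no body available, or explicitly disabled
  return callee;
}

The real pass layers further checks on top of this shape: self-recursion, serialized-into-fragile mismatches, generic substitutions, and layout-constraint compatibility.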
/// \brief Inlines all mandatory inlined functions into the body of a function, /// first recursively inlining all mandatory apply instructions in those /// functions into their bodies if necessary. /// /// \param F the function to be processed /// \param AI nullptr if this is being called from the top level; the relevant /// ApplyInst requiring the recursive call when non-null /// \param FullyInlinedSet the set of all functions already known to be fully /// processed, to avoid processing them over again /// \param SetFactory an instance of ImmutableFunctionSet::Factory /// \param CurrentInliningSet the set of functions currently being inlined in /// the current call stack of recursive calls /// /// \returns true if successful, false if failed due to circular inlining. static bool runOnFunctionRecursively(SILFunction *F, FullApplySite AI, SILModule::LinkingMode Mode, DenseFunctionSet &FullyInlinedSet, ImmutableFunctionSet::Factory &SetFactory, ImmutableFunctionSet CurrentInliningSet, ClassHierarchyAnalysis *CHA) { // Avoid reprocessing functions needlessly. if (FullyInlinedSet.count(F)) return true; // Prevent attempt to circularly inline. if (CurrentInliningSet.contains(F)) { // This cannot happen on a top-level call, so AI should be non-null. assert(AI && "Cannot have circular inline without apply"); SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::circular_transparent); return false; } // Add to the current inlining set (immutably, so we only affect the set // during this call and recursive subcalls). CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); SmallVector<SILValue, 16> CaptureArgs; SmallVector<SILValue, 32> FullArgs; for (auto FI = F->begin(), FE = F->end(); FI != FE; ++FI) { for (auto I = FI->begin(), E = FI->end(); I != E; ++I) { FullApplySite InnerAI = FullApplySite::isa(&*I); if (!InnerAI) continue; auto *ApplyBlock = InnerAI.getParent(); auto NewInstPair = tryDevirtualizeApply(InnerAI, CHA); if (auto *NewInst = NewInstPair.first) { replaceDeadApply(InnerAI, NewInst); if (auto *II = dyn_cast<SILInstruction>(NewInst)) I = II->getIterator(); else I = NewInst->getParentBB()->begin(); auto NewAI = FullApplySite::isa(NewInstPair.second.getInstruction()); if (!NewAI) continue; InnerAI = NewAI; } SILLocation Loc = InnerAI.getLoc(); SILValue CalleeValue = InnerAI.getCallee(); bool IsThick; PartialApplyInst *PAI; SILFunction *CalleeFunction = getCalleeFunction(InnerAI, IsThick, CaptureArgs, FullArgs, PAI, Mode); if (!CalleeFunction || CalleeFunction->isTransparent() == IsNotTransparent) continue; // Then recursively process it first before trying to inline it. if (!runOnFunctionRecursively(CalleeFunction, InnerAI, Mode, FullyInlinedSet, SetFactory, CurrentInliningSet, CHA)) { // If we failed due to circular inlining, then emit some notes to // trace back the failure if we have more information. // FIXME: possibly it could be worth recovering and attempting other // inlines within this same recursive call rather than simply // propagating the failure. if (AI) { SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::note_while_inlining); } return false; } // Inline function at I, which also changes I to refer to the first // instruction inlined in the case that it succeeds. 
We purposely // process the inlined body after inlining, because the inlining may // have exposed new inlining opportunities beyond those present in // the inlined function when processed independently. DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() << " into @" << InnerAI.getFunction()->getName() << "\n"); // Decrement our iterator (carefully, to avoid going off the front) so it // is valid after inlining is done. Inlining deletes the apply, and can // introduce multiple new basic blocks. if (I != ApplyBlock->begin()) --I; else I = ApplyBlock->end(); TypeSubstitutionMap ContextSubs; std::vector<Substitution> ApplySubs(InnerAI.getSubstitutions()); if (PAI) { auto PAISubs = PAI->getSubstitutions(); ApplySubs.insert(ApplySubs.end(), PAISubs.begin(), PAISubs.end()); } ContextSubs.copyFrom(CalleeFunction->getContextGenericParams() ->getSubstitutionMap(ApplySubs)); SILInliner Inliner(*F, *CalleeFunction, SILInliner::InlineKind::MandatoryInline, ContextSubs, ApplySubs); if (!Inliner.inlineFunction(InnerAI, FullArgs)) { I = InnerAI.getInstruction()->getIterator(); continue; } // Inlining was successful. Remove the apply. InnerAI.getInstruction()->eraseFromParent(); // Reestablish our iterator if it wrapped. if (I == ApplyBlock->end()) I = ApplyBlock->begin(); else ++I; // If the inlined apply was a thick function, then we need to balance the // reference counts for correctness. if (IsThick) fixupReferenceCounts(I, Loc, CalleeValue, CaptureArgs); // Now that the IR is correct, see if we can remove dead callee // computations (e.g. dead partial_apply closures). cleanupCalleeValue(CalleeValue, CaptureArgs, FullArgs); // Reposition iterators possibly invalidated by mutation. FI = SILFunction::iterator(ApplyBlock); I = ApplyBlock->begin(); E = ApplyBlock->end(); ++NumMandatoryInlines; } } // Keep track of fully inlined functions so we don't waste time recursively // reprocessing them. FullyInlinedSet.insert(F); return true; }
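The circular-inlining guard above relies on CurrentInliningSet being extended immutably, so additions are visible only to the current call and its recursive subcalls, never to sibling calls. A minimal standalone sketch of the same idea, using a by-value std::set over function names in place of ImmutableFunctionSet (the toy CallGraph type and names are invented for illustration):

#include <map>
#include <set>
#include <string>
#include <vector>

// Toy call graph: function name -> names of functions it applies.
using CallGraph = std::map<std::string, std::vector<std::string>>;

// Returns false if a cycle of mandatory inlines is detected.
// inliningPath is passed by value, so each recursive branch sees only the
// functions on its own path -- the same effect the immutable set provides.
bool inlineRecursively(const CallGraph &cg, const std::string &fn,
                       std::set<std::string> inliningPath,
                       std::set<std::string> &fullyInlined) {
  if (fullyInlined.count(fn))
    return true;                       // already fully processed, nothing to do
  if (!inliningPath.insert(fn).second)
    return false;                      // fn is already on this path: a cycle
  auto it = cg.find(fn);
  if (it != cg.end())
    for (const std::string &callee : it->second)
      if (!inlineRecursively(cg, callee, inliningPath, fullyInlined))
        return false;                  // propagate the circular-inlining failure
  fullyInlined.insert(fn);
  return true;
}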
SILAnalysis::InvalidationKind processFunction(SILFunction &F, bool EnableDiagnostics, unsigned AssertConfiguration) { DEBUG(llvm::dbgs() << "*** ConstPropagation processing: " << F.getName() << "\n"); // This is the list of traits that this transformation might preserve. bool InvalidateBranches = false; bool InvalidateCalls = false; bool InvalidateInstructions = false; // Should we replace calls to assert_configuration by the assert // configuration? bool InstantiateAssertConfiguration = (AssertConfiguration != SILOptions::DisableReplacement); // The list of instructions whose evaluation resulted in error or warning. // This is used to avoid duplicate error reporting in case we reach the same // instruction from different entry points in the WorkList. llvm::DenseSet<SILInstruction *> ErrorSet; // The worklist of the constants that could be folded into their users. llvm::SetVector<SILInstruction *> WorkList; initializeWorklist(F, InstantiateAssertConfiguration, WorkList); llvm::SetVector<SILInstruction *> FoldedUsers; CastOptimizer CastOpt( [&](SILInstruction *I, ValueBase *V) { /* ReplaceInstUsesAction */ InvalidateInstructions = true; I->replaceAllUsesWith(V); }, [&](SILInstruction *I) { /* EraseAction */ auto *TI = dyn_cast<TermInst>(I); if (TI) { // Invalidate analysis information related to branches. Replacing // unconditional_check_branch type instructions by a trap will also // invalidate branches/the CFG. InvalidateBranches = true; } InvalidateInstructions = true; WorkList.remove(I); I->eraseFromParent(); }); while (!WorkList.empty()) { SILInstruction *I = WorkList.pop_back_val(); assert(I->getParent() && "SILInstruction must have parent."); DEBUG(llvm::dbgs() << "Visiting: " << *I); // Replace assert_configuration instructions by their constant value. We // want them to be replaced even if we can't fully propagate the constant. if (InstantiateAssertConfiguration) if (auto *BI = dyn_cast<BuiltinInst>(I)) { if (isApplyOfBuiltin(*BI, BuiltinValueKind::AssertConf)) { // Instantiate the constant. SILBuilderWithScope B(BI); auto AssertConfInt = B.createIntegerLiteral( BI->getLoc(), BI->getType(), AssertConfiguration); BI->replaceAllUsesWith(AssertConfInt); // Schedule users for constant folding. WorkList.insert(AssertConfInt); // Delete the call. recursivelyDeleteTriviallyDeadInstructions(BI); InvalidateInstructions = true; continue; } // Kill calls to conditionallyUnreachable if we've folded assert // configuration calls. if (isApplyOfBuiltin(*BI, BuiltinValueKind::CondUnreachable)) { assert(BI->use_empty() && "use of conditionallyUnreachable?!"); recursivelyDeleteTriviallyDeadInstructions(BI, /*force*/ true); InvalidateInstructions = true; continue; } } if (auto *AI = dyn_cast<ApplyInst>(I)) { // An apply may only come from a string.concat invocation. if (constantFoldStringConcatenation(AI, WorkList)) { // Invalidate all analyses related to the call graph. InvalidateInstructions = true; } continue; } if (isa<CheckedCastBranchInst>(I) || isa<CheckedCastAddrBranchInst>(I) || isa<UnconditionalCheckedCastInst>(I) || isa<UnconditionalCheckedCastAddrInst>(I)) { // Try to perform cast optimizations. Invalidation is handled by a // callback inside the cast optimizer. 
ValueBase *Result = nullptr; switch(I->getKind()) { default: llvm_unreachable("Unexpected instruction for cast optimizations"); case ValueKind::CheckedCastBranchInst: Result = CastOpt.simplifyCheckedCastBranchInst(cast<CheckedCastBranchInst>(I)); break; case ValueKind::CheckedCastAddrBranchInst: Result = CastOpt.simplifyCheckedCastAddrBranchInst(cast<CheckedCastAddrBranchInst>(I)); break; case ValueKind::UnconditionalCheckedCastInst: Result = CastOpt.optimizeUnconditionalCheckedCastInst(cast<UnconditionalCheckedCastInst>(I)); break; case ValueKind::UnconditionalCheckedCastAddrInst: Result = CastOpt.optimizeUnconditionalCheckedCastAddrInst(cast<UnconditionalCheckedCastAddrInst>(I)); break; } if (Result) { if (isa<CheckedCastBranchInst>(Result) || isa<CheckedCastAddrBranchInst>(Result) || isa<UnconditionalCheckedCastInst>(Result) || isa<UnconditionalCheckedCastAddrInst>(Result)) WorkList.insert(cast<SILInstruction>(Result)); } continue; } // Go through all users of the constant and try to fold them. FoldedUsers.clear(); for (auto Use : I->getUses()) { SILInstruction *User = Use->getUser(); DEBUG(llvm::dbgs() << " User: " << *User); // It is possible that we had processed this user already. Do not try // to fold it again if we had previously produced an error while folding // it. It is not always possible to fold an instruction in case of error. if (ErrorSet.count(User)) continue; // Some constant users may indirectly cause folding of their users. if (isa<StructInst>(User) || isa<TupleInst>(User)) { WorkList.insert(User); continue; } // Always consider cond_fail instructions as potential for DCE. If the // expression feeding them is false, they are dead. We can't handle this // as part of the constant folding logic, because there is no value // they can produce (other than empty tuple, which is wasteful). if (isa<CondFailInst>(User)) FoldedUsers.insert(User); // Initialize ResultsInError as a None optional. // // We are essentially using this optional to represent 3 states: true, // false, and n/a. Optional<bool> ResultsInError; // If we are asked to emit diagnostics, override ResultsInError with a // Some optional initialized to false. if (EnableDiagnostics) ResultsInError = false; // Try to fold the user. If ResultsInError is None, we do not emit any // diagnostics. If ResultsInError is some, we use it as our return value. SILValue C = constantFoldInstruction(*User, ResultsInError); // If we did not pass in a None and the optional is set to true, add the // user to our error set. if (ResultsInError.hasValue() && ResultsInError.getValue()) ErrorSet.insert(User); // We failed to constant propagate... continue... if (!C) continue; // Ok, we have succeeded. Add user to the FoldedUsers list and perform the // necessary cleanups, RAUWs, etc. FoldedUsers.insert(User); ++NumInstFolded; InvalidateInstructions = true; // If the constant produced a tuple, be smarter than RAUW: explicitly nuke // any tuple_extract instructions using the apply. This is a common case // for functions returning multiple values. if (auto *TI = dyn_cast<TupleInst>(C)) { for (auto UI = User->use_begin(), E = User->use_end(); UI != E;) { Operand *O = *UI++; // If the user is a tuple_extract, just substitute the right value in. 
if (auto *TEI = dyn_cast<TupleExtractInst>(O->getUser())) { SILValue NewVal = TI->getOperand(TEI->getFieldNo()); TEI->replaceAllUsesWith(NewVal); TEI->dropAllReferences(); FoldedUsers.insert(TEI); if (auto *Inst = dyn_cast<SILInstruction>(NewVal)) WorkList.insert(Inst); } } if (User->use_empty()) FoldedUsers.insert(TI); } // We were able to fold, so all users should use the new folded value. User->replaceAllUsesWith(C); // The new constant could be further folded now, add it to the worklist. if (auto *Inst = dyn_cast<SILInstruction>(C)) WorkList.insert(Inst); } // Eagerly DCE. We do this after visiting all users to ensure we don't // invalidate the uses iterator. auto UserArray = ArrayRef<SILInstruction *>(&*FoldedUsers.begin(), FoldedUsers.size()); if (!UserArray.empty()) { InvalidateInstructions = true; } recursivelyDeleteTriviallyDeadInstructions(UserArray, false, [&](SILInstruction *DeadI) { WorkList.remove(DeadI); }); } // TODO: refactor this code outside of the method. Passes should not merge // invalidation kinds themselves. using InvalidationKind = SILAnalysis::InvalidationKind; unsigned Inv = InvalidationKind::Nothing; if (InvalidateInstructions) Inv |= (unsigned) InvalidationKind::Instructions; if (InvalidateCalls) Inv |= (unsigned) InvalidationKind::Calls; if (InvalidateBranches) Inv |= (unsigned) InvalidationKind::Branches; return InvalidationKind(Inv); }
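The function ends by merging the independently tracked invalidation traits into a single bitmask before handing it back to the pass manager. A small standalone sketch of that merge, with illustrative flag values (the real encoding lives in SILAnalysis::InvalidationKind):

// Illustrative invalidation flags; the actual values are defined by SILAnalysis.
enum InvalidationKind : unsigned {
  Nothing      = 0,
  Instructions = 1u << 0,
  Calls        = 1u << 1,
  Branches     = 1u << 2,
};

// Combine the per-trait booleans tracked during the pass into one mask.
InvalidationKind mergeInvalidation(bool instructions, bool calls, bool branches) {
  unsigned inv = Nothing;
  if (instructions) inv |= Instructions;
  if (calls)        inv |= Calls;
  if (branches)     inv |= Branches;
  return static_cast<InvalidationKind>(inv);
}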
/// Inlines all mandatory inlined functions into the body of a function, /// first recursively inlining all mandatory apply instructions in those /// functions into their bodies if necessary. /// /// \param F the function to be processed /// \param AI nullptr if this is being called from the top level; the relevant /// ApplyInst requiring the recursive call when non-null /// \param FullyInlinedSet the set of all functions already known to be fully /// processed, to avoid processing them over again /// \param SetFactory an instance of ImmutableFunctionSet::Factory /// \param CurrentInliningSet the set of functions currently being inlined in /// the current call stack of recursive calls /// /// \returns true if successful, false if failed due to circular inlining. static bool runOnFunctionRecursively(SILOptFunctionBuilder &FuncBuilder, SILFunction *F, FullApplySite AI, DenseFunctionSet &FullyInlinedSet, ImmutableFunctionSet::Factory &SetFactory, ImmutableFunctionSet CurrentInliningSet, ClassHierarchyAnalysis *CHA) { // Avoid reprocessing functions needlessly. if (FullyInlinedSet.count(F)) return true; // Prevent attempt to circularly inline. if (CurrentInliningSet.contains(F)) { // This cannot happen on a top-level call, so AI should be non-null. assert(AI && "Cannot have circular inline without apply"); SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::circular_transparent); return false; } // Add to the current inlining set (immutably, so we only affect the set // during this call and recursive subcalls). CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); SmallVector<std::pair<SILValue, ParameterConvention>, 16> CaptureArgs; SmallVector<SILValue, 32> FullArgs; // Visiting blocks in reverse order avoids revisiting instructions after block // splitting, which would be quadratic. for (auto BI = F->rbegin(), BE = F->rend(), nextBB = BI; BI != BE; BI = nextBB) { // After inlining, the block iterator will be adjusted to point to the last // block containing inlined instructions. This way, the inlined function // body will be reprocessed within the caller's context without revisiting // any original instructions. nextBB = std::next(BI); // While iterating over this block, instructions are inserted and deleted. // To avoid quadratic block splitting, instructions must be processed in // reverse order (block splitting reassigns the parent pointer of all // instructions below the split point). for (auto II = BI->rbegin(); II != BI->rend(); ++II) { FullApplySite InnerAI = FullApplySite::isa(&*II); if (!InnerAI) continue; // *NOTE* If devirtualization succeeds, devirtInst may not be InnerAI, // but a casted result of InnerAI or even a block argument due to // abstraction changes when calling the witness or class method. auto *devirtInst = tryDevirtualizeApplyHelper(InnerAI, CHA); // Restore II to the current apply site. II = devirtInst->getReverseIterator(); // If the devirtualized call result is no longer a valid FullApplySite, // then devirtualization has succeeded, but the result is not immediately // inlinable. InnerAI = FullApplySite::isa(devirtInst); if (!InnerAI) continue; SILValue CalleeValue = InnerAI.getCallee(); bool IsThick; PartialApplyInst *PAI; SILFunction *CalleeFunction = getCalleeFunction( F, InnerAI, IsThick, CaptureArgs, FullArgs, PAI); if (!CalleeFunction) continue; // Then recursively process it first before trying to inline it. 
if (!runOnFunctionRecursively(FuncBuilder, CalleeFunction, InnerAI, FullyInlinedSet, SetFactory, CurrentInliningSet, CHA)) { // If we failed due to circular inlining, then emit some notes to // trace back the failure if we have more information. // FIXME: possibly it could be worth recovering and attempting other // inlines within this same recursive call rather than simply // propagating the failure. if (AI) { SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::note_while_inlining); } return false; } // Get our list of substitutions. auto Subs = (PAI ? PAI->getSubstitutionMap() : InnerAI.getSubstitutionMap()); SILOpenedArchetypesTracker OpenedArchetypesTracker(F); F->getModule().registerDeleteNotificationHandler( &OpenedArchetypesTracker); // The callee only needs to know about opened archetypes used in // the substitution list. OpenedArchetypesTracker.registerUsedOpenedArchetypes( InnerAI.getInstruction()); if (PAI) { OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI); } SILInliner Inliner(FuncBuilder, SILInliner::InlineKind::MandatoryInline, Subs, OpenedArchetypesTracker); if (!Inliner.canInlineApplySite(InnerAI)) continue; // Inline function at I, which also changes I to refer to the first // instruction inlined in the case that it succeeds. We purposely // process the inlined body after inlining, because the inlining may // have exposed new inlining opportunities beyond those present in // the inlined function when processed independently. LLVM_DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() << " into @" << InnerAI.getFunction()->getName() << "\n"); // If we intend to inline a thick function, then we need to balance the // reference counts for correctness. if (IsThick) { bool IsCalleeGuaranteed = PAI && PAI->getType().castTo<SILFunctionType>()->isCalleeGuaranteed(); fixupReferenceCounts(InnerAI.getInstruction(), CalleeValue, CaptureArgs, IsCalleeGuaranteed); } // Register a callback to record potentially unused function values after // inlining. ClosureCleanup closureCleanup; Inliner.setDeletionCallback([&closureCleanup](SILInstruction *I) { closureCleanup.recordDeadFunction(I); }); // Inlining deletes the apply, and can introduce multiple new basic // blocks. After this, CalleeValue and other instructions may be invalid. // nextBB will point to the last inlined block auto firstInlinedInstAndLastBB = Inliner.inlineFunction(CalleeFunction, InnerAI, FullArgs); nextBB = firstInlinedInstAndLastBB.second->getReverseIterator(); ++NumMandatoryInlines; // The IR is now valid, and trivial dead arguments are removed. However, // we may be able to remove dead callee computations (e.g. dead // partial_apply closures). closureCleanup.cleanupDeadClosures(F); // Resume inlining within nextBB, which contains only the inlined // instructions and possibly instructions in the original call block that // have not yet been visited. break; } } // Keep track of full inlined functions so we don't waste time recursively // reprocessing them. FullyInlinedSet.insert(F); return true; }
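A detail worth calling out in this newer version is the deletion callback: the inliner reports instructions it deletes to a ClosureCleanup helper, and the actual cleanup runs only after inlining finishes and the IR is stable. A hedged standalone sketch of that record-then-cleanup pattern (the Transform and Cleanup types below are invented stand-ins, not the SIL API):

#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Stand-in for a transformation that may delete values as a side effect.
struct Transform {
  std::function<void(const std::string &)> onDelete;
  void run() {
    // ... perform the rewrite; report anything it had to delete ...
    if (onDelete) onDelete("dead_closure_#1");
  }
};

// Stand-in for ClosureCleanup: record now, clean up once the IR is stable.
struct Cleanup {
  std::vector<std::string> dead;
  void record(const std::string &v) { dead.push_back(v); }
  void run() {
    for (const auto &v : dead)
      std::cout << "cleaning up " << v << "\n";  // the real code erases instructions
  }
};

int main() {
  Cleanup cleanup;
  Transform t;
  t.onDelete = [&cleanup](const std::string &v) { cleanup.record(v); };
  t.run();        // may delete values mid-transformation
  cleanup.run();  // safe to clean up only after the transform finishes
}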
/// \brief Inlines the callee of a given ApplyInst (which must be the value of a /// FunctionRefInst referencing a function with a known body), into the caller /// containing the ApplyInst, which must be the same function as provided to the /// constructor of SILInliner. It only performs one step of inlining: it does /// not recursively inline functions called by the callee. /// /// It is the responsibility of the caller of this function to delete /// the given ApplyInst when inlining is successful. /// /// \returns true on success or false if it is unable to inline the function /// (for any reason). bool SILInliner::inlineFunction(FullApplySite AI, ArrayRef<SILValue> Args) { SILFunction *CalleeFunction = &Original; this->CalleeFunction = CalleeFunction; // Do not attempt to inline an apply into its parent function. if (AI.getFunction() == CalleeFunction) return false; SILFunction &F = getBuilder().getFunction(); assert(AI.getFunction() && AI.getFunction() == &F && "Inliner called on apply instruction in wrong function?"); assert(((CalleeFunction->getRepresentation() != SILFunctionTypeRepresentation::ObjCMethod && CalleeFunction->getRepresentation() != SILFunctionTypeRepresentation::CFunctionPointer) || IKind == InlineKind::PerformanceInline) && "Cannot inline Objective-C methods or C functions in mandatory " "inlining"); CalleeEntryBB = &*CalleeFunction->begin(); // Compute the SILLocation which should be used by all the inlined // instructions. if (IKind == InlineKind::PerformanceInline) { Loc = InlinedLocation::getInlinedLocation(AI.getLoc()); } else { assert(IKind == InlineKind::MandatoryInline && "Unknown InlineKind."); Loc = MandatoryInlinedLocation::getMandatoryInlinedLocation(AI.getLoc()); } auto AIScope = AI.getDebugScope(); // FIXME: Turn this into an assertion instead. if (!AIScope) AIScope = AI.getFunction()->getDebugScope(); if (IKind == InlineKind::MandatoryInline) { // Mandatory inlining: every instruction inherits scope/location // from the call site. CallSiteScope = AIScope; } else { // Performance inlining. Construct a proper inline scope pointing // back to the call site. CallSiteScope = new (F.getModule()) SILDebugScope(AI.getLoc(), &F, AIScope); assert(CallSiteScope->getParentFunction() == &F); } assert(CallSiteScope && "call site has no scope"); // Mark the callee as inlined, so it doesn't get deleted before we can // emit abstract debug info for it. CalleeFunction->setInlined(); // If the caller's BB is not the last BB in the calling function, then keep // track of the next BB so we always insert new BBs before it; otherwise, // we just leave the new BBs at the end as they are by default. auto IBI = std::next(SILFunction::iterator(AI.getParent())); InsertBeforeBB = IBI != F.end() ? &*IBI : nullptr; // Clear the argument map and map ApplyInst arguments to the arguments of the // callee's entry block. 
ValueMap.clear(); assert(CalleeEntryBB->bbarg_size() == Args.size() && "Unexpected number of arguments to entry block of function?"); auto BAI = CalleeEntryBB->bbarg_begin(); for (auto AI = Args.begin(), AE = Args.end(); AI != AE; ++AI, ++BAI) ValueMap.insert(std::make_pair(*BAI, *AI)); InstructionMap.clear(); BBMap.clear(); // Do not allow the entry block to be cloned again. SILBasicBlock::iterator InsertPoint = SILBasicBlock::iterator(AI.getInstruction()); BBMap.insert(std::make_pair(CalleeEntryBB, AI.getParent())); getBuilder().setInsertionPoint(InsertPoint); // Recursively visit the callee's BBs in depth-first preorder, starting with // the entry block, cloning all instructions other than terminators. visitSILBasicBlock(CalleeEntryBB); // If we're inlining into a normal apply and the callee's entry // block ends in a return, then we can avoid a split. if (auto nonTryAI = dyn_cast<ApplyInst>(AI)) { if (ReturnInst *RI = dyn_cast<ReturnInst>(CalleeEntryBB->getTerminator())) { // Replace all uses of the apply instruction with the operands of the // return instruction, appropriately mapped. nonTryAI->replaceAllUsesWith(remapValue(RI->getOperand())); return true; } } // If we're inlining into a try_apply, we already have a return-to BB. SILBasicBlock *ReturnToBB; if (auto tryAI = dyn_cast<TryApplyInst>(AI)) { ReturnToBB = tryAI->getNormalBB(); // Otherwise, split the caller's basic block to create a return-to BB. } else { SILBasicBlock *CallerBB = AI.getParent(); // Split the BB and do NOT create a branch between the old and new // BBs; we will create the appropriate terminator manually later. ReturnToBB = CallerBB->splitBasicBlock(InsertPoint); // Place the return-to BB after all the other mapped BBs. if (InsertBeforeBB) F.getBlocks().splice(SILFunction::iterator(InsertBeforeBB), F.getBlocks(), SILFunction::iterator(ReturnToBB)); else F.getBlocks().splice(F.getBlocks().end(), F.getBlocks(), SILFunction::iterator(ReturnToBB)); // Create an argument on the return-to BB representing the returned value. auto *RetArg = new (F.getModule()) SILArgument(ReturnToBB, AI.getInstruction()->getType()); // Replace all uses of the ApplyInst with the new argument. AI.getInstruction()->replaceAllUsesWith(RetArg); } // Now iterate over the callee BBs and fix up the terminators. for (auto BI = BBMap.begin(), BE = BBMap.end(); BI != BE; ++BI) { getBuilder().setInsertionPoint(BI->second); // Modify return terminators to branch to the return-to BB, rather than // trying to clone the ReturnInst. if (ReturnInst *RI = dyn_cast<ReturnInst>(BI->first->getTerminator())) { auto returnedValue = remapValue(RI->getOperand()); getBuilder().createBranch(Loc.getValue(), ReturnToBB, returnedValue); continue; } // Modify throw terminators to branch to the error-return BB, rather than // trying to clone the ThrowInst. if (ThrowInst *TI = dyn_cast<ThrowInst>(BI->first->getTerminator())) { if (auto *A = dyn_cast<ApplyInst>(AI)) { (void)A; assert(A->isNonThrowing() && "apply of a function with error result must be non-throwing"); getBuilder().createUnreachable(Loc.getValue()); continue; } auto tryAI = cast<TryApplyInst>(AI); auto thrownValue = remapValue(TI->getOperand()); getBuilder().createBranch(Loc.getValue(), tryAI->getErrorBB(), thrownValue); continue; } // Otherwise use the normal visitor, which clones the existing instruction // but remaps basic blocks and values. visit(BI->first->getTerminator()); } return true; }
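The cloning machinery above is driven by ValueMap: callee entry-block arguments are pre-mapped to the apply's operands, and every cloned operand is rewritten through that map. A rough standalone sketch of the remapping step under that assumption (toy Instr representation, not SIL instructions):

#include <string>
#include <unordered_map>
#include <vector>

// Toy instruction: a result name plus operand names.
struct Instr { std::string result; std::vector<std::string> operands; };

// Clone a callee body into the caller, rewriting operands through valueMap.
// valueMap is taken by value and seeded with calleeParam -> callerArgument,
// mirroring how the inliner maps entry-block arguments to apply operands.
std::vector<Instr> cloneBody(const std::vector<Instr> &callee,
                             std::unordered_map<std::string, std::string> valueMap) {
  std::vector<Instr> cloned;
  for (const Instr &inst : callee) {
    Instr copy = inst;
    for (std::string &op : copy.operands) {
      auto it = valueMap.find(op);
      if (it != valueMap.end())
        op = it->second;                   // value defined outside the clone: remap it
    }
    copy.result += ".inlined";             // give the cloned definition a fresh name
    valueMap[inst.result] = copy.result;   // later operands now resolve to the clone
    cloned.push_back(copy);
  }
  return cloned;
}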