// A utility function for cloning the apply instruction. static FullApplySite CloneApply(FullApplySite AI, SILBuilder &Builder) { // Clone the Apply. Builder.setCurrentDebugScope(AI.getDebugScope()); Builder.addOpenedArchetypeOperands(AI.getInstruction()); auto Args = AI.getArguments(); SmallVector<SILValue, 8> Ret(Args.size()); for (unsigned i = 0, e = Args.size(); i != e; ++i) Ret[i] = Args[i]; FullApplySite NAI; switch (AI.getInstruction()->getKind()) { case SILInstructionKind::ApplyInst: NAI = Builder.createApply(AI.getLoc(), AI.getCallee(), AI.getSubstitutions(), Ret, cast<ApplyInst>(AI)->isNonThrowing()); break; case SILInstructionKind::TryApplyInst: { auto *TryApplyI = cast<TryApplyInst>(AI.getInstruction()); NAI = Builder.createTryApply(AI.getLoc(), AI.getCallee(), AI.getSubstitutions(), Ret, TryApplyI->getNormalBB(), TryApplyI->getErrorBB()); } break; default: llvm_unreachable("Trying to clone an unsupported apply instruction"); } NAI.getInstruction(); return NAI; }
/// Attempt to devirtualize the given apply if possible, and return a /// new instruction in that case, or nullptr otherwise. DevirtualizationResult swift::tryDevirtualizeApply(FullApplySite AI, ClassHierarchyAnalysis *CHA) { DEBUG(llvm::dbgs() << " Trying to devirtualize: " << *AI.getInstruction()); // Devirtualize apply instructions that call witness_method instructions: // // %8 = witness_method $Optional<UInt16>, #LogicValue.boolValue!getter.1 // %9 = apply %8<Self = CodeUnit?>(%6#1) : ... // if (isa<WitnessMethodInst>(AI.getCallee())) return tryDevirtualizeWitnessMethod(AI); /// Optimize a class_method and alloc_ref pair into a direct function /// reference: /// /// \code /// %XX = alloc_ref $Foo /// %YY = class_method %XX : $Foo, #Foo.get!1 : $@convention(method)... /// \endcode /// /// or /// /// %XX = metatype $... /// %YY = class_method %XX : ... /// /// into /// /// %YY = function_ref @... if (auto *CMI = dyn_cast<ClassMethodInst>(AI.getCallee())) { auto &M = AI.getModule(); auto Instance = stripUpCasts(CMI->getOperand()); auto ClassType = Instance->getType(); if (ClassType.is<MetatypeType>()) ClassType = ClassType.getMetatypeInstanceType(M); auto *CD = ClassType.getClassOrBoundGenericClass(); if (isEffectivelyFinalMethod(AI, ClassType, CD, CHA)) return tryDevirtualizeClassMethod(AI, Instance); // Try to check if the exact dynamic type of the instance is statically // known. if (auto Instance = getInstanceWithExactDynamicType(CMI->getOperand(), CMI->getModule(), CHA)) return tryDevirtualizeClassMethod(AI, Instance); } if (isa<SuperMethodInst>(AI.getCallee())) { if (AI.hasSelfArgument()) { return tryDevirtualizeClassMethod(AI, AI.getSelfArgument()); } // It is an invocation of a class method. // Last operand is the metatype that should be used for dispatching. return tryDevirtualizeClassMethod(AI, AI.getArguments().back()); } return std::make_pair(nullptr, FullApplySite()); }
/// \brief Returns true, if a method implementation corresponding to /// the class_method applied to an instance of the class CD is /// effectively final, i.e. it is statically known to be not overridden /// by any subclasses of the class CD. /// /// \p AI invocation instruction /// \p ClassType type of the instance /// \p CD static class of the instance whose method is being invoked /// \p CHA class hierarchy analysis bool isEffectivelyFinalMethod(FullApplySite AI, SILType ClassType, ClassDecl *CD, ClassHierarchyAnalysis *CHA) { if (CD && CD->isFinal()) return true; const DeclContext *DC = AI.getModule().getAssociatedContext(); // Without an associated context we cannot perform any // access-based optimizations. if (!DC) return false; auto *CMI = cast<MethodInst>(AI.getCallee()); if (!calleesAreStaticallyKnowable(AI.getModule(), CMI->getMember())) return false; auto *Method = CMI->getMember().getAbstractFunctionDecl(); assert(Method && "Expected abstract function decl!"); assert(!Method->isFinal() && "Unexpected indirect call to final method!"); // If this method is not overridden in the module, // there is no other implementation. if (!Method->isOverridden()) return true; // Class declaration may be nullptr, e.g. for cases like: // func foo<C:Base>(c: C) {}, where C is a class, but // it does not have a class decl. if (!CD) return false; if (!CHA) return false; // This is a private or a module internal class. // // We can analyze the class hierarchy rooted at it and // eventually devirtualize a method call more efficiently. ClassHierarchyAnalysis::ClassList Subs; getAllSubclasses(CHA, CD, ClassType, AI.getModule(), Subs); // This is the implementation of the method to be used // if the exact class of the instance would be CD. auto *ImplMethod = CD->findImplementingMethod(Method); // First, analyze all direct subclasses. for (auto S : Subs) { // Check if the subclass overrides a method and provides // a different implementation. auto *ImplFD = S->findImplementingMethod(Method); if (ImplFD != ImplMethod) return false; } return true; }
bool AliasAnalysis::canApplyDecrementRefCount(FullApplySite FAS, SILValue Ptr) { // Treat applications of @noreturn functions as decrementing ref counts. This // causes the apply to become a sink barrier for ref count increments. if (FAS.getCallee().getType().getAs<SILFunctionType>()->isNoReturn()) return true; /// If the pointer cannot escape to the function we are done. if (!EA->canEscapeTo(Ptr, FAS)) return false; SideEffectAnalysis::FunctionEffects ApplyEffects; SEA->getEffects(ApplyEffects, FAS); auto &GlobalEffects = ApplyEffects.getGlobalEffects(); if (ApplyEffects.mayReadRC() || GlobalEffects.mayRelease()) return true; /// The function has no unidentified releases, so let's look at the arguments // in detail. for (unsigned Idx = 0, End = FAS.getNumArguments(); Idx < End; ++Idx) { auto &ArgEffect = ApplyEffects.getParameterEffects()[Idx]; if (ArgEffect.mayRelease()) { // The function may release this argument, so check if the pointer can // escape to it. if (EA->canEscapeToValue(Ptr, FAS.getArgument(Idx))) return true; } } return false; }
/// \brief Check if it is possible to devirtualize an Apply instruction /// and a class member obtained using the class_method instruction into /// a direct call to a specific member of a specific class. /// /// \p AI is the apply to devirtualize. /// \p ClassOrMetatypeType is the class type or metatype type we are /// devirtualizing for. /// return true if it is possible to devirtualize, false - otherwise. bool swift::canDevirtualizeClassMethod(FullApplySite AI, SILType ClassOrMetatypeType, OptRemark::Emitter *ORE, bool isEffectivelyFinalMethod) { LLVM_DEBUG(llvm::dbgs() << " Trying to devirtualize : " << *AI.getInstruction()); SILModule &Mod = AI.getModule(); // First attempt to lookup the origin for our class method. The origin should // either be a metatype or an alloc_ref. LLVM_DEBUG(llvm::dbgs() << " Origin Type: " << ClassOrMetatypeType); auto *MI = cast<MethodInst>(AI.getCallee()); // Find the implementation of the member which should be invoked. auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, MI); // If we do not find any such function, we have no function to devirtualize // to... so bail. if (!F) { LLVM_DEBUG(llvm::dbgs() << " FAIL: Could not find matching VTable " "or vtable method for this class.\n"); return false; } // We need to disable the “effectively final” opt if a function is inlinable if (isEffectivelyFinalMethod && AI.getFunction()->getResilienceExpansion() == ResilienceExpansion::Minimal) { LLVM_DEBUG(llvm::dbgs() << " FAIL: Could not optimize function " "because it is an effectively-final inlinable: " << AI.getFunction()->getName() << "\n"); return false; } // Mandatory inlining does class method devirtualization. I'm not sure if this // is really needed, but some test rely on this. // So even for Onone functions we have to do it if the SILStage is raw. if (F->getModule().getStage() != SILStage::Raw && !F->shouldOptimize()) { // Do not consider functions that should not be optimized. LLVM_DEBUG(llvm::dbgs() << " FAIL: Could not optimize function " << " because it is marked no-opt: " << F->getName() << "\n"); return false; } if (AI.getFunction()->isSerialized()) { // function_ref inside fragile function cannot reference a private or // hidden symbol. if (!F->hasValidLinkageForFragileRef()) return false; } return true; }
/// Attempt to devirtualize the given apply if possible, and return a /// new instruction in that case, or nullptr otherwise. DevirtualizationResult swift::tryDevirtualizeApply(FullApplySite AI) { DEBUG(llvm::dbgs() << " Trying to devirtualize: " << *AI.getInstruction()); // Devirtualize apply instructions that call witness_method instructions: // // %8 = witness_method $Optional<UInt16>, #LogicValue.boolValue!getter.1 // %9 = apply %8<Self = CodeUnit?>(%6#1) : ... // if (isa<WitnessMethodInst>(AI.getCallee())) return tryDevirtualizeWitnessMethod(AI); /// Optimize a class_method and alloc_ref pair into a direct function /// reference: /// /// \code /// %XX = alloc_ref $Foo /// %YY = class_method %XX : $Foo, #Foo.get!1 : $@convention(method)... /// \endcode /// /// or /// /// %XX = metatype $... /// %YY = class_method %XX : ... /// /// into /// /// %YY = function_ref @... if (auto *CMI = dyn_cast<ClassMethodInst>(AI.getCallee())) { // Check if the class member is known to be final. if (isKnownFinal(CMI->getModule(), CMI->getMember())) return tryDevirtualizeClassMethod(AI, CMI->getOperand()); // Try to check if the exact dynamic type of the instance is statically // known. if (auto Instance = getInstanceWithExactDynamicType(CMI->getOperand())) return tryDevirtualizeClassMethod(AI, Instance); } return std::make_pair(nullptr, FullApplySite()); }
/// \brief Check if it is possible to devirtualize an Apply instruction /// and a class member obtained using the class_method instruction into /// a direct call to a specific member of a specific class. /// /// \p AI is the apply to devirtualize. /// \p ClassOrMetatypeType is the class type or metatype type we are /// devirtualizing for. /// return true if it is possible to devirtualize, false - otherwise. bool swift::canDevirtualizeClassMethod(FullApplySite AI, SILType ClassOrMetatypeType) { DEBUG(llvm::dbgs() << " Trying to devirtualize : " << *AI.getInstruction()); SILModule &Mod = AI.getModule(); // First attempt to lookup the origin for our class method. The origin should // either be a metatype or an alloc_ref. DEBUG(llvm::dbgs() << " Origin Type: " << ClassOrMetatypeType); auto *MI = cast<MethodInst>(AI.getCallee()); // Find the implementation of the member which should be invoked. auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, MI); // If we do not find any such function, we have no function to devirtualize // to... so bail. if (!F) { DEBUG(llvm::dbgs() << " FAIL: Could not find matching VTable or " "vtable method for this class.\n"); return false; } if (!F->shouldOptimize()) { // Do not consider functions that should not be optimized. DEBUG(llvm::dbgs() << " FAIL: Could not optimize function " << " because it is marked no-opt: " << F->getName() << "\n"); return false; } if (AI.getFunction()->isSerialized()) { // function_ref inside fragile function cannot reference a private or // hidden symbol. if (!F->hasValidLinkageForFragileRef()) return false; } if (MI->isVolatile()) { // dynamic dispatch is semantically required, can't devirtualize return false; } return true; }
// Start with the substitutions from the apply. // Try to propagate them to find out the real substitutions required // to invoke the method. static ArrayRef<Substitution> getSubstitutionsForCallee(SILModule &M, CanSILFunctionType GenCalleeType, SILType ClassInstanceType, FullApplySite AI) { // *NOTE*: // Apply instruction substitutions are for the Member from a protocol or // class B, where this member was first defined, before it got overridden by // derived classes. // // The implementation F (the implementing method) which was found may have // a different set of generic parameters, e.g. because it is implemented by a // class D1 derived from B. // // ClassInstanceType may have a type different from both the type B // the Member belongs to and from the ClassInstanceType, e.g. if // ClassInstance is of a class D2, which is derived from D1, but does not // override the Member. // // As a result, substitutions provided by AI are for Member, whereas // substitutions in ClassInstanceType are for D2. And substitutions for D1 // are not available directly in a general case. Therefore, they have to // be computed. // // What we know for sure: // B is a superclass of D1 // D1 is a superclass of D2. // D1 can be the same as D2. D1 can be the same as B. // // So, substitutions from AI are for class B. // Substitutions for class D1 by means of bindSuperclass(), which starts // with a bound type ClassInstanceType and checks its superclasses until it // finds a bound superclass matching D1 and returns its substitutions. // Class F belongs to. CanType FSelfClass = GenCalleeType->getSelfParameter().getType(); SILType FSelfSubstType; Module *Module = M.getSwiftModule(); ArrayRef<Substitution> ClassSubs; if (GenCalleeType->isPolymorphic()) { // Declaration of the class F belongs to. if (auto *FSelfTypeDecl = FSelfClass.getNominalOrBoundGenericNominal()) { // Get the unbound generic type F belongs to. CanType FSelfGenericType = FSelfTypeDecl->getDeclaredType()->getCanonicalType(); assert((isa<BoundGenericType>(ClassInstanceType.getSwiftRValueType()) || isa<NominalType>(ClassInstanceType.getSwiftRValueType())) && "Self type should be either a bound generic type" "or a non-generic type"); assert((isa<UnboundGenericType>(FSelfGenericType) || isa<NominalType>(FSelfGenericType)) && "Method implementation self type should be generic"); if (isa<BoundGenericType>(ClassInstanceType.getSwiftRValueType())) { auto BoundBaseType = bindSuperclass(FSelfGenericType, ClassInstanceType); if (auto BoundTy = BoundBaseType->getAs<BoundGenericType>()) { ClassSubs = BoundTy->getSubstitutions(Module, nullptr); } } } } else { // If the callee is not polymorphic, no substitutions are required. return {}; } if (ClassSubs.empty()) return AI.getSubstitutions(); auto AISubs = AI.getSubstitutions(); CanSILFunctionType AIGenCalleeType = AI.getCallee().getType().castTo<SILFunctionType>(); CanType AISelfClass = AIGenCalleeType->getSelfParameter().getType(); unsigned NextMethodParamIdx = 0; unsigned NumMethodParams = 0; if (AIGenCalleeType->isPolymorphic()) { NextMethodParamIdx = 0; // Generic parameters of the method start after generic parameters // of the instance class. 
if (auto AISelfClassSig = AISelfClass.getClassBound()->getGenericSignature()) { NextMethodParamIdx = AISelfClassSig->getGenericParams().size(); } NumMethodParams = AISubs.size() - NextMethodParamIdx; } unsigned NumSubs = ClassSubs.size() + NumMethodParams; if (ClassSubs.size() == NumSubs) return ClassSubs; // Mix class subs with method specific subs from the AI substitutions. // Assumptions: AI substitutions contain first the substitutions for // a class of the method being invoked and then the substitutions // for a method being invoked. auto Subs = M.getASTContext().Allocate<Substitution>(NumSubs); unsigned i = 0; for (auto &S : ClassSubs) { Subs[i++] = S; } for (; i < NumSubs; ++i, ++NextMethodParamIdx) { Subs[i] = AISubs[NextMethodParamIdx]; } return Subs; }
/// \brief Inlines all mandatory inlined functions into the body of a function, /// first recursively inlining all mandatory apply instructions in those /// functions into their bodies if necessary. /// /// \param F the function to be processed /// \param AI nullptr if this is being called from the top level; the relevant /// ApplyInst requiring the recursive call when non-null /// \param FullyInlinedSet the set of all functions already known to be fully /// processed, to avoid processing them over again /// \param SetFactory an instance of ImmutableFunctionSet::Factory /// \param CurrentInliningSet the set of functions currently being inlined in /// the current call stack of recursive calls /// /// \returns true if successful, false if failed due to circular inlining. static bool runOnFunctionRecursively(SILFunction *F, FullApplySite AI, DenseFunctionSet &FullyInlinedSet, ImmutableFunctionSet::Factory &SetFactory, ImmutableFunctionSet CurrentInliningSet, ClassHierarchyAnalysis *CHA) { // Avoid reprocessing functions needlessly. if (FullyInlinedSet.count(F)) return true; // Prevent attempt to circularly inline. if (CurrentInliningSet.contains(F)) { // This cannot happen on a top-level call, so AI should be non-null. assert(AI && "Cannot have circular inline without apply"); SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::circular_transparent); return false; } // Add to the current inlining set (immutably, so we only affect the set // during this call and recursive subcalls). CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); SmallVector<std::pair<SILValue, ParameterConvention>, 16> CaptureArgs; SmallVector<SILValue, 32> FullArgs; for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) { for (auto II = BI->begin(), IE = BI->end(); II != IE; ++II) { FullApplySite InnerAI = FullApplySite::isa(&*II); if (!InnerAI) continue; auto *ApplyBlock = InnerAI.getParent(); // *NOTE* If devirtualization succeeds, sometimes II will not be InnerAI, // but a casted result of InnerAI or even a block argument due to // abstraction changes when calling the witness or class method. We still // know that InnerAI dominates II though. std::tie(InnerAI, II) = tryDevirtualizeApplyHelper(InnerAI, II, CHA); if (!InnerAI) continue; SILValue CalleeValue = InnerAI.getCallee(); bool IsThick; PartialApplyInst *PAI; SILFunction *CalleeFunction = getCalleeFunction( F, InnerAI, IsThick, CaptureArgs, FullArgs, PAI); if (!CalleeFunction) continue; // Then recursively process it first before trying to inline it. if (!runOnFunctionRecursively(CalleeFunction, InnerAI, FullyInlinedSet, SetFactory, CurrentInliningSet, CHA)) { // If we failed due to circular inlining, then emit some notes to // trace back the failure if we have more information. // FIXME: possibly it could be worth recovering and attempting other // inlines within this same recursive call rather than simply // propagating the failure. if (AI) { SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::note_while_inlining); } return false; } // Get our list of substitutions. auto Subs = (PAI ? 
PAI->getSubstitutionMap() : InnerAI.getSubstitutionMap()); SILOpenedArchetypesTracker OpenedArchetypesTracker(F); F->getModule().registerDeleteNotificationHandler( &OpenedArchetypesTracker); // The callee only needs to know about opened archetypes used in // the substitution list. OpenedArchetypesTracker.registerUsedOpenedArchetypes( InnerAI.getInstruction()); if (PAI) { OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI); } SILInliner Inliner(*F, *CalleeFunction, SILInliner::InlineKind::MandatoryInline, Subs, OpenedArchetypesTracker); if (!Inliner.canInlineFunction(InnerAI)) { // See comment above about casting when devirtualizing and how this // sometimes causes II and InnerAI to be different and even in different // blocks. II = InnerAI.getInstruction()->getIterator(); continue; } // Inline function at I, which also changes I to refer to the first // instruction inlined in the case that it succeeds. We purposely // process the inlined body after inlining, because the inlining may // have exposed new inlining opportunities beyond those present in // the inlined function when processed independently. LLVM_DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() << " into @" << InnerAI.getFunction()->getName() << "\n"); // If we intend to inline a thick function, then we need to balance the // reference counts for correctness. if (IsThick) { bool IsCalleeGuaranteed = PAI && PAI->getType().castTo<SILFunctionType>()->isCalleeGuaranteed(); fixupReferenceCounts(II, CalleeValue, CaptureArgs, IsCalleeGuaranteed); } // Decrement our iterator (carefully, to avoid going off the front) so it // is valid after inlining is done. Inlining deletes the apply, and can // introduce multiple new basic blocks. II = prev_or_default(II, ApplyBlock->begin(), ApplyBlock->end()); Inliner.inlineFunction(InnerAI, FullArgs); // We were able to inline successfully. Remove the apply. InnerAI.getInstruction()->eraseFromParent(); // Reestablish our iterator if it wrapped. if (II == ApplyBlock->end()) II = ApplyBlock->begin(); // Update the iterator when instructions are removed. DeleteInstructionsHandler DeletionHandler(II); // Now that the IR is correct, see if we can remove dead callee // computations (e.g. dead partial_apply closures). cleanupCalleeValue(CalleeValue, FullArgs); // Reposition iterators possibly invalidated by mutation. BI = SILFunction::iterator(ApplyBlock); IE = ApplyBlock->end(); assert(BI == SILFunction::iterator(II->getParent()) && "Mismatch between the instruction and basic block"); ++NumMandatoryInlines; } } // Keep track of full inlined functions so we don't waste time recursively // reprocessing them. FullyInlinedSet.insert(F); return true; }
/// Return true if inlining this call site is profitable. bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI, unsigned loopDepthOfAI, DominanceAnalysis *DA, SILLoopAnalysis *LA, ConstantTracker &callerTracker, unsigned &NumCallerBlocks) { SILFunction *Callee = AI.getCalleeFunction(); if (Callee->getInlineStrategy() == AlwaysInline) return true; ConstantTracker constTracker(Callee, &callerTracker, AI); DominanceInfo *DT = DA->get(Callee); SILLoopInfo *LI = LA->get(Callee); DominanceOrder domOrder(&Callee->front(), DT, Callee->size()); // Calculate the inlining cost of the callee. unsigned CalleeCost = 0; unsigned Benefit = InlineCostThreshold > 0 ? InlineCostThreshold : RemovedCallBenefit; Benefit += loopDepthOfAI * LoopBenefitFactor; int testThreshold = TestThreshold; while (SILBasicBlock *block = domOrder.getNext()) { constTracker.beginBlock(); unsigned loopDepth = LI->getLoopDepth(block); for (SILInstruction &I : *block) { constTracker.trackInst(&I); auto ICost = instructionInlineCost(I); if (testThreshold >= 0) { // We are in test-mode: use a simplified cost model. CalleeCost += testCost(&I); } else { // Use the regular cost model. CalleeCost += unsigned(ICost); } if (ApplyInst *AI = dyn_cast<ApplyInst>(&I)) { // Check if the callee is passed as an argument. If so, increase the // threshold, because inlining will (probably) eliminate the closure. SILInstruction *def = constTracker.getDefInCaller(AI->getCallee()); if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def))) { DEBUG(llvm::dbgs() << " Boost: apply const function at" << *AI); Benefit += ConstCalleeBenefit + loopDepth * LoopBenefitFactor; testThreshold *= 2; } } } // Don't count costs in blocks which are dead after inlining. SILBasicBlock *takenBlock = getTakenBlock(block->getTerminator(), constTracker); if (takenBlock) { Benefit += ConstTerminatorBenefit + TestOpt; DEBUG(llvm::dbgs() << " Take bb" << takenBlock->getDebugID() << " of" << *block->getTerminator()); domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) { return child->getSinglePredecessor() != block || child == takenBlock; }); } else { domOrder.pushChildren(block); } } unsigned Threshold = Benefit; // The default. if (testThreshold >= 0) { // We are in testing mode. Threshold = testThreshold; } else if (AI.getFunction()->isThunk()) { // Only inline trivial functions into thunks (which will not increase the // code size). Threshold = TrivialFunctionThreshold; } else { // The default case. // We reduce the benefit if the caller is too large. For this we use a // cubic function on the number of caller blocks. This starts to prevent // inlining at about 800 - 1000 caller blocks. unsigned blockMinus = (NumCallerBlocks * NumCallerBlocks) / BlockLimitDenominator * NumCallerBlocks / BlockLimitDenominator; if (Threshold > blockMinus + TrivialFunctionThreshold) Threshold -= blockMinus; else Threshold = TrivialFunctionThreshold; } if (CalleeCost > Threshold) { DEBUG(llvm::dbgs() << " NO: Function too big to inline, " "cost: " << CalleeCost << ", threshold: " << Threshold << "\n"); return false; } DEBUG(llvm::dbgs() << " YES: ready to inline, " "cost: " << CalleeCost << ", threshold: " << Threshold << "\n"); NumCallerBlocks += Callee->size(); return true; }
/// Insert monomorphic inline caches for a specific class or metatype /// type \p SubClassTy. static FullApplySite speculateMonomorphicTarget(FullApplySite AI, SILType SubType, CheckedCastBranchInst *&CCBI) { CCBI = nullptr; // Bail if this class_method cannot be devirtualized. if (!canDevirtualizeClassMethod(AI, SubType)) return FullApplySite(); // Create a diamond shaped control flow and a checked_cast_branch // instruction that checks the exact type of the object. // This cast selects between two paths: one that calls the slow dynamic // dispatch and one that calls the specific method. auto It = AI.getInstruction()->getIterator(); SILFunction *F = AI.getFunction(); SILBasicBlock *Entry = AI.getParent(); // Iden is the basic block containing the direct call. SILBasicBlock *Iden = F->createBasicBlock(); // Virt is the block containing the slow virtual call. SILBasicBlock *Virt = F->createBasicBlock(); Iden->createBBArg(SubType); SILBasicBlock *Continue = Entry->splitBasicBlock(It); SILBuilderWithScope Builder(Entry, AI.getInstruction()); // Create the checked_cast_branch instruction that checks at runtime if the // class instance is identical to the SILType. ClassMethodInst *CMI = cast<ClassMethodInst>(AI.getCallee()); CCBI = Builder.createCheckedCastBranch(AI.getLoc(), /*exact*/ true, CMI->getOperand(), SubType, Iden, Virt); It = CCBI->getIterator(); SILBuilderWithScope VirtBuilder(Virt, AI.getInstruction()); SILBuilderWithScope IdenBuilder(Iden, AI.getInstruction()); // This is the class reference downcasted into subclass SubType. SILValue DownCastedClassInstance = Iden->getBBArg(0); // Copy the two apply instructions into the two blocks. FullApplySite IdenAI = CloneApply(AI, IdenBuilder); FullApplySite VirtAI = CloneApply(AI, VirtBuilder); // See if Continue has a release on self as the instruction right after the // apply. If it exists, move it into position in the diamond. if (auto *Release = dyn_cast<StrongReleaseInst>(std::next(Continue->begin()))) { if (Release->getOperand() == CMI->getOperand()) { VirtBuilder.createStrongRelease(Release->getLoc(), CMI->getOperand()); IdenBuilder.createStrongRelease(Release->getLoc(), DownCastedClassInstance); Release->eraseFromParent(); } } // Create a PHInode for returning the return value from both apply // instructions. SILArgument *Arg = Continue->createBBArg(AI.getType()); if (!isa<TryApplyInst>(AI)) { IdenBuilder.createBranch(AI.getLoc(), Continue, ArrayRef<SILValue>(IdenAI.getInstruction())); VirtBuilder.createBranch(AI.getLoc(), Continue, ArrayRef<SILValue>(VirtAI.getInstruction())); } // Remove the old Apply instruction. if (!isa<TryApplyInst>(AI)) AI.getInstruction()->replaceAllUsesWith(Arg); auto *OriginalBB = AI.getParent(); AI.getInstruction()->eraseFromParent(); if (OriginalBB->empty()) OriginalBB->removeFromParent(); // Update the stats. NumTargetsPredicted++; // Devirtualize the apply instruction on the identical path. auto NewInstPair = devirtualizeClassMethod(IdenAI, DownCastedClassInstance); assert(NewInstPair.first && "Expected to be able to devirtualize apply!"); replaceDeadApply(IdenAI, NewInstPair.first); // Split critical edges resulting from VirtAI. 
if (auto *TAI = dyn_cast<TryApplyInst>(VirtAI)) { auto *ErrorBB = TAI->getFunction()->createBasicBlock(); ErrorBB->createBBArg(TAI->getErrorBB()->getBBArg(0)->getType()); Builder.setInsertionPoint(ErrorBB); Builder.createBranch(TAI->getLoc(), TAI->getErrorBB(), {ErrorBB->getBBArg(0)}); auto *NormalBB = TAI->getFunction()->createBasicBlock(); NormalBB->createBBArg(TAI->getNormalBB()->getBBArg(0)->getType()); Builder.setInsertionPoint(NormalBB); Builder.createBranch(TAI->getLoc(), TAI->getNormalBB(), {NormalBB->getBBArg(0) }); Builder.setInsertionPoint(VirtAI.getInstruction()); SmallVector<SILValue, 4> Args; for (auto Arg : VirtAI.getArguments()) { Args.push_back(Arg); } FullApplySite NewVirtAI = Builder.createTryApply(VirtAI.getLoc(), VirtAI.getCallee(), VirtAI.getSubstCalleeSILType(), VirtAI.getSubstitutions(), Args, NormalBB, ErrorBB); VirtAI.getInstruction()->eraseFromParent(); VirtAI = NewVirtAI; } return VirtAI; }
/// \brief Inlines all mandatory inlined functions into the body of a function, /// first recursively inlining all mandatory apply instructions in those /// functions into their bodies if necessary. /// /// \param F the function to be processed /// \param AI nullptr if this is being called from the top level; the relevant /// ApplyInst requiring the recursive call when non-null /// \param FullyInlinedSet the set of all functions already known to be fully /// processed, to avoid processing them over again /// \param SetFactory an instance of ImmutableFunctionSet::Factory /// \param CurrentInliningSet the set of functions currently being inlined in /// the current call stack of recursive calls /// /// \returns true if successful, false if failed due to circular inlining. static bool runOnFunctionRecursively(SILFunction *F, FullApplySite AI, SILModule::LinkingMode Mode, DenseFunctionSet &FullyInlinedSet, ImmutableFunctionSet::Factory &SetFactory, ImmutableFunctionSet CurrentInliningSet, ClassHierarchyAnalysis *CHA) { // Avoid reprocessing functions needlessly. if (FullyInlinedSet.count(F)) return true; // Prevent attempt to circularly inline. if (CurrentInliningSet.contains(F)) { // This cannot happen on a top-level call, so AI should be non-null. assert(AI && "Cannot have circular inline without apply"); SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::circular_transparent); return false; } // Add to the current inlining set (immutably, so we only affect the set // during this call and recursive subcalls). CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); SmallVector<SILValue, 16> CaptureArgs; SmallVector<SILValue, 32> FullArgs; for (auto FI = F->begin(), FE = F->end(); FI != FE; ++FI) { for (auto I = FI->begin(), E = FI->end(); I != E; ++I) { FullApplySite InnerAI = FullApplySite::isa(&*I); if (!InnerAI) continue; auto *ApplyBlock = InnerAI.getParent(); auto NewInstPair = tryDevirtualizeApply(InnerAI, CHA); if (auto *NewInst = NewInstPair.first) { replaceDeadApply(InnerAI, NewInst); if (auto *II = dyn_cast<SILInstruction>(NewInst)) I = II->getIterator(); else I = NewInst->getParentBlock()->begin(); auto NewAI = FullApplySite::isa(NewInstPair.second.getInstruction()); if (!NewAI) continue; InnerAI = NewAI; } SILLocation Loc = InnerAI.getLoc(); SILValue CalleeValue = InnerAI.getCallee(); bool IsThick; PartialApplyInst *PAI; SILFunction *CalleeFunction = getCalleeFunction(InnerAI, IsThick, CaptureArgs, FullArgs, PAI, Mode); if (!CalleeFunction || CalleeFunction->isTransparent() == IsNotTransparent) continue; if (F->isFragile() && !CalleeFunction->hasValidLinkageForFragileRef()) { if (!CalleeFunction->hasValidLinkageForFragileInline()) { llvm::errs() << "caller: " << F->getName() << "\n"; llvm::errs() << "callee: " << CalleeFunction->getName() << "\n"; llvm_unreachable("Should never be inlining a resilient function into " "a fragile function"); } continue; } // Then recursively process it first before trying to inline it. if (!runOnFunctionRecursively(CalleeFunction, InnerAI, Mode, FullyInlinedSet, SetFactory, CurrentInliningSet, CHA)) { // If we failed due to circular inlining, then emit some notes to // trace back the failure if we have more information. // FIXME: possibly it could be worth recovering and attempting other // inlines within this same recursive call rather than simply // propagating the failure. 
if (AI) { SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::note_while_inlining); } return false; } // Inline function at I, which also changes I to refer to the first // instruction inlined in the case that it succeeds. We purposely // process the inlined body after inlining, because the inlining may // have exposed new inlining opportunities beyond those present in // the inlined function when processed independently. DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() << " into @" << InnerAI.getFunction()->getName() << "\n"); // If we intend to inline a thick function, then we need to balance the // reference counts for correctness. if (IsThick && I != ApplyBlock->begin()) { // We need to find an appropriate location for our fix up code // We used to do this after inlining Without any modifications // This caused us to add a release in a wrong place: // It would release a value *before* retaining it! // It is really problematic to do this after inlining - // Finding a valid insertion point is tricky: // Inlining might add new basic blocks and/or remove the apply // We want to add the fix up *just before* where the current apply is! // Unfortunately, we *can't* add the fix up code here: // Inlining might fail for any reason - // If that occurred we'd need to undo our fix up code. // Instead, we split the current basic block - // Making sure we have a basic block that starts with our apply. SILBuilderWithScope B(I); ApplyBlock = splitBasicBlockAndBranch(B, &*I, nullptr, nullptr); I = ApplyBlock->begin(); } // Decrement our iterator (carefully, to avoid going off the front) so it // is valid after inlining is done. Inlining deletes the apply, and can // introduce multiple new basic blocks. if (I != ApplyBlock->begin()) --I; else I = ApplyBlock->end(); std::vector<Substitution> ApplySubs(InnerAI.getSubstitutions()); if (PAI) { auto PAISubs = PAI->getSubstitutions(); ApplySubs.insert(ApplySubs.end(), PAISubs.begin(), PAISubs.end()); } SILOpenedArchetypesTracker OpenedArchetypesTracker(*F); F->getModule().registerDeleteNotificationHandler( &OpenedArchetypesTracker); // The callee only needs to know about opened archetypes used in // the substitution list. OpenedArchetypesTracker.registerUsedOpenedArchetypes(InnerAI.getInstruction()); if (PAI) { OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI); } SILInliner Inliner(*F, *CalleeFunction, SILInliner::InlineKind::MandatoryInline, ApplySubs, OpenedArchetypesTracker); if (!Inliner.inlineFunction(InnerAI, FullArgs)) { I = InnerAI.getInstruction()->getIterator(); continue; } // Inlining was successful. Remove the apply. InnerAI.getInstruction()->eraseFromParent(); // Reestablish our iterator if it wrapped. if (I == ApplyBlock->end()) I = ApplyBlock->begin(); // Update the iterator when instructions are removed. DeleteInstructionsHandler DeletionHandler(I); // If the inlined apply was a thick function, then we need to balance the // reference counts for correctness. if (IsThick) fixupReferenceCounts(I, Loc, CalleeValue, CaptureArgs); // Now that the IR is correct, see if we can remove dead callee // computations (e.g. dead partial_apply closures). cleanupCalleeValue(CalleeValue, CaptureArgs, FullArgs); // Reposition iterators possibly invalidated by mutation. 
FI = SILFunction::iterator(ApplyBlock); E = ApplyBlock->end(); assert(FI == SILFunction::iterator(I->getParent()) && "Mismatch between the instruction and basic block"); ++NumMandatoryInlines; } } // Keep track of full inlined functions so we don't waste time recursively // reprocessing them. FullyInlinedSet.insert(F); return true; }
/// \brief Inlines all mandatory inlined functions into the body of a function, /// first recursively inlining all mandatory apply instructions in those /// functions into their bodies if necessary. /// /// \param F the function to be processed /// \param AI nullptr if this is being called from the top level; the relevant /// ApplyInst requiring the recursive call when non-null /// \param FullyInlinedSet the set of all functions already known to be fully /// processed, to avoid processing them over again /// \param SetFactory an instance of ImmutableFunctionSet::Factory /// \param CurrentInliningSet the set of functions currently being inlined in /// the current call stack of recursive calls /// /// \returns true if successful, false if failed due to circular inlining. static bool runOnFunctionRecursively(SILFunction *F, FullApplySite AI, SILModule::LinkingMode Mode, DenseFunctionSet &FullyInlinedSet, ImmutableFunctionSet::Factory &SetFactory, ImmutableFunctionSet CurrentInliningSet, ClassHierarchyAnalysis *CHA) { // Avoid reprocessing functions needlessly. if (FullyInlinedSet.count(F)) return true; // Prevent attempt to circularly inline. if (CurrentInliningSet.contains(F)) { // This cannot happen on a top-level call, so AI should be non-null. assert(AI && "Cannot have circular inline without apply"); SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::circular_transparent); return false; } // Add to the current inlining set (immutably, so we only affect the set // during this call and recursive subcalls). CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); SmallVector<SILValue, 16> CaptureArgs; SmallVector<SILValue, 32> FullArgs; for (auto FI = F->begin(), FE = F->end(); FI != FE; ++FI) { for (auto I = FI->begin(), E = FI->end(); I != E; ++I) { FullApplySite InnerAI = FullApplySite::isa(&*I); if (!InnerAI) continue; auto *ApplyBlock = InnerAI.getParent(); auto NewInstPair = tryDevirtualizeApply(InnerAI, CHA); if (auto *NewInst = NewInstPair.first) { replaceDeadApply(InnerAI, NewInst); if (auto *II = dyn_cast<SILInstruction>(NewInst)) I = II->getIterator(); else I = NewInst->getParentBB()->begin(); auto NewAI = FullApplySite::isa(NewInstPair.second.getInstruction()); if (!NewAI) continue; InnerAI = NewAI; } SILLocation Loc = InnerAI.getLoc(); SILValue CalleeValue = InnerAI.getCallee(); bool IsThick; PartialApplyInst *PAI; SILFunction *CalleeFunction = getCalleeFunction(InnerAI, IsThick, CaptureArgs, FullArgs, PAI, Mode); if (!CalleeFunction || CalleeFunction->isTransparent() == IsNotTransparent) continue; // Then recursively process it first before trying to inline it. if (!runOnFunctionRecursively(CalleeFunction, InnerAI, Mode, FullyInlinedSet, SetFactory, CurrentInliningSet, CHA)) { // If we failed due to circular inlining, then emit some notes to // trace back the failure if we have more information. // FIXME: possibly it could be worth recovering and attempting other // inlines within this same recursive call rather than simply // propagating the failure. if (AI) { SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::note_while_inlining); } return false; } // Inline function at I, which also changes I to refer to the first // instruction inlined in the case that it succeeds. 
We purposely // process the inlined body after inlining, because the inlining may // have exposed new inlining opportunities beyond those present in // the inlined function when processed independently. DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() << " into @" << InnerAI.getFunction()->getName() << "\n"); // Decrement our iterator (carefully, to avoid going off the front) so it // is valid after inlining is done. Inlining deletes the apply, and can // introduce multiple new basic blocks. if (I != ApplyBlock->begin()) --I; else I = ApplyBlock->end(); TypeSubstitutionMap ContextSubs; std::vector<Substitution> ApplySubs(InnerAI.getSubstitutions()); if (PAI) { auto PAISubs = PAI->getSubstitutions(); ApplySubs.insert(ApplySubs.end(), PAISubs.begin(), PAISubs.end()); } ContextSubs.copyFrom(CalleeFunction->getContextGenericParams() ->getSubstitutionMap(ApplySubs)); SILInliner Inliner(*F, *CalleeFunction, SILInliner::InlineKind::MandatoryInline, ContextSubs, ApplySubs); if (!Inliner.inlineFunction(InnerAI, FullArgs)) { I = InnerAI.getInstruction()->getIterator(); continue; } // Inlining was successful. Remove the apply. InnerAI.getInstruction()->eraseFromParent(); // Reestablish our iterator if it wrapped. if (I == ApplyBlock->end()) I = ApplyBlock->begin(); else ++I; // If the inlined apply was a thick function, then we need to balance the // reference counts for correctness. if (IsThick) fixupReferenceCounts(I, Loc, CalleeValue, CaptureArgs); // Now that the IR is correct, see if we can remove dead callee // computations (e.g. dead partial_apply closures). cleanupCalleeValue(CalleeValue, CaptureArgs, FullArgs); // Reposition iterators possibly invalidated by mutation. FI = SILFunction::iterator(ApplyBlock); I = ApplyBlock->begin(); E = ApplyBlock->end(); ++NumMandatoryInlines; } } // Keep track of full inlined functions so we don't waste time recursively // reprocessing them. FullyInlinedSet.insert(F); return true; }
/// \brief Check if it is possible to devirtualize an Apply instruction /// and a class member obtained using the class_method instruction into /// a direct call to a specific member of a specific class. /// /// \p AI is the apply to devirtualize. /// \p ClassOrMetatypeType is the class type or metatype type we are /// devirtualizing for. /// return true if it is possible to devirtualize, false - otherwise. bool swift::canDevirtualizeClassMethod(FullApplySite AI, SILType ClassOrMetatypeType) { DEBUG(llvm::dbgs() << " Trying to devirtualize : " << *AI.getInstruction()); SILModule &Mod = AI.getModule(); // Bail if any generic types parameters of the class instance type are // unbound. // We cannot devirtualize unbound generic calls yet. if (isClassWithUnboundGenericParameters(ClassOrMetatypeType, Mod)) return false; // First attempt to lookup the origin for our class method. The origin should // either be a metatype or an alloc_ref. DEBUG(llvm::dbgs() << " Origin Type: " << ClassOrMetatypeType); auto *CMI = cast<ClassMethodInst>(AI.getCallee()); // Find the implementation of the member which should be invoked. auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, CMI->getMember()); // If we do not find any such function, we have no function to devirtualize // to... so bail. if (!F) { DEBUG(llvm::dbgs() << " FAIL: Could not find matching VTable or " "vtable method for this class.\n"); return false; } if (AI.getFunction()->isFragile()) { // function_ref inside fragile function cannot reference a private or // hidden symbol. if (!(F->isFragile() || isValidLinkageForFragileRef(F->getLinkage()) || F->isExternalDeclaration())) return false; } CanSILFunctionType GenCalleeType = F->getLoweredFunctionType(); auto Subs = getSubstitutionsForCallee(Mod, GenCalleeType, ClassOrMetatypeType, AI); // For polymorphic functions, bail if the number of substitutions is // not the same as the number of expected generic parameters. if (GenCalleeType->isPolymorphic()) { auto GenericSig = GenCalleeType->getGenericSignature(); // Get the number of expected generic parameters, which // is a sum of the number of explicit generic parameters // and the number of their recursive member types exposed // through protocol requirements. auto DepTypes = GenericSig->getAllDependentTypes(); unsigned ExpectedGenParamsNum = 0; for (auto DT: DepTypes) { (void)DT; ExpectedGenParamsNum++; } if (ExpectedGenParamsNum != Subs.size()) return false; } // Check if the optimizer knows how to cast the return type. CanSILFunctionType SubstCalleeType = GenCalleeType; if (GenCalleeType->isPolymorphic()) SubstCalleeType = GenCalleeType->substGenericArgs(Mod, Mod.getSwiftModule(), Subs); // If we have a direct return type, make sure we use the subst callee return // type. If we have an indirect return type, AI's return type of the empty // tuple should be ok. SILType ReturnType = AI.getType(); if (!SubstCalleeType->hasIndirectResult()) { ReturnType = SubstCalleeType->getSILResult(); } if (!canCastValueToABICompatibleType(Mod, ReturnType, AI.getType())) return false; return true; }
/// \brief Devirtualize an apply of a class method. /// /// \p AI is the apply to devirtualize. /// \p ClassOrMetatype is a class value or metatype value that is the /// self argument of the apply we will devirtualize. /// return the result value of the new ApplyInst if created one or null. FullApplySite swift::devirtualizeClassMethod(FullApplySite AI, SILValue ClassOrMetatype, OptRemark::Emitter *ORE) { LLVM_DEBUG(llvm::dbgs() << " Trying to devirtualize : " << *AI.getInstruction()); SILModule &Mod = AI.getModule(); auto *MI = cast<MethodInst>(AI.getCallee()); auto ClassOrMetatypeType = ClassOrMetatype->getType(); auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, MI); CanSILFunctionType GenCalleeType = F->getLoweredFunctionType(); SubstitutionMap Subs = getSubstitutionsForCallee(Mod, GenCalleeType, ClassOrMetatypeType.getASTType(), AI); CanSILFunctionType SubstCalleeType = GenCalleeType; if (GenCalleeType->isPolymorphic()) SubstCalleeType = GenCalleeType->substGenericArgs(Mod, Subs); SILFunctionConventions substConv(SubstCalleeType, Mod); SILBuilderWithScope B(AI.getInstruction()); SILLocation Loc = AI.getLoc(); FunctionRefInst *FRI = B.createFunctionRef(Loc, F); // Create the argument list for the new apply, casting when needed // in order to handle covariant indirect return types and // contravariant argument types. llvm::SmallVector<SILValue, 8> NewArgs; auto IndirectResultArgIter = AI.getIndirectSILResults().begin(); for (auto ResultTy : substConv.getIndirectSILResultTypes()) { NewArgs.push_back( castValueToABICompatibleType(&B, Loc, *IndirectResultArgIter, IndirectResultArgIter->getType(), ResultTy)); ++IndirectResultArgIter; } auto ParamArgIter = AI.getArgumentsWithoutIndirectResults().begin(); // Skip the last parameter, which is `self`. Add it below. for (auto param : substConv.getParameters().drop_back()) { auto paramType = substConv.getSILType(param); NewArgs.push_back( castValueToABICompatibleType(&B, Loc, *ParamArgIter, ParamArgIter->getType(), paramType)); ++ParamArgIter; } // Add the self argument, upcasting if required because we're // calling a base class's method. auto SelfParamTy = substConv.getSILType(SubstCalleeType->getSelfParameter()); NewArgs.push_back(castValueToABICompatibleType(&B, Loc, ClassOrMetatype, ClassOrMetatypeType, SelfParamTy)); ApplySite NewAS = replaceApplySite(B, Loc, AI, FRI, Subs, NewArgs, substConv); FullApplySite NewAI = FullApplySite::isa(NewAS.getInstruction()); assert(NewAI); LLVM_DEBUG(llvm::dbgs() << " SUCCESS: " << F->getName() << "\n"); if (ORE) ORE->emit([&]() { using namespace OptRemark; return RemarkPassed("ClassMethodDevirtualized", *AI.getInstruction()) << "Devirtualized call to class method " << NV("Method", F); }); NumClassDevirt++; return NewAI; }
/// Attempt to devirtualize the given apply if possible, and return a /// new instruction in that case, or nullptr otherwise. ApplySite swift::tryDevirtualizeApply(ApplySite AI, ClassHierarchyAnalysis *CHA, OptRemark::Emitter *ORE) { LLVM_DEBUG(llvm::dbgs() << " Trying to devirtualize: " << *AI.getInstruction()); // Devirtualize apply instructions that call witness_method instructions: // // %8 = witness_method $Optional<UInt16>, #LogicValue.boolValue!getter.1 // %9 = apply %8<Self = CodeUnit?>(%6#1) : ... // if (isa<WitnessMethodInst>(AI.getCallee())) return tryDevirtualizeWitnessMethod(AI, ORE); // TODO: check if we can also de-virtualize partial applies of class methods. FullApplySite FAS = FullApplySite::isa(AI.getInstruction()); if (!FAS) return ApplySite(); /// Optimize a class_method and alloc_ref pair into a direct function /// reference: /// /// \code /// %XX = alloc_ref $Foo /// %YY = class_method %XX : $Foo, #Foo.get!1 : $@convention(method)... /// \endcode /// /// or /// /// %XX = metatype $... /// %YY = class_method %XX : ... /// /// into /// /// %YY = function_ref @... if (auto *CMI = dyn_cast<ClassMethodInst>(FAS.getCallee())) { auto &M = FAS.getModule(); auto Instance = stripUpCasts(CMI->getOperand()); auto ClassType = Instance->getType(); if (ClassType.is<MetatypeType>()) ClassType = ClassType.getMetatypeInstanceType(M); auto *CD = ClassType.getClassOrBoundGenericClass(); if (isEffectivelyFinalMethod(FAS, ClassType, CD, CHA)) return tryDevirtualizeClassMethod(FAS, Instance, ORE, true /*isEffectivelyFinalMethod*/); // Try to check if the exact dynamic type of the instance is statically // known. if (auto Instance = getInstanceWithExactDynamicType(CMI->getOperand(), CMI->getModule(), CHA)) return tryDevirtualizeClassMethod(FAS, Instance, ORE); if (auto ExactTy = getExactDynamicType(CMI->getOperand(), CMI->getModule(), CHA)) { if (ExactTy == CMI->getOperand()->getType()) return tryDevirtualizeClassMethod(FAS, CMI->getOperand(), ORE); } } if (isa<SuperMethodInst>(FAS.getCallee())) { if (FAS.hasSelfArgument()) { return tryDevirtualizeClassMethod(FAS, FAS.getSelfArgument(), ORE); } // It is an invocation of a class method. // Last operand is the metatype that should be used for dispatching. return tryDevirtualizeClassMethod(FAS, FAS.getArguments().back(), ORE); } return ApplySite(); }
bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI, Weight CallerWeight, ConstantTracker &callerTracker, int &NumCallerBlocks, bool IsGeneric) { SILFunction *Callee = AI.getReferencedFunction(); SILLoopInfo *LI = LA->get(Callee); ShortestPathAnalysis *SPA = getSPA(Callee, LI); assert(SPA->isValid()); ConstantTracker constTracker(Callee, &callerTracker, AI); DominanceInfo *DT = DA->get(Callee); SILBasicBlock *CalleeEntry = &Callee->front(); DominanceOrder domOrder(CalleeEntry, DT, Callee->size()); // Calculate the inlining cost of the callee. int CalleeCost = 0; int Benefit = 0; // Start with a base benefit. int BaseBenefit = RemovedCallBenefit; const SILOptions &Opts = Callee->getModule().getOptions(); // For some reason -Ounchecked can accept a higher base benefit without // increasing the code size too much. if (Opts.Optimization == SILOptions::SILOptMode::OptimizeUnchecked) BaseBenefit *= 2; CallerWeight.updateBenefit(Benefit, BaseBenefit); // Go through all blocks of the function, accumulate the cost and find // benefits. while (SILBasicBlock *block = domOrder.getNext()) { constTracker.beginBlock(); Weight BlockW = SPA->getWeight(block, CallerWeight); for (SILInstruction &I : *block) { constTracker.trackInst(&I); CalleeCost += (int)instructionInlineCost(I); if (FullApplySite AI = FullApplySite::isa(&I)) { // Check if the callee is passed as an argument. If so, increase the // threshold, because inlining will (probably) eliminate the closure. SILInstruction *def = constTracker.getDefInCaller(AI.getCallee()); if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def))) BlockW.updateBenefit(Benefit, RemovedClosureBenefit); } else if (auto *LI = dyn_cast<LoadInst>(&I)) { // Check if it's a load from a stack location in the caller. Such a load // might be optimized away if inlined. if (constTracker.isStackAddrInCaller(LI->getOperand())) BlockW.updateBenefit(Benefit, RemovedLoadBenefit); } else if (auto *SI = dyn_cast<StoreInst>(&I)) { // Check if it's a store to a stack location in the caller. Such a load // might be optimized away if inlined. if (constTracker.isStackAddrInCaller(SI->getDest())) BlockW.updateBenefit(Benefit, RemovedStoreBenefit); } else if (isa<StrongReleaseInst>(&I) || isa<ReleaseValueInst>(&I)) { SILValue Op = stripCasts(I.getOperand(0)); if (SILArgument *Arg = dyn_cast<SILArgument>(Op)) { if (Arg->isFunctionArg() && Arg->getArgumentConvention() == SILArgumentConvention::Direct_Guaranteed) { BlockW.updateBenefit(Benefit, RefCountBenefit); } } } else if (auto *BI = dyn_cast<BuiltinInst>(&I)) { if (BI->getBuiltinInfo().ID == BuiltinValueKind::OnFastPath) BlockW.updateBenefit(Benefit, FastPathBuiltinBenefit); } } // Don't count costs in blocks which are dead after inlining. SILBasicBlock *takenBlock = constTracker.getTakenBlock(block->getTerminator()); if (takenBlock) { BlockW.updateBenefit(Benefit, RemovedTerminatorBenefit); domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) { return child->getSinglePredecessor() != block || child == takenBlock; }); } else { domOrder.pushChildren(block); } } if (AI.getFunction()->isThunk()) { // Only inline trivial functions into thunks (which will not increase the // code size). if (CalleeCost > TrivialFunctionThreshold) return false; DEBUG( dumpCaller(AI.getFunction()); llvm::dbgs() << " decision {" << CalleeCost << " into thunk} " << Callee->getName() << '\n'; ); return true; }
/// \brief Devirtualize an apply of a class method. /// /// \p AI is the apply to devirtualize. /// \p ClassOrMetatype is a class value or metatype value that is the /// self argument of the apply we will devirtualize. /// return the result value of the new ApplyInst if created one or null. DevirtualizationResult swift::devirtualizeClassMethod(FullApplySite AI, SILValue ClassOrMetatype) { DEBUG(llvm::dbgs() << " Trying to devirtualize : " << *AI.getInstruction()); SILModule &Mod = AI.getModule(); auto *CMI = cast<ClassMethodInst>(AI.getCallee()); auto ClassOrMetatypeType = ClassOrMetatype.getType(); auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, CMI->getMember()); CanSILFunctionType GenCalleeType = F->getLoweredFunctionType(); auto Subs = getSubstitutionsForCallee(Mod, GenCalleeType, ClassOrMetatypeType, AI); CanSILFunctionType SubstCalleeType = GenCalleeType; if (GenCalleeType->isPolymorphic()) SubstCalleeType = GenCalleeType->substGenericArgs(Mod, Mod.getSwiftModule(), Subs); SILBuilderWithScope B(AI.getInstruction()); FunctionRefInst *FRI = B.createFunctionRef(AI.getLoc(), F); // Create the argument list for the new apply, casting when needed // in order to handle covariant indirect return types and // contravariant argument types. llvm::SmallVector<SILValue, 8> NewArgs; auto Args = AI.getArguments(); auto ParamTypes = SubstCalleeType->getParameterSILTypes(); for (unsigned i = 0, e = Args.size() - 1; i != e; ++i) NewArgs.push_back(castValueToABICompatibleType(&B, AI.getLoc(), Args[i], Args[i].getType(), ParamTypes[i]).getValue()); // Add the self argument, upcasting if required because we're // calling a base class's method. auto SelfParamTy = SubstCalleeType->getSelfParameter().getSILType(); NewArgs.push_back(castValueToABICompatibleType(&B, AI.getLoc(), ClassOrMetatype, ClassOrMetatypeType, SelfParamTy).getValue()); // If we have a direct return type, make sure we use the subst callee return // type. If we have an indirect return type, AI's return type of the empty // tuple should be ok. SILType ResultTy = AI.getType(); if (!SubstCalleeType->hasIndirectResult()) { ResultTy = SubstCalleeType->getSILResult(); } SILType SubstCalleeSILType = SILType::getPrimitiveObjectType(SubstCalleeType); FullApplySite NewAI; SILBasicBlock *ResultBB = nullptr; SILBasicBlock *NormalBB = nullptr; SILValue ResultValue; bool ResultCastRequired = false; SmallVector<Operand *, 4> OriginalResultUses; if (!isa<TryApplyInst>(AI)) { NewAI = B.createApply(AI.getLoc(), FRI, SubstCalleeSILType, ResultTy, Subs, NewArgs, cast<ApplyInst>(AI)->isNonThrowing()); ResultValue = SILValue(NewAI.getInstruction(), 0); } else { auto *TAI = cast<TryApplyInst>(AI); // Create new normal and error BBs only if: // - re-using a BB would create a critical edge // - or, the result of the new apply would be of different // type than the argument of the original normal BB. 
if (TAI->getNormalBB()->getSinglePredecessor()) ResultBB = TAI->getNormalBB(); else { ResultBB = B.getFunction().createBasicBlock(); ResultBB->createBBArg(ResultTy); } NormalBB = TAI->getNormalBB(); SILBasicBlock *ErrorBB = nullptr; if (TAI->getErrorBB()->getSinglePredecessor()) ErrorBB = TAI->getErrorBB(); else { ErrorBB = B.getFunction().createBasicBlock(); ErrorBB->createBBArg(TAI->getErrorBB()->getBBArg(0)->getType()); } NewAI = B.createTryApply(AI.getLoc(), FRI, SubstCalleeSILType, Subs, NewArgs, ResultBB, ErrorBB); if (ErrorBB != TAI->getErrorBB()) { B.setInsertionPoint(ErrorBB); B.createBranch(TAI->getLoc(), TAI->getErrorBB(), {ErrorBB->getBBArg(0)}); } // Does the result value need to be casted? ResultCastRequired = ResultTy != NormalBB->getBBArg(0)->getType(); if (ResultBB != NormalBB) B.setInsertionPoint(ResultBB); else if (ResultCastRequired) { B.setInsertionPoint(NormalBB->begin()); // Collect all uses, before casting. for (auto *Use : NormalBB->getBBArg(0)->getUses()) { OriginalResultUses.push_back(Use); } NormalBB->getBBArg(0)->replaceAllUsesWith(SILUndef::get(AI.getType(), Mod)); NormalBB->replaceBBArg(0, ResultTy, nullptr); } // The result value is passed as a parameter to the normal block. ResultValue = ResultBB->getBBArg(0); } // Check if any casting is required for the return value. ResultValue = castValueToABICompatibleType(&B, NewAI.getLoc(), ResultValue, ResultTy, AI.getType()).getValue(); DEBUG(llvm::dbgs() << " SUCCESS: " << F->getName() << "\n"); NumClassDevirt++; if (NormalBB) { if (NormalBB != ResultBB) { // If artificial normal BB was introduced, branch // to the original normal BB. B.createBranch(NewAI.getLoc(), NormalBB, { ResultValue }); } else if (ResultCastRequired) { // Update all original uses by the new value. for(auto *Use: OriginalResultUses) { Use->set(ResultValue); } } return std::make_pair(NewAI.getInstruction(), NewAI); } // We need to return a pair of values here: // - the first one is the actual result of the devirtualized call, possibly // casted into an appropriate type. This SILValue may be a BB arg, if it // was a cast between optional types. // - the second one is the new apply site. return std::make_pair(ResultValue.getDef(), NewAI); }
/// \brief Returns true, if a method implementation to be called by the
/// default case handler of a speculative devirtualization is statically
/// known. This happens if it can be proven that generated
/// checked_cast_br instructions cover all other possible cases.
///
/// \p CHA class hierarchy analysis to be used
/// \p AI invocation instruction
/// \p CD static class of the instance whose method is being invoked
/// \p Subs set of direct subclasses of this class
static bool isDefaultCaseKnown(ClassHierarchyAnalysis *CHA,
                               FullApplySite AI,
                               ClassDecl *CD,
                               ClassHierarchyAnalysis::ClassList &Subs) {
  ClassMethodInst *CMI = cast<ClassMethodInst>(AI.getCallee());
  auto *Method = CMI->getMember().getFuncDecl();
  const DeclContext *DC = AI.getModule().getAssociatedContext();

  if (CD->isFinal())
    return true;

  // Without an associated context we cannot perform any
  // access-based optimizations.
  if (!DC)
    return false;

  // Only handle classes defined within the SILModule's associated context.
  if (!CD->isChildContextOf(DC))
    return false;

  if (!CD->hasAccessibility())
    return false;

  // Only consider 'private' members, unless we are in whole-module
  // compilation.
  switch (CD->getEffectiveAccess()) {
  case Accessibility::Public:
    return false;
  case Accessibility::Internal:
    if (!AI.getModule().isWholeModule())
      return false;
    break;
  case Accessibility::Private:
    break;
  }

  // This is a private or a module internal class.
  //
  // We can analyze the class hierarchy rooted at it and
  // eventually devirtualize a method call more efficiently.

  // First, analyze all direct subclasses.
  // We know that a dedicated checked_cast_br check is
  // generated for each direct subclass by tryToSpeculateTarget.
  for (auto S : Subs) {
    // Check if the subclass overrides the method.
    auto *FD = S->findOverridingDecl(Method);
    if (!FD)
      continue;
    if (CHA->hasKnownDirectSubclasses(S)) {
      // This subclass has its own subclasses and
      // they will use this implementation or provide
      // their own. In either case it is not covered by
      // checked_cast_br instructions generated by
      // tryToSpeculateTarget. Therefore it increases
      // the number of remaining cases to be handled
      // by the default case handler.
      return false;
    }
  }

  // Then, analyze indirect subclasses.

  // Set of indirect subclasses for the class.
  auto &IndirectSubs = CHA->getIndirectSubClasses(CD);

  // Check if any indirect subclasses use an implementation
  // of the method different from the implementation in
  // the current class. If this is the case, then such
  // an indirect subclass would need a dedicated
  // checked_cast_br check to be devirtualized. But this is
  // not done by tryToSpeculateTarget yet and therefore
  // such a subclass should be handled by the "default"
  // case handler, which essentially means that the "default"
  // case cannot be devirtualized, since it covers more
  // than one alternative.
  for (auto S : IndirectSubs) {
    auto *ImplFD = S->findImplementingMethod(Method);
    if (ImplFD != Method) {
      // A different implementation is used by a subclass.
      // Therefore, the default case is not known.
      return false;
    }
  }

  return true;
}
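// As a concrete illustration, consider the following hypothetical Swift
// source, compiled in whole-module mode so that 'internal' classes pass the
// accessibility checks above:
//
//   internal class Base { func f() {} }
//   internal class Derived : Base {} // does not override f()
//
// Every subclass of Base known to CHA inherits Base.f(), so the "default"
// case of a speculative devirtualization of a call to f() is statically
// known to invoke Base.f(). Had Derived overridden f() while having known
// subclasses of its own, the direct-subclass loop above would return false.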
// Return the list of functions that can be called via the given apply
// site.
CalleeList CalleeCache::getCalleeList(FullApplySite FAS) const {
  return getCalleeListForCalleeKind(FAS.getCallee());
}
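// A caller-side usage sketch (hypothetical; it assumes the usual CalleeList
// interface of isIncomplete() plus iteration over SILFunction pointers):
//
//   CalleeList Callees = Cache.getCalleeList(FAS);
//   if (Callees.isIncomplete())
//     return; // an unknown callee may exist, so be conservative
//   for (SILFunction *Callee : Callees)
//     visit(Callee);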
/// \brief Try to speculate the call target for the call \p AI. This function
/// returns true if a change was made.
static bool tryToSpeculateTarget(FullApplySite AI,
                                 ClassHierarchyAnalysis *CHA) {
  ClassMethodInst *CMI = cast<ClassMethodInst>(AI.getCallee());

  // We cannot devirtualize in cases where dynamic calls are
  // semantically required.
  if (CMI->isVolatile())
    return false;

  // Strip any upcasts off of our 'self' value, potentially leaving us
  // with a value whose type is closer (in the class hierarchy) to the
  // actual dynamic type.
  auto SubTypeValue = stripUpCasts(CMI->getOperand());
  SILType SubType = SubTypeValue.getType();

  // Bail if any generic type parameters of the class instance type are
  // unbound.
  // We cannot devirtualize unbound generic calls yet.
  if (isNominalTypeWithUnboundGenericParameters(SubType, AI.getModule()))
    return false;

  auto &M = CMI->getModule();
  auto ClassType = SubType;
  if (SubType.is<MetatypeType>())
    ClassType = SubType.getMetatypeInstanceType(M);

  CheckedCastBranchInst *LastCCBI = nullptr;

  ClassDecl *CD = ClassType.getClassOrBoundGenericClass();
  assert(CD && "Expected decl for class type!");

  if (!CHA->hasKnownDirectSubclasses(CD)) {
    // If there is only one possible alternative for this method,
    // try to devirtualize it completely.
    ClassHierarchyAnalysis::ClassList Subs;
    if (isDefaultCaseKnown(CHA, AI, CD, Subs)) {
      auto NewInstPair = tryDevirtualizeClassMethod(AI, SubTypeValue);
      if (NewInstPair.first)
        replaceDeadApply(AI, NewInstPair.first);
      return NewInstPair.second.getInstruction() != nullptr;
    }

    DEBUG(llvm::dbgs() << "Inserting monomorphic speculative call for class "
                       << CD->getName() << "\n");
    return !!speculateMonomorphicTarget(AI, SubType, LastCCBI);
  }

  // True if any instructions were changed or generated.
  bool Changed = false;

  // Collect the direct and indirect subclasses of the class.
  // Sort these subclasses in the order they should be tested by the
  // speculative devirtualization. Different strategies could be used,
  // e.g. breadth-first, depth-first, etc.
  // Currently, let's use the breadth-first strategy.
  // The exact static type of the instance should be tested first.
  auto &DirectSubs = CHA->getDirectSubClasses(CD);
  auto &IndirectSubs = CHA->getIndirectSubClasses(CD);

  SmallVector<ClassDecl *, 8> Subs(DirectSubs);
  Subs.append(IndirectSubs.begin(), IndirectSubs.end());

  if (isa<BoundGenericClassType>(ClassType.getSwiftRValueType())) {
    // Filter out any subclasses that do not inherit from this
    // specific bound class.
    auto RemovedIt = std::remove_if(Subs.begin(), Subs.end(),
        [&ClassType, &M](ClassDecl *Sub) {
          auto SubCanTy = Sub->getDeclaredType()->getCanonicalType();
          // An unbound generic type can override a method from
          // a bound generic class, but an unbound generic
          // class is not considered to be a subclass of a
          // bound generic class in the general case.
          if (isa<UnboundGenericType>(SubCanTy))
            return false;
          // Handle the usual case here: the class in question
          // should be a real subclass of the bound generic class.
          return !ClassType.isSuperclassOf(
              SILType::getPrimitiveObjectType(SubCanTy));
        });
    Subs.erase(RemovedIt, Subs.end());
  }

  // Number of subclasses which cannot be handled by checked_cast_br checks.
  int NotHandledSubsNum = 0;
  if (Subs.size() > MaxNumSpeculativeTargets) {
    DEBUG(llvm::dbgs() << "Class " << CD->getName() << " has too many ("
                       << Subs.size() << ") subclasses. Performing speculative "
                          "devirtualization only for the first "
                       << MaxNumSpeculativeTargets << " of them.\n");

    NotHandledSubsNum += (Subs.size() - MaxNumSpeculativeTargets);
    Subs.erase(&Subs[MaxNumSpeculativeTargets], Subs.end());
  }

  DEBUG(llvm::dbgs() << "Class " << CD->getName() << " is a superclass. "
                        "Inserting polymorphic speculative call.\n");

  // Try to devirtualize against the static class of the instance
  // if it is possible.
  auto FirstAI = speculateMonomorphicTarget(AI, SubType, LastCCBI);
  if (FirstAI) {
    Changed = true;
    AI = FirstAI;
  }

  // Perform a speculative devirtualization of a method invocation.
  // It replaces an indirect class_method-based call by code that performs
  // a direct call of the method implementation based on the dynamic class
  // of the instance.
  //
  // The code is generated according to the following principles:
  //
  // - For each direct subclass, a dedicated checked_cast_br instruction
  //   is generated to check if the dynamic class of the instance is exactly
  //   this subclass.
  //
  // - If this check succeeds, then it jumps to the code which performs a
  //   direct call of a method implementation specific to this subclass.
  //
  // - If this check fails, then a different subclass is checked by means of
  //   checked_cast_br in a similar way.
  //
  // - Finally, if the instance does not exactly match any of the direct
  //   subclasses, the "default" case code is generated, which should handle
  //   all remaining alternatives, i.e. it should be able to dispatch to any
  //   possible remaining method implementations. Typically this is achieved
  //   by using a class_method instruction, which performs an indirect
  //   invocation. But if it can be proven that only one specific
  //   implementation of the method will always be invoked by this code, then
  //   the class_method-based call can be devirtualized and replaced by a more
  //   efficient direct invocation of this specific method implementation.
  //
  // Remark: With the current implementation of speculative devirtualization,
  // if devirtualization of the "default" case is possible, then it would
  // by construction directly invoke the implementation of the method
  // corresponding to the static type of the instance. This may change
  // in the future, if we start using PGO for ordering of checked_cast_br
  // checks.

  // TODO: The ordering of checks may benefit from using PGO, because
  // the most probable alternatives could be checked first.
  for (auto S : Subs) {
    DEBUG(llvm::dbgs() << "Inserting a speculative call for class "
                       << CD->getName() << " and subclass " << S->getName()
                       << "\n");

    CanType CanClassType = S->getDeclaredType()->getCanonicalType();
    SILType ClassType = SILType::getPrimitiveObjectType(CanClassType);
    if (!ClassType.getClassOrBoundGenericClass()) {
      // This subclass cannot be handled. This happens e.g. if it is
      // a generic class.
      NotHandledSubsNum++;
      continue;
    }

    auto ClassOrMetatypeType = ClassType;
    if (auto EMT = SubType.getAs<AnyMetatypeType>()) {
      auto InstTy = ClassType.getSwiftRValueType();
      auto *MetaTy = MetatypeType::get(InstTy, EMT->getRepresentation());
      auto CanMetaTy = CanMetatypeType::CanTypeWrapper(MetaTy);
      ClassOrMetatypeType = SILType::getPrimitiveObjectType(CanMetaTy);
    }

    // Pass the metatype of the subclass.
    auto NewAI = speculateMonomorphicTarget(AI, ClassOrMetatypeType, LastCCBI);
    if (!NewAI) {
      NotHandledSubsNum++;
      continue;
    }
    AI = NewAI;
    Changed = true;
  }

  // Check if there is only a single statically known implementation
  // of the method which can be called by the default case handler.
  if (NotHandledSubsNum || !isDefaultCaseKnown(CHA, AI, CD, Subs)) {
    // Devirtualization of the remaining cases is not possible,
    // because more than one implementation of the method
    // needs to be handled here. Thus, an indirect call through
    // the class_method cannot be eliminated completely.
    return Changed;
  }

  // At this point it is known that there is only one remaining method
  // implementation which is not covered by checked_cast_br checks yet.
  // So, it is safe to replace the class_method invocation by
  // a direct call of this remaining implementation.
  if (LastCCBI && SubTypeValue == LastCCBI->getOperand()) {
    // Remove the last checked_cast_br, because it will always succeed.
    SILBuilderWithScope B(LastCCBI);
    auto CastedValue = B.createUncheckedBitCast(LastCCBI->getLoc(),
                                                LastCCBI->getOperand(),
                                                LastCCBI->getCastType());
    B.createBranch(LastCCBI->getLoc(), LastCCBI->getSuccessBB(),
                   {CastedValue});
    LastCCBI->eraseFromParent();
    return true;
  }
  auto NewInstPair = tryDevirtualizeClassMethod(AI, SubTypeValue);
  assert(NewInstPair.first && "Expected to be able to devirtualize apply!");
  replaceDeadApply(AI, NewInstPair.first);
  return true;
}
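// Schematically, for a class Base with direct subclasses A and B, the
// expansion produced by the loop above looks like the following simplified
// and hypothetical SIL:
//
//   checked_cast_br [exact] %self : $Base to $A, bbA, bb1
// bb1:
//   checked_cast_br [exact] %self : $Base to $B, bbB, bbDefault
// bbA(%a : $A):
//   %fa = function_ref @A_method_impl
//   ... apply %fa(%a) ...
// bbB(%b : $B):
//   %fb = function_ref @B_method_impl
//   ... apply %fb(%b) ...
// bbDefault:
//   %m = class_method %self : $Base, #Base.method!1
//   ... apply %m(%self) ... // also devirtualized if isDefaultCaseKnown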
/// Inlines all mandatory inlined functions into the body of a function,
/// first recursively inlining all mandatory apply instructions in those
/// functions into their bodies if necessary.
///
/// \param F the function to be processed
/// \param AI nullptr if this is being called from the top level; the relevant
///   ApplyInst requiring the recursive call when non-null
/// \param FullyInlinedSet the set of all functions already known to be fully
///   processed, to avoid processing them over again
/// \param SetFactory an instance of ImmutableFunctionSet::Factory
/// \param CurrentInliningSet the set of functions currently being inlined in
///   the current call stack of recursive calls
///
/// \returns true if successful, false if failed due to circular inlining.
static bool
runOnFunctionRecursively(SILOptFunctionBuilder &FuncBuilder, SILFunction *F,
                         FullApplySite AI, DenseFunctionSet &FullyInlinedSet,
                         ImmutableFunctionSet::Factory &SetFactory,
                         ImmutableFunctionSet CurrentInliningSet,
                         ClassHierarchyAnalysis *CHA) {
  // Avoid reprocessing functions needlessly.
  if (FullyInlinedSet.count(F))
    return true;

  // Prevent attempt to circularly inline.
  if (CurrentInliningSet.contains(F)) {
    // This cannot happen on a top-level call, so AI should be non-null.
    assert(AI && "Cannot have circular inline without apply");
    SILLocation L = AI.getLoc();
    assert(L && "Must have location for transparent inline apply");
    diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(),
             diag::circular_transparent);
    return false;
  }

  // Add to the current inlining set (immutably, so we only affect the set
  // during this call and recursive subcalls).
  CurrentInliningSet = SetFactory.add(CurrentInliningSet, F);

  SmallVector<std::pair<SILValue, ParameterConvention>, 16> CaptureArgs;
  SmallVector<SILValue, 32> FullArgs;

  // Visiting blocks in reverse order avoids revisiting instructions after
  // block splitting, which would be quadratic.
  for (auto BI = F->rbegin(), BE = F->rend(), nextBB = BI; BI != BE;
       BI = nextBB) {
    // After inlining, the block iterator will be adjusted to point to the
    // last block containing inlined instructions. This way, the inlined
    // function body will be reprocessed within the caller's context without
    // revisiting any original instructions.
    nextBB = std::next(BI);

    // While iterating over this block, instructions are inserted and deleted.
    // To avoid quadratic block splitting, instructions must be processed in
    // reverse order (block splitting reassigns the parent pointer of all
    // instructions below the split point).
    for (auto II = BI->rbegin(); II != BI->rend(); ++II) {
      FullApplySite InnerAI = FullApplySite::isa(&*II);
      if (!InnerAI)
        continue;

      // *NOTE* If devirtualization succeeds, devirtInst may not be InnerAI,
      // but a casted result of InnerAI or even a block argument due to
      // abstraction changes when calling the witness or class method.
      auto *devirtInst = tryDevirtualizeApplyHelper(InnerAI, CHA);
      // Restore II to the current apply site.
      II = devirtInst->getReverseIterator();
      // If the devirtualized result is no longer a valid FullApplySite,
      // devirtualization has succeeded, but the result is not immediately
      // inlinable.
      InnerAI = FullApplySite::isa(devirtInst);
      if (!InnerAI)
        continue;

      SILValue CalleeValue = InnerAI.getCallee();
      bool IsThick;
      PartialApplyInst *PAI;
      SILFunction *CalleeFunction = getCalleeFunction(
          F, InnerAI, IsThick, CaptureArgs, FullArgs, PAI);
      if (!CalleeFunction)
        continue;

      // Then recursively process it first before trying to inline it.
      if (!runOnFunctionRecursively(FuncBuilder, CalleeFunction, InnerAI,
                                    FullyInlinedSet, SetFactory,
                                    CurrentInliningSet, CHA)) {
        // If we failed due to circular inlining, then emit some notes to
        // trace back the failure if we have more information.
        // FIXME: possibly it could be worth recovering and attempting other
        // inlines within this same recursive call rather than simply
        // propagating the failure.
        if (AI) {
          SILLocation L = AI.getLoc();
          assert(L && "Must have location for transparent inline apply");
          diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(),
                   diag::note_while_inlining);
        }
        return false;
      }

      // Get our list of substitutions.
      auto Subs = (PAI ? PAI->getSubstitutionMap()
                       : InnerAI.getSubstitutionMap());

      SILOpenedArchetypesTracker OpenedArchetypesTracker(F);
      F->getModule().registerDeleteNotificationHandler(
          &OpenedArchetypesTracker);
      // The callee only needs to know about opened archetypes used in
      // the substitution list.
      OpenedArchetypesTracker.registerUsedOpenedArchetypes(
          InnerAI.getInstruction());
      if (PAI) {
        OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI);
      }

      SILInliner Inliner(FuncBuilder, SILInliner::InlineKind::MandatoryInline,
                         Subs, OpenedArchetypesTracker);
      if (!Inliner.canInlineApplySite(InnerAI))
        continue;

      // Inline function at I, which also changes I to refer to the first
      // instruction inlined in the case that it succeeds. We purposely
      // process the inlined body after inlining, because the inlining may
      // have exposed new inlining opportunities beyond those present in
      // the inlined function when processed independently.
      LLVM_DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName()
                              << " into @" << InnerAI.getFunction()->getName()
                              << "\n");

      // If we intend to inline a thick function, then we need to balance the
      // reference counts for correctness.
      if (IsThick) {
        bool IsCalleeGuaranteed =
            PAI &&
            PAI->getType().castTo<SILFunctionType>()->isCalleeGuaranteed();
        fixupReferenceCounts(InnerAI.getInstruction(), CalleeValue,
                             CaptureArgs, IsCalleeGuaranteed);
      }

      // Register a callback to record potentially unused function values
      // after inlining.
      ClosureCleanup closureCleanup;
      Inliner.setDeletionCallback([&closureCleanup](SILInstruction *I) {
        closureCleanup.recordDeadFunction(I);
      });

      // Inlining deletes the apply, and can introduce multiple new basic
      // blocks. After this, CalleeValue and other instructions may be
      // invalid. nextBB will point to the last inlined block.
      auto firstInlinedInstAndLastBB =
          Inliner.inlineFunction(CalleeFunction, InnerAI, FullArgs);
      nextBB = firstInlinedInstAndLastBB.second->getReverseIterator();
      ++NumMandatoryInlines;

      // The IR is now valid, and trivial dead arguments are removed. However,
      // we may be able to remove dead callee computations (e.g. dead
      // partial_apply closures).
      closureCleanup.cleanupDeadClosures(F);

      // Resume inlining within nextBB, which contains only the inlined
      // instructions and possibly instructions in the original call block
      // that have not yet been visited.
      break;
    }
  }

  // Keep track of fully inlined functions so we don't waste time recursively
  // reprocessing them.
  FullyInlinedSet.insert(F);
  return true;
}
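// A top-level driver would invoke this roughly as follows (a sketch only;
// pass boilerplate is omitted, and FuncBuilder/CHA are assumed to be set up
// by the surrounding pass):
//
//   DenseFunctionSet FullyInlinedSet;
//   ImmutableFunctionSet::Factory SetFactory;
//   for (auto &F : *getModule())
//     runOnFunctionRecursively(FuncBuilder, &F, FullApplySite(),
//                              FullyInlinedSet, SetFactory,
//                              SetFactory.getEmptySet(), CHA);
//
// Passing a null FullApplySite() marks the top-level call; circular inlining
// detected deeper in the recursion is then diagnosed at the apply that
// introduced it.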
/// \brief Check if it is possible to devirtualize an Apply instruction
/// and a class member obtained using the class_method instruction into
/// a direct call to a specific member of a specific class.
///
/// \p AI is the apply to devirtualize.
/// \p ClassOrMetatypeType is the class type or metatype type we are
///    devirtualizing for.
/// \returns true if it is possible to devirtualize, false otherwise.
bool swift::canDevirtualizeClassMethod(FullApplySite AI,
                                       SILType ClassOrMetatypeType) {
  DEBUG(llvm::dbgs() << "    Trying to devirtualize : "
                     << *AI.getInstruction());

  SILModule &Mod = AI.getModule();

  // First attempt to lookup the origin for our class method. The origin
  // should either be a metatype or an alloc_ref.
  DEBUG(llvm::dbgs() << "        Origin Type: " << ClassOrMetatypeType);

  auto *MI = cast<MethodInst>(AI.getCallee());

  // Find the implementation of the member which should be invoked.
  auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, MI);

  // If we do not find any such function, we have no function to devirtualize
  // to... so bail.
  if (!F) {
    DEBUG(llvm::dbgs() << "        FAIL: Could not find matching VTable or "
                          "vtable method for this class.\n");
    return false;
  }

  if (!F->shouldOptimize()) {
    // Do not consider functions that should not be optimized.
    DEBUG(llvm::dbgs() << "        FAIL: Could not optimize function "
                       << "because it is marked no-opt: " << F->getName()
                       << "\n");
    return false;
  }

  if (AI.getFunction()->isFragile()) {
    // A function_ref inside a fragile function cannot reference a private or
    // hidden symbol.
    if (!F->hasValidLinkageForFragileRef())
      return false;
  }

  // Type of the actual function to be called.
  CanSILFunctionType GenCalleeType = F->getLoweredFunctionType();

  // Type of the actual function to be called with substitutions applied.
  CanSILFunctionType SubstCalleeType = GenCalleeType;

  // For polymorphic functions, bail if the number of substitutions is
  // not the same as the number of expected generic parameters.
  if (GenCalleeType->isPolymorphic()) {
    // First, find the proper list of substitutions for the concrete
    // method to be called.
    SmallVector<Substitution, 4> Subs;
    getSubstitutionsForCallee(Mod, GenCalleeType,
                              ClassOrMetatypeType.getSwiftRValueType(),
                              AI, Subs);
    SubstCalleeType = GenCalleeType->substGenericArgs(Mod, Subs);
  }

  // Check if the optimizer knows how to cast the return type.
  SILType ReturnType = SubstCalleeType->getSILResult();

  if (!canCastValueToABICompatibleType(Mod, ReturnType, AI.getType()))
    return false;

  return true;
}
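// The intended pairing with devirtualizeClassMethod is a check-then-act
// pattern, sketched below under the assumption that the caller already has
// the stripped instance value:
//
//   if (canDevirtualizeClassMethod(AI, Instance.getType())) {
//     auto Result = devirtualizeClassMethod(AI, Instance);
//     if (Result.first)
//       replaceDeadApply(AI, Result.first);
//   }
//
// Keeping the legality check separate from the transformation lets callers
// (e.g. the speculative pass above) query feasibility without mutating SIL.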