// A utility function for cloning the apply instruction.
static FullApplySite CloneApply(FullApplySite AI, SILBuilder &Builder) {
  // Clone the Apply.
  Builder.setCurrentDebugScope(AI.getDebugScope());
  Builder.addOpenedArchetypeOperands(AI.getInstruction());
  auto Args = AI.getArguments();
  SmallVector<SILValue, 8> Ret(Args.size());
  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    Ret[i] = Args[i];

  FullApplySite NAI;
  switch (AI.getInstruction()->getKind()) {
  case SILInstructionKind::ApplyInst:
    NAI = Builder.createApply(AI.getLoc(), AI.getCallee(),
                              AI.getSubstitutions(), Ret,
                              cast<ApplyInst>(AI)->isNonThrowing());
    break;
  case SILInstructionKind::TryApplyInst: {
    auto *TryApplyI = cast<TryApplyInst>(AI.getInstruction());
    NAI = Builder.createTryApply(AI.getLoc(), AI.getCallee(),
                                 AI.getSubstitutions(), Ret,
                                 TryApplyI->getNormalBB(),
                                 TryApplyI->getErrorBB());
  } break;
  default:
    llvm_unreachable("Trying to clone an unsupported apply instruction");
  }

  return NAI;
}
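// Hedged usage sketch (not part of the original source; the helper name is
// hypothetical): clone an apply directly in front of itself so the copy can be
// rewritten independently of the original call site.
static FullApplySite cloneApplyBeforeOriginal(FullApplySite AI) {
  // SILBuilderWithScope positions the builder right before the original apply.
  SILBuilderWithScope Builder(AI.getInstruction());
  return CloneApply(AI, Builder);
}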
void SILCombiner::eraseApply(FullApplySite FAS, const UserListTy &Users) { // Make sure to release and destroy any owned or in-arguments. auto FuncType = FAS.getOrigCalleeType(); assert(FuncType->getParameters().size() == FAS.getNumArguments() && "mismatching number of arguments"); for (int i = 0, e = FAS.getNumArguments(); i < e; ++i) { SILParameterInfo PI = FuncType->getParameters()[i]; auto Arg = FAS.getArgument(i); switch (PI.getConvention()) { case ParameterConvention::Indirect_In: Builder.createDestroyAddr(FAS.getLoc(), Arg); break; case ParameterConvention::Direct_Owned: Builder.createReleaseValue(FAS.getLoc(), Arg, Atomicity::Atomic); break; case ParameterConvention::Indirect_In_Guaranteed: case ParameterConvention::Indirect_Inout: case ParameterConvention::Indirect_InoutAliasable: case ParameterConvention::Direct_Unowned: case ParameterConvention::Direct_Deallocating: case ParameterConvention::Direct_Guaranteed: break; } } // Erase all of the reference counting instructions (in reverse order to have // no dangling uses). for (auto rit = Users.rbegin(), re = Users.rend(); rit != re; ++rit) eraseInstFromFunction(**rit); // And the Apply itself. eraseInstFromFunction(*FAS.getInstruction()); }
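// Illustrative note (assumed SIL, mirroring the comment style used elsewhere in
// this section): erasing `%r = apply %f(%addr, %obj)` where the callee takes
// (@in T, @owned U) first emits the compensating cleanups chosen by the switch
// above:
//
//   destroy_addr %addr : $*T       // Indirect_In argument
//   release_value %obj : $U        // Direct_Owned argument
//
// Guaranteed, unowned, and inout conventions require no cleanup, so they fall
// through the empty cases.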
void SideEffectAnalysis::getEffects(FunctionEffects &ApplyEffects, FullApplySite FAS) { assert(ApplyEffects.ParamEffects.size() == 0 && "Not using a new ApplyEffects?"); ApplyEffects.ParamEffects.resize(FAS.getNumArguments()); // Is this a call to a semantics function? ArraySemanticsCall ASC(FAS.getInstruction()); if (ASC && ASC.hasSelf()) { if (getSemanticEffects(ApplyEffects, ASC)) return; } if (SILFunction *SingleCallee = FAS.getCalleeFunction()) { // Does the function have any @effects? if (getDefinedEffects(ApplyEffects, SingleCallee)) return; } auto Callees = BCA->getCalleeList(FAS); if (!Callees.allCalleesVisible() || // @callee_owned function calls implicitly release the context, which // may call deinits of boxed values. // TODO: be less conservative about what destructors might be called. FAS.getOrigCalleeType()->isCalleeConsumed()) { ApplyEffects.setWorstEffects(); return; } // We can see all the callees. So we just merge the effects from all of // them. for (auto *Callee : Callees) { const FunctionEffects &CalleeFE = getEffects(Callee); ApplyEffects.mergeFrom(CalleeFE); } }
/// Attempt to devirtualize the given apply if possible, and return a /// new instruction in that case, or nullptr otherwise. DevirtualizationResult swift::tryDevirtualizeApply(FullApplySite AI, ClassHierarchyAnalysis *CHA) { DEBUG(llvm::dbgs() << " Trying to devirtualize: " << *AI.getInstruction()); // Devirtualize apply instructions that call witness_method instructions: // // %8 = witness_method $Optional<UInt16>, #LogicValue.boolValue!getter.1 // %9 = apply %8<Self = CodeUnit?>(%6#1) : ... // if (isa<WitnessMethodInst>(AI.getCallee())) return tryDevirtualizeWitnessMethod(AI); /// Optimize a class_method and alloc_ref pair into a direct function /// reference: /// /// \code /// %XX = alloc_ref $Foo /// %YY = class_method %XX : $Foo, #Foo.get!1 : $@convention(method)... /// \endcode /// /// or /// /// %XX = metatype $... /// %YY = class_method %XX : ... /// /// into /// /// %YY = function_ref @... if (auto *CMI = dyn_cast<ClassMethodInst>(AI.getCallee())) { auto &M = AI.getModule(); auto Instance = stripUpCasts(CMI->getOperand()); auto ClassType = Instance->getType(); if (ClassType.is<MetatypeType>()) ClassType = ClassType.getMetatypeInstanceType(M); auto *CD = ClassType.getClassOrBoundGenericClass(); if (isEffectivelyFinalMethod(AI, ClassType, CD, CHA)) return tryDevirtualizeClassMethod(AI, Instance); // Try to check if the exact dynamic type of the instance is statically // known. if (auto Instance = getInstanceWithExactDynamicType(CMI->getOperand(), CMI->getModule(), CHA)) return tryDevirtualizeClassMethod(AI, Instance); } if (isa<SuperMethodInst>(AI.getCallee())) { if (AI.hasSelfArgument()) { return tryDevirtualizeClassMethod(AI, AI.getSelfArgument()); } // It is an invocation of a class method. // Last operand is the metatype that should be used for dispatching. return tryDevirtualizeClassMethod(AI, AI.getArguments().back()); } return std::make_pair(nullptr, FullApplySite()); }
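// Hedged sketch (the helper name is hypothetical): how a pass might consume the
// DevirtualizationResult pair returned above. The first element is the
// instruction producing the devirtualized value (possibly a cast of the new
// apply); the second is the new full apply site.
static bool devirtualizeOneApply(FullApplySite AI, ClassHierarchyAnalysis *CHA) {
  DevirtualizationResult Result = swift::tryDevirtualizeApply(AI, CHA);
  if (!Result.first)
    return false;
  // The caller is now responsible for replacing uses of the old apply with
  // Result.first and erasing the old instruction, e.g. via replaceDeadApply().
  return true;
}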
void SideEffectAnalysis::getEffects(FunctionEffects &ApplyEffects, FullApplySite FAS) { assert(ApplyEffects.ParamEffects.size() == 0 && "Not using a new ApplyEffects?"); ApplyEffects.ParamEffects.resize(FAS.getNumArguments()); // Is this a call to a semantics function? ArraySemanticsCall ASC(FAS.getInstruction()); if (ASC && ASC.hasSelf()) { if (getSemanticEffects(ApplyEffects, ASC)) return; } if (SILFunction *SingleCallee = FAS.getCalleeFunction()) { // Does the function have any @effects? if (getDefinedEffects(ApplyEffects, SingleCallee)) return; } auto Callees = BCA->getCalleeList(FAS); if (!Callees.allCalleesVisible()) { ApplyEffects.setWorstEffects(); return; } // We can see all the callees. So we just merge the effects from all of // them. for (auto *Callee : Callees) { const FunctionEffects &CalleeFE = getEffects(Callee); ApplyEffects.mergeFrom(CalleeFE); } }
/// \brief Check if it is possible to devirtualize an Apply instruction
/// and a class member obtained using the class_method instruction into
/// a direct call to a specific member of a specific class.
///
/// \p AI is the apply to devirtualize.
/// \p ClassOrMetatypeType is the class type or metatype type we are
/// devirtualizing for.
/// return true if it is possible to devirtualize, false otherwise.
bool swift::canDevirtualizeClassMethod(FullApplySite AI,
                                       SILType ClassOrMetatypeType,
                                       OptRemark::Emitter *ORE,
                                       bool isEffectivelyFinalMethod) {
  LLVM_DEBUG(llvm::dbgs() << "    Trying to devirtualize : "
                          << *AI.getInstruction());

  SILModule &Mod = AI.getModule();

  // First attempt to lookup the origin for our class method. The origin should
  // either be a metatype or an alloc_ref.
  LLVM_DEBUG(llvm::dbgs() << "        Origin Type: " << ClassOrMetatypeType);

  auto *MI = cast<MethodInst>(AI.getCallee());

  // Find the implementation of the member which should be invoked.
  auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, MI);

  // If we do not find any such function, we have no function to devirtualize
  // to... so bail.
  if (!F) {
    LLVM_DEBUG(llvm::dbgs() << "        FAIL: Could not find matching VTable "
                               "or vtable method for this class.\n");
    return false;
  }

  // We need to disable the "effectively final" optimization if the calling
  // function is inlinable.
  if (isEffectivelyFinalMethod &&
      AI.getFunction()->getResilienceExpansion() ==
          ResilienceExpansion::Minimal) {
    LLVM_DEBUG(llvm::dbgs() << "        FAIL: Could not optimize function "
                               "because it is an effectively-final inlinable: "
                            << AI.getFunction()->getName() << "\n");
    return false;
  }

  // Mandatory inlining does class method devirtualization. I'm not sure if
  // this is really needed, but some tests rely on it.
  // So even for Onone functions we have to do it if the SILStage is raw.
  if (F->getModule().getStage() != SILStage::Raw && !F->shouldOptimize()) {
    // Do not consider functions that should not be optimized.
    LLVM_DEBUG(llvm::dbgs() << "        FAIL: Could not optimize function "
                            << " because it is marked no-opt: " << F->getName()
                            << "\n");
    return false;
  }

  if (AI.getFunction()->isSerialized()) {
    // function_ref inside a fragile function cannot reference a private or
    // hidden symbol.
    if (!F->hasValidLinkageForFragileRef())
      return false;
  }

  return true;
}
/// \brief Check if it is possible to devirtualize an Apply instruction /// and a class member obtained using the class_method instruction into /// a direct call to a specific member of a specific class. /// /// \p AI is the apply to devirtualize. /// \p ClassOrMetatypeType is the class type or metatype type we are /// devirtualizing for. /// return true if it is possible to devirtualize, false - otherwise. bool swift::canDevirtualizeClassMethod(FullApplySite AI, SILType ClassOrMetatypeType) { DEBUG(llvm::dbgs() << " Trying to devirtualize : " << *AI.getInstruction()); SILModule &Mod = AI.getModule(); // First attempt to lookup the origin for our class method. The origin should // either be a metatype or an alloc_ref. DEBUG(llvm::dbgs() << " Origin Type: " << ClassOrMetatypeType); auto *MI = cast<MethodInst>(AI.getCallee()); // Find the implementation of the member which should be invoked. auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, MI); // If we do not find any such function, we have no function to devirtualize // to... so bail. if (!F) { DEBUG(llvm::dbgs() << " FAIL: Could not find matching VTable or " "vtable method for this class.\n"); return false; } if (!F->shouldOptimize()) { // Do not consider functions that should not be optimized. DEBUG(llvm::dbgs() << " FAIL: Could not optimize function " << " because it is marked no-opt: " << F->getName() << "\n"); return false; } if (AI.getFunction()->isSerialized()) { // function_ref inside fragile function cannot reference a private or // hidden symbol. if (!F->hasValidLinkageForFragileRef()) return false; } if (MI->isVolatile()) { // dynamic dispatch is semantically required, can't devirtualize return false; } return true; }
SILValue SILInliner::borrowFunctionArgument(SILValue callArg, FullApplySite AI) { if (!AI.getFunction()->hasQualifiedOwnership() || callArg.getOwnershipKind() != ValueOwnershipKind::Owned) { return callArg; } auto *borrow = getBuilder().createBeginBorrow(AI.getLoc(), callArg); if (auto *tryAI = dyn_cast<TryApplyInst>(AI)) { SILBuilder returnBuilder(tryAI->getNormalBB()->begin()); returnBuilder.createEndBorrow(AI.getLoc(), borrow, callArg); SILBuilder throwBuilder(tryAI->getErrorBB()->begin()); throwBuilder.createEndBorrow(AI.getLoc(), borrow, callArg); } else { SILBuilder returnBuilder(std::next(AI.getInstruction()->getIterator())); returnBuilder.createEndBorrow(AI.getLoc(), borrow, callArg); } return borrow; }
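// Illustrative note (assumed SIL): for an @owned argument flowing into an
// inlined try_apply, the code above produces a borrow scope that covers both
// successors of the call, roughly:
//
//   %b = begin_borrow %arg
//   ...                                 // inlined callee body uses %b
// bbNormal(%r : $R):
//   end_borrow %b from %arg
// bbError(%e : $Error):
//   end_borrow %b from %arg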
// Summarize the callee side effects of a call instruction using this // FunctionSideEffects object without analyzing the callee function bodies or // scheduling the callees for bottom-up propagation. // // Return true if this call-site's effects are summarized without visiting the // callee. bool FunctionSideEffects::summarizeCall(FullApplySite fullApply) { assert(ParamEffects.empty() && "Expect uninitialized effects."); ParamEffects.resize(fullApply.getNumArguments()); // Is this a call to a semantics function? if (auto apply = dyn_cast<ApplyInst>(fullApply.getInstruction())) { ArraySemanticsCall ASC(apply); if (ASC && ASC.hasSelf()) { if (setSemanticEffects(ASC)) return true; } } if (SILFunction *SingleCallee = fullApply.getReferencedFunction()) { // Does the function have any @effects? if (setDefinedEffects(SingleCallee)) return true; } return false; }
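// Hedged sketch of the intended calling pattern (the driver and helper name are
// assumptions; FunctionSideEffects is assumed default-constructible here): only
// when the call cannot be summarized locally do we need to analyze or schedule
// the callees.
static void summarizeOrDefer(FullApplySite fullApply) {
  FunctionSideEffects applyEffects;
  if (applyEffects.summarizeCall(fullApply))
    return; // @_semantics/@_effects gave us a complete summary.
  // Otherwise: merge the effects of the visible callees, or schedule them for
  // bottom-up propagation (not shown here).
}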
/// Attempt to devirtualize the given apply if possible, and return a /// new instruction in that case, or nullptr otherwise. DevirtualizationResult swift::tryDevirtualizeApply(FullApplySite AI) { DEBUG(llvm::dbgs() << " Trying to devirtualize: " << *AI.getInstruction()); // Devirtualize apply instructions that call witness_method instructions: // // %8 = witness_method $Optional<UInt16>, #LogicValue.boolValue!getter.1 // %9 = apply %8<Self = CodeUnit?>(%6#1) : ... // if (isa<WitnessMethodInst>(AI.getCallee())) return tryDevirtualizeWitnessMethod(AI); /// Optimize a class_method and alloc_ref pair into a direct function /// reference: /// /// \code /// %XX = alloc_ref $Foo /// %YY = class_method %XX : $Foo, #Foo.get!1 : $@convention(method)... /// \endcode /// /// or /// /// %XX = metatype $... /// %YY = class_method %XX : ... /// /// into /// /// %YY = function_ref @... if (auto *CMI = dyn_cast<ClassMethodInst>(AI.getCallee())) { // Check if the class member is known to be final. if (isKnownFinal(CMI->getModule(), CMI->getMember())) return tryDevirtualizeClassMethod(AI, CMI->getOperand()); // Try to check if the exact dynamic type of the instance is statically // known. if (auto Instance = getInstanceWithExactDynamicType(CMI->getOperand())) return tryDevirtualizeClassMethod(AI, Instance); } return std::make_pair(nullptr, FullApplySite()); }
// Returns true if a given apply site should be skipped during the // early inlining pass. // // NOTE: Add here the checks for any specific @_semantics/@_effects // attributes causing a given callee to be excluded from the inlining // during the early inlining pass. static bool shouldSkipApplyDuringEarlyInlining(FullApplySite AI) { // Add here the checks for any specific @_semantics attributes that need // to be skipped during the early inlining pass. ArraySemanticsCall ASC(AI.getInstruction()); if (ASC && !ASC.canInlineEarly()) return true; SILFunction *Callee = AI.getReferencedFunction(); if (!Callee) return false; if (Callee->hasSemanticsAttr("self_no_escaping_closure") || Callee->hasSemanticsAttr("pair_no_escaping_closure")) return true; // Add here the checks for any specific @_effects attributes that need // to be skipped during the early inlining pass. if (Callee->hasEffectsKind()) return true; return false; }
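// Hedged sketch (the enclosing visitor and helper name are assumptions): the
// predicate above is meant as an early bail-out when the early inliner
// considers a call site.
static bool isEarlyInlineCandidate(FullApplySite AI) {
  if (shouldSkipApplyDuringEarlyInlining(AI))
    return false; // Leave this call for a later inlining pass.
  return AI.getReferencedFunction() != nullptr;
}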
/// \brief Devirtualize an apply of a class method. /// /// \p AI is the apply to devirtualize. /// \p ClassOrMetatype is a class value or metatype value that is the /// self argument of the apply we will devirtualize. /// return the result value of the new ApplyInst if created one or null. DevirtualizationResult swift::devirtualizeClassMethod(FullApplySite AI, SILValue ClassOrMetatype) { DEBUG(llvm::dbgs() << " Trying to devirtualize : " << *AI.getInstruction()); SILModule &Mod = AI.getModule(); auto *CMI = cast<ClassMethodInst>(AI.getCallee()); auto ClassOrMetatypeType = ClassOrMetatype.getType(); auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, CMI->getMember()); CanSILFunctionType GenCalleeType = F->getLoweredFunctionType(); auto Subs = getSubstitutionsForCallee(Mod, GenCalleeType, ClassOrMetatypeType, AI); CanSILFunctionType SubstCalleeType = GenCalleeType; if (GenCalleeType->isPolymorphic()) SubstCalleeType = GenCalleeType->substGenericArgs(Mod, Mod.getSwiftModule(), Subs); SILBuilderWithScope B(AI.getInstruction()); FunctionRefInst *FRI = B.createFunctionRef(AI.getLoc(), F); // Create the argument list for the new apply, casting when needed // in order to handle covariant indirect return types and // contravariant argument types. llvm::SmallVector<SILValue, 8> NewArgs; auto Args = AI.getArguments(); auto ParamTypes = SubstCalleeType->getParameterSILTypes(); for (unsigned i = 0, e = Args.size() - 1; i != e; ++i) NewArgs.push_back(castValueToABICompatibleType(&B, AI.getLoc(), Args[i], Args[i].getType(), ParamTypes[i]).getValue()); // Add the self argument, upcasting if required because we're // calling a base class's method. auto SelfParamTy = SubstCalleeType->getSelfParameter().getSILType(); NewArgs.push_back(castValueToABICompatibleType(&B, AI.getLoc(), ClassOrMetatype, ClassOrMetatypeType, SelfParamTy).getValue()); // If we have a direct return type, make sure we use the subst callee return // type. If we have an indirect return type, AI's return type of the empty // tuple should be ok. SILType ResultTy = AI.getType(); if (!SubstCalleeType->hasIndirectResult()) { ResultTy = SubstCalleeType->getSILResult(); } SILType SubstCalleeSILType = SILType::getPrimitiveObjectType(SubstCalleeType); FullApplySite NewAI; SILBasicBlock *ResultBB = nullptr; SILBasicBlock *NormalBB = nullptr; SILValue ResultValue; bool ResultCastRequired = false; SmallVector<Operand *, 4> OriginalResultUses; if (!isa<TryApplyInst>(AI)) { NewAI = B.createApply(AI.getLoc(), FRI, SubstCalleeSILType, ResultTy, Subs, NewArgs, cast<ApplyInst>(AI)->isNonThrowing()); ResultValue = SILValue(NewAI.getInstruction(), 0); } else { auto *TAI = cast<TryApplyInst>(AI); // Create new normal and error BBs only if: // - re-using a BB would create a critical edge // - or, the result of the new apply would be of different // type than the argument of the original normal BB. 
if (TAI->getNormalBB()->getSinglePredecessor()) ResultBB = TAI->getNormalBB(); else { ResultBB = B.getFunction().createBasicBlock(); ResultBB->createBBArg(ResultTy); } NormalBB = TAI->getNormalBB(); SILBasicBlock *ErrorBB = nullptr; if (TAI->getErrorBB()->getSinglePredecessor()) ErrorBB = TAI->getErrorBB(); else { ErrorBB = B.getFunction().createBasicBlock(); ErrorBB->createBBArg(TAI->getErrorBB()->getBBArg(0)->getType()); } NewAI = B.createTryApply(AI.getLoc(), FRI, SubstCalleeSILType, Subs, NewArgs, ResultBB, ErrorBB); if (ErrorBB != TAI->getErrorBB()) { B.setInsertionPoint(ErrorBB); B.createBranch(TAI->getLoc(), TAI->getErrorBB(), {ErrorBB->getBBArg(0)}); } // Does the result value need to be casted? ResultCastRequired = ResultTy != NormalBB->getBBArg(0)->getType(); if (ResultBB != NormalBB) B.setInsertionPoint(ResultBB); else if (ResultCastRequired) { B.setInsertionPoint(NormalBB->begin()); // Collect all uses, before casting. for (auto *Use : NormalBB->getBBArg(0)->getUses()) { OriginalResultUses.push_back(Use); } NormalBB->getBBArg(0)->replaceAllUsesWith(SILUndef::get(AI.getType(), Mod)); NormalBB->replaceBBArg(0, ResultTy, nullptr); } // The result value is passed as a parameter to the normal block. ResultValue = ResultBB->getBBArg(0); } // Check if any casting is required for the return value. ResultValue = castValueToABICompatibleType(&B, NewAI.getLoc(), ResultValue, ResultTy, AI.getType()).getValue(); DEBUG(llvm::dbgs() << " SUCCESS: " << F->getName() << "\n"); NumClassDevirt++; if (NormalBB) { if (NormalBB != ResultBB) { // If artificial normal BB was introduced, branch // to the original normal BB. B.createBranch(NewAI.getLoc(), NormalBB, { ResultValue }); } else if (ResultCastRequired) { // Update all original uses by the new value. for(auto *Use: OriginalResultUses) { Use->set(ResultValue); } } return std::make_pair(NewAI.getInstruction(), NewAI); } // We need to return a pair of values here: // - the first one is the actual result of the devirtualized call, possibly // casted into an appropriate type. This SILValue may be a BB arg, if it // was a cast between optional types. // - the second one is the new apply site. return std::make_pair(ResultValue.getDef(), NewAI); }
/// \brief Check if it is possible to devirtualize an Apply instruction /// and a class member obtained using the class_method instruction into /// a direct call to a specific member of a specific class. /// /// \p AI is the apply to devirtualize. /// \p ClassOrMetatypeType is the class type or metatype type we are /// devirtualizing for. /// return true if it is possible to devirtualize, false - otherwise. bool swift::canDevirtualizeClassMethod(FullApplySite AI, SILType ClassOrMetatypeType) { DEBUG(llvm::dbgs() << " Trying to devirtualize : " << *AI.getInstruction()); SILModule &Mod = AI.getModule(); // First attempt to lookup the origin for our class method. The origin should // either be a metatype or an alloc_ref. DEBUG(llvm::dbgs() << " Origin Type: " << ClassOrMetatypeType); auto *MI = cast<MethodInst>(AI.getCallee()); // Find the implementation of the member which should be invoked. auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, MI); // If we do not find any such function, we have no function to devirtualize // to... so bail. if (!F) { DEBUG(llvm::dbgs() << " FAIL: Could not find matching VTable or " "vtable method for this class.\n"); return false; } if (!F->shouldOptimize()) { // Do not consider functions that should not be optimized. DEBUG(llvm::dbgs() << " FAIL: Could not optimize function " << " because it is marked no-opt: " << F->getName() << "\n"); return false; } if (AI.getFunction()->isFragile()) { // function_ref inside fragile function cannot reference a private or // hidden symbol. if (!F->hasValidLinkageForFragileRef()) return false; } // Type of the actual function to be called. CanSILFunctionType GenCalleeType = F->getLoweredFunctionType(); // Type of the actual function to be called with substitutions applied. CanSILFunctionType SubstCalleeType = GenCalleeType; // For polymorphic functions, bail if the number of substitutions is // not the same as the number of expected generic parameters. if (GenCalleeType->isPolymorphic()) { // First, find proper list of substitutions for the concrete // method to be called. SmallVector<Substitution, 4> Subs; getSubstitutionsForCallee(Mod, GenCalleeType, ClassOrMetatypeType.getSwiftRValueType(), AI, Subs); SubstCalleeType = GenCalleeType->substGenericArgs(Mod, Subs); } // Check if the optimizer knows how to cast the return type. SILType ReturnType = SubstCalleeType->getSILResult(); if (!canCastValueToABICompatibleType(Mod, ReturnType, AI.getType())) return false; return true; }
/// \brief Check if it is possible to devirtualize an Apply instruction /// and a class member obtained using the class_method instruction into /// a direct call to a specific member of a specific class. /// /// \p AI is the apply to devirtualize. /// \p ClassOrMetatypeType is the class type or metatype type we are /// devirtualizing for. /// return true if it is possible to devirtualize, false - otherwise. bool swift::canDevirtualizeClassMethod(FullApplySite AI, SILType ClassOrMetatypeType) { DEBUG(llvm::dbgs() << " Trying to devirtualize : " << *AI.getInstruction()); SILModule &Mod = AI.getModule(); // Bail if any generic types parameters of the class instance type are // unbound. // We cannot devirtualize unbound generic calls yet. if (isClassWithUnboundGenericParameters(ClassOrMetatypeType, Mod)) return false; // First attempt to lookup the origin for our class method. The origin should // either be a metatype or an alloc_ref. DEBUG(llvm::dbgs() << " Origin Type: " << ClassOrMetatypeType); auto *CMI = cast<ClassMethodInst>(AI.getCallee()); // Find the implementation of the member which should be invoked. auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, CMI->getMember()); // If we do not find any such function, we have no function to devirtualize // to... so bail. if (!F) { DEBUG(llvm::dbgs() << " FAIL: Could not find matching VTable or " "vtable method for this class.\n"); return false; } if (AI.getFunction()->isFragile()) { // function_ref inside fragile function cannot reference a private or // hidden symbol. if (!(F->isFragile() || isValidLinkageForFragileRef(F->getLinkage()) || F->isExternalDeclaration())) return false; } CanSILFunctionType GenCalleeType = F->getLoweredFunctionType(); auto Subs = getSubstitutionsForCallee(Mod, GenCalleeType, ClassOrMetatypeType, AI); // For polymorphic functions, bail if the number of substitutions is // not the same as the number of expected generic parameters. if (GenCalleeType->isPolymorphic()) { auto GenericSig = GenCalleeType->getGenericSignature(); // Get the number of expected generic parameters, which // is a sum of the number of explicit generic parameters // and the number of their recursive member types exposed // through protocol requirements. auto DepTypes = GenericSig->getAllDependentTypes(); unsigned ExpectedGenParamsNum = 0; for (auto DT: DepTypes) { (void)DT; ExpectedGenParamsNum++; } if (ExpectedGenParamsNum != Subs.size()) return false; } // Check if the optimizer knows how to cast the return type. CanSILFunctionType SubstCalleeType = GenCalleeType; if (GenCalleeType->isPolymorphic()) SubstCalleeType = GenCalleeType->substGenericArgs(Mod, Mod.getSwiftModule(), Subs); // If we have a direct return type, make sure we use the subst callee return // type. If we have an indirect return type, AI's return type of the empty // tuple should be ok. SILType ReturnType = AI.getType(); if (!SubstCalleeType->hasIndirectResult()) { ReturnType = SubstCalleeType->getSILResult(); } if (!canCastValueToABICompatibleType(Mod, ReturnType, AI.getType())) return false; return true; }
/// Update the callsite to pass in the correct arguments. static void rewriteApplyInst(const CallSiteDescriptor &CSDesc, SILFunction *NewF) { FullApplySite AI = CSDesc.getApplyInst(); SingleValueInstruction *Closure = CSDesc.getClosure(); SILBuilderWithScope Builder(Closure); FunctionRefInst *FRI = Builder.createFunctionRef(AI.getLoc(), NewF); // Create the args for the new apply by removing the closure argument... llvm::SmallVector<SILValue, 8> NewArgs; unsigned Index = 0; for (auto Arg : AI.getArguments()) { if (Index != CSDesc.getClosureIndex()) NewArgs.push_back(Arg); Index++; } // ... and appending the captured arguments. We also insert retains here at // the location of the original closure. This is needed to balance the // implicit release of all captured arguments that occurs when the partial // apply is destroyed. SILModule &M = NewF->getModule(); auto ClosureCalleeConv = CSDesc.getClosureCallee()->getConventions(); unsigned ClosureArgIdx = ClosureCalleeConv.getNumSILArguments() - CSDesc.getNumArguments(); for (auto Arg : CSDesc.getArguments()) { SILType ArgTy = Arg->getType(); // If our argument is of trivial type, continue... if (ArgTy.isTrivial(M)) { NewArgs.push_back(Arg); ++ClosureArgIdx; continue; } auto ArgConvention = ClosureCalleeConv.getSILArgumentConvention(ClosureArgIdx); // Non-inout indirect arguments are not supported yet. assert(ArgTy.isObject() || !isNonInoutIndirectSILArgument(Arg, ArgConvention)); // If argument is not an object and it is an inout parameter, // continue... if (!ArgTy.isObject() && !isNonInoutIndirectSILArgument(Arg, ArgConvention)) { NewArgs.push_back(Arg); ++ClosureArgIdx; continue; } // TODO: When we support address types, this code path will need to be // updated. // We need to balance the consumed argument of the new partial_apply in the // specialized callee by a retain. If both the original partial_apply and // the apply of the callee are in the same basic block we can assume they // are executed the same number of times. Therefore it is sufficient to just // retain the argument at the site of the original partial_apply. // // %closure = partial_apply (%arg) // = apply %callee(%closure) // => // retain %arg // %closure = partial_apply (%arg) // apply %specialized_callee(..., %arg) // // However, if they are not in the same basic block the callee might be // executed more frequently than the closure (for example, if the closure is // created in a loop preheader and the callee taking the closure is executed // in the loop). In such a case we must keep the argument live across the // call site of the callee and emit a matching retain for every invocation // of the callee. // // %closure = partial_apply (%arg) // // while () { // = %callee(%closure) // } // => // retain %arg // %closure = partial_apply (%arg) // // while () { // retain %arg // apply %specialized_callee(.., %arg) // } // release %arg // if (AI.getParent() != Closure->getParent()) { // Emit the retain and release that keeps the argument life across the // callee using the closure. CSDesc.extendArgumentLifetime(Arg, ArgConvention); // Emit the retain that matches the captured argument by the // partial_apply // in the callee that is consumed by the partial_apply. 
Builder.setInsertionPoint(AI.getInstruction()); Builder.createRetainValue(Closure->getLoc(), Arg, Builder.getDefaultAtomicity()); } else { Builder.createRetainValue(Closure->getLoc(), Arg, Builder.getDefaultAtomicity()); } NewArgs.push_back(Arg); ++ClosureArgIdx; } Builder.setInsertionPoint(AI.getInstruction()); FullApplySite NewAI; switch (AI.getKind()) { case FullApplySiteKind::TryApplyInst: { auto *TAI = cast<TryApplyInst>(AI); NewAI = Builder.createTryApply(AI.getLoc(), FRI, SubstitutionMap(), NewArgs, TAI->getNormalBB(), TAI->getErrorBB()); // If we passed in the original closure as @owned, then insert a release // right after NewAI. This is to balance the +1 from being an @owned // argument to AI. if (!CSDesc.isClosureConsumed() || !CSDesc.closureHasRefSemanticContext()) { break; } Builder.setInsertionPoint(TAI->getNormalBB()->begin()); Builder.createReleaseValue(Closure->getLoc(), Closure, Builder.getDefaultAtomicity()); Builder.setInsertionPoint(TAI->getErrorBB()->begin()); Builder.createReleaseValue(Closure->getLoc(), Closure, Builder.getDefaultAtomicity()); Builder.setInsertionPoint(AI.getInstruction()); break; } case FullApplySiteKind::ApplyInst: { auto oldApply = cast<ApplyInst>(AI); auto newApply = Builder.createApply(oldApply->getLoc(), FRI, SubstitutionMap(), NewArgs, oldApply->isNonThrowing()); // If we passed in the original closure as @owned, then insert a release // right after NewAI. This is to balance the +1 from being an @owned // argument to AI. if (CSDesc.isClosureConsumed() && CSDesc.closureHasRefSemanticContext()) Builder.createReleaseValue(Closure->getLoc(), Closure, Builder.getDefaultAtomicity()); // Replace all uses of the old apply with the new apply. oldApply->replaceAllUsesWith(newApply); break; } case FullApplySiteKind::BeginApplyInst: llvm_unreachable("Unhandled case"); } // Erase the old apply. AI.getInstruction()->eraseFromParent(); // TODO: Maybe include invalidation code for CallSiteDescriptor after we erase // AI from parent? }
/// \brief Inlines all mandatory inlined functions into the body of a function, /// first recursively inlining all mandatory apply instructions in those /// functions into their bodies if necessary. /// /// \param F the function to be processed /// \param AI nullptr if this is being called from the top level; the relevant /// ApplyInst requiring the recursive call when non-null /// \param FullyInlinedSet the set of all functions already known to be fully /// processed, to avoid processing them over again /// \param SetFactory an instance of ImmutableFunctionSet::Factory /// \param CurrentInliningSet the set of functions currently being inlined in /// the current call stack of recursive calls /// /// \returns true if successful, false if failed due to circular inlining. static bool runOnFunctionRecursively(SILFunction *F, FullApplySite AI, DenseFunctionSet &FullyInlinedSet, ImmutableFunctionSet::Factory &SetFactory, ImmutableFunctionSet CurrentInliningSet, ClassHierarchyAnalysis *CHA) { // Avoid reprocessing functions needlessly. if (FullyInlinedSet.count(F)) return true; // Prevent attempt to circularly inline. if (CurrentInliningSet.contains(F)) { // This cannot happen on a top-level call, so AI should be non-null. assert(AI && "Cannot have circular inline without apply"); SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::circular_transparent); return false; } // Add to the current inlining set (immutably, so we only affect the set // during this call and recursive subcalls). CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); SmallVector<std::pair<SILValue, ParameterConvention>, 16> CaptureArgs; SmallVector<SILValue, 32> FullArgs; for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) { for (auto II = BI->begin(), IE = BI->end(); II != IE; ++II) { FullApplySite InnerAI = FullApplySite::isa(&*II); if (!InnerAI) continue; auto *ApplyBlock = InnerAI.getParent(); // *NOTE* If devirtualization succeeds, sometimes II will not be InnerAI, // but a casted result of InnerAI or even a block argument due to // abstraction changes when calling the witness or class method. We still // know that InnerAI dominates II though. std::tie(InnerAI, II) = tryDevirtualizeApplyHelper(InnerAI, II, CHA); if (!InnerAI) continue; SILValue CalleeValue = InnerAI.getCallee(); bool IsThick; PartialApplyInst *PAI; SILFunction *CalleeFunction = getCalleeFunction( F, InnerAI, IsThick, CaptureArgs, FullArgs, PAI); if (!CalleeFunction) continue; // Then recursively process it first before trying to inline it. if (!runOnFunctionRecursively(CalleeFunction, InnerAI, FullyInlinedSet, SetFactory, CurrentInliningSet, CHA)) { // If we failed due to circular inlining, then emit some notes to // trace back the failure if we have more information. // FIXME: possibly it could be worth recovering and attempting other // inlines within this same recursive call rather than simply // propagating the failure. if (AI) { SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::note_while_inlining); } return false; } // Get our list of substitutions. auto Subs = (PAI ? 
PAI->getSubstitutionMap() : InnerAI.getSubstitutionMap()); SILOpenedArchetypesTracker OpenedArchetypesTracker(F); F->getModule().registerDeleteNotificationHandler( &OpenedArchetypesTracker); // The callee only needs to know about opened archetypes used in // the substitution list. OpenedArchetypesTracker.registerUsedOpenedArchetypes( InnerAI.getInstruction()); if (PAI) { OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI); } SILInliner Inliner(*F, *CalleeFunction, SILInliner::InlineKind::MandatoryInline, Subs, OpenedArchetypesTracker); if (!Inliner.canInlineFunction(InnerAI)) { // See comment above about casting when devirtualizing and how this // sometimes causes II and InnerAI to be different and even in different // blocks. II = InnerAI.getInstruction()->getIterator(); continue; } // Inline function at I, which also changes I to refer to the first // instruction inlined in the case that it succeeds. We purposely // process the inlined body after inlining, because the inlining may // have exposed new inlining opportunities beyond those present in // the inlined function when processed independently. LLVM_DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() << " into @" << InnerAI.getFunction()->getName() << "\n"); // If we intend to inline a thick function, then we need to balance the // reference counts for correctness. if (IsThick) { bool IsCalleeGuaranteed = PAI && PAI->getType().castTo<SILFunctionType>()->isCalleeGuaranteed(); fixupReferenceCounts(II, CalleeValue, CaptureArgs, IsCalleeGuaranteed); } // Decrement our iterator (carefully, to avoid going off the front) so it // is valid after inlining is done. Inlining deletes the apply, and can // introduce multiple new basic blocks. II = prev_or_default(II, ApplyBlock->begin(), ApplyBlock->end()); Inliner.inlineFunction(InnerAI, FullArgs); // We were able to inline successfully. Remove the apply. InnerAI.getInstruction()->eraseFromParent(); // Reestablish our iterator if it wrapped. if (II == ApplyBlock->end()) II = ApplyBlock->begin(); // Update the iterator when instructions are removed. DeleteInstructionsHandler DeletionHandler(II); // Now that the IR is correct, see if we can remove dead callee // computations (e.g. dead partial_apply closures). cleanupCalleeValue(CalleeValue, FullArgs); // Reposition iterators possibly invalidated by mutation. BI = SILFunction::iterator(ApplyBlock); IE = ApplyBlock->end(); assert(BI == SILFunction::iterator(II->getParent()) && "Mismatch between the instruction and basic block"); ++NumMandatoryInlines; } } // Keep track of full inlined functions so we don't waste time recursively // reprocessing them. FullyInlinedSet.insert(F); return true; }
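// Hedged sketch of how the recursion above is typically started from the pass
// entry point (the wrapper name and the getEmptySet() call on the factory are
// assumptions): the top-level call passes a null apply site.
static void runMandatoryInlining(SILFunction *F, ClassHierarchyAnalysis *CHA) {
  DenseFunctionSet FullyInlinedSet;
  ImmutableFunctionSet::Factory SetFactory;
  runOnFunctionRecursively(F, FullApplySite(), FullyInlinedSet, SetFactory,
                           SetFactory.getEmptySet(), CHA);
}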
/// \brief Inlines all mandatory inlined functions into the body of a function, /// first recursively inlining all mandatory apply instructions in those /// functions into their bodies if necessary. /// /// \param F the function to be processed /// \param AI nullptr if this is being called from the top level; the relevant /// ApplyInst requiring the recursive call when non-null /// \param FullyInlinedSet the set of all functions already known to be fully /// processed, to avoid processing them over again /// \param SetFactory an instance of ImmutableFunctionSet::Factory /// \param CurrentInliningSet the set of functions currently being inlined in /// the current call stack of recursive calls /// /// \returns true if successful, false if failed due to circular inlining. static bool runOnFunctionRecursively(SILFunction *F, FullApplySite AI, SILModule::LinkingMode Mode, DenseFunctionSet &FullyInlinedSet, ImmutableFunctionSet::Factory &SetFactory, ImmutableFunctionSet CurrentInliningSet, ClassHierarchyAnalysis *CHA) { // Avoid reprocessing functions needlessly. if (FullyInlinedSet.count(F)) return true; // Prevent attempt to circularly inline. if (CurrentInliningSet.contains(F)) { // This cannot happen on a top-level call, so AI should be non-null. assert(AI && "Cannot have circular inline without apply"); SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::circular_transparent); return false; } // Add to the current inlining set (immutably, so we only affect the set // during this call and recursive subcalls). CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); SmallVector<SILValue, 16> CaptureArgs; SmallVector<SILValue, 32> FullArgs; for (auto FI = F->begin(), FE = F->end(); FI != FE; ++FI) { for (auto I = FI->begin(), E = FI->end(); I != E; ++I) { FullApplySite InnerAI = FullApplySite::isa(&*I); if (!InnerAI) continue; auto *ApplyBlock = InnerAI.getParent(); auto NewInstPair = tryDevirtualizeApply(InnerAI, CHA); if (auto *NewInst = NewInstPair.first) { replaceDeadApply(InnerAI, NewInst); if (auto *II = dyn_cast<SILInstruction>(NewInst)) I = II->getIterator(); else I = NewInst->getParentBlock()->begin(); auto NewAI = FullApplySite::isa(NewInstPair.second.getInstruction()); if (!NewAI) continue; InnerAI = NewAI; } SILLocation Loc = InnerAI.getLoc(); SILValue CalleeValue = InnerAI.getCallee(); bool IsThick; PartialApplyInst *PAI; SILFunction *CalleeFunction = getCalleeFunction(InnerAI, IsThick, CaptureArgs, FullArgs, PAI, Mode); if (!CalleeFunction || CalleeFunction->isTransparent() == IsNotTransparent) continue; if (F->isFragile() && !CalleeFunction->hasValidLinkageForFragileRef()) { if (!CalleeFunction->hasValidLinkageForFragileInline()) { llvm::errs() << "caller: " << F->getName() << "\n"; llvm::errs() << "callee: " << CalleeFunction->getName() << "\n"; llvm_unreachable("Should never be inlining a resilient function into " "a fragile function"); } continue; } // Then recursively process it first before trying to inline it. if (!runOnFunctionRecursively(CalleeFunction, InnerAI, Mode, FullyInlinedSet, SetFactory, CurrentInliningSet, CHA)) { // If we failed due to circular inlining, then emit some notes to // trace back the failure if we have more information. // FIXME: possibly it could be worth recovering and attempting other // inlines within this same recursive call rather than simply // propagating the failure. 
if (AI) { SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::note_while_inlining); } return false; } // Inline function at I, which also changes I to refer to the first // instruction inlined in the case that it succeeds. We purposely // process the inlined body after inlining, because the inlining may // have exposed new inlining opportunities beyond those present in // the inlined function when processed independently. DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() << " into @" << InnerAI.getFunction()->getName() << "\n"); // If we intend to inline a thick function, then we need to balance the // reference counts for correctness. if (IsThick && I != ApplyBlock->begin()) { // We need to find an appropriate location for our fix up code // We used to do this after inlining Without any modifications // This caused us to add a release in a wrong place: // It would release a value *before* retaining it! // It is really problematic to do this after inlining - // Finding a valid insertion point is tricky: // Inlining might add new basic blocks and/or remove the apply // We want to add the fix up *just before* where the current apply is! // Unfortunately, we *can't* add the fix up code here: // Inlining might fail for any reason - // If that occurred we'd need to undo our fix up code. // Instead, we split the current basic block - // Making sure we have a basic block that starts with our apply. SILBuilderWithScope B(I); ApplyBlock = splitBasicBlockAndBranch(B, &*I, nullptr, nullptr); I = ApplyBlock->begin(); } // Decrement our iterator (carefully, to avoid going off the front) so it // is valid after inlining is done. Inlining deletes the apply, and can // introduce multiple new basic blocks. if (I != ApplyBlock->begin()) --I; else I = ApplyBlock->end(); std::vector<Substitution> ApplySubs(InnerAI.getSubstitutions()); if (PAI) { auto PAISubs = PAI->getSubstitutions(); ApplySubs.insert(ApplySubs.end(), PAISubs.begin(), PAISubs.end()); } SILOpenedArchetypesTracker OpenedArchetypesTracker(*F); F->getModule().registerDeleteNotificationHandler( &OpenedArchetypesTracker); // The callee only needs to know about opened archetypes used in // the substitution list. OpenedArchetypesTracker.registerUsedOpenedArchetypes(InnerAI.getInstruction()); if (PAI) { OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI); } SILInliner Inliner(*F, *CalleeFunction, SILInliner::InlineKind::MandatoryInline, ApplySubs, OpenedArchetypesTracker); if (!Inliner.inlineFunction(InnerAI, FullArgs)) { I = InnerAI.getInstruction()->getIterator(); continue; } // Inlining was successful. Remove the apply. InnerAI.getInstruction()->eraseFromParent(); // Reestablish our iterator if it wrapped. if (I == ApplyBlock->end()) I = ApplyBlock->begin(); // Update the iterator when instructions are removed. DeleteInstructionsHandler DeletionHandler(I); // If the inlined apply was a thick function, then we need to balance the // reference counts for correctness. if (IsThick) fixupReferenceCounts(I, Loc, CalleeValue, CaptureArgs); // Now that the IR is correct, see if we can remove dead callee // computations (e.g. dead partial_apply closures). cleanupCalleeValue(CalleeValue, CaptureArgs, FullArgs); // Reposition iterators possibly invalidated by mutation. 
FI = SILFunction::iterator(ApplyBlock); E = ApplyBlock->end(); assert(FI == SILFunction::iterator(I->getParent()) && "Mismatch between the instruction and basic block"); ++NumMandatoryInlines; } } // Keep track of full inlined functions so we don't waste time recursively // reprocessing them. FullyInlinedSet.insert(F); return true; }
/// Create a new apply instructions that uses the concrete type instead /// of the existential type. SILInstruction * SILCombiner::createApplyWithConcreteType(FullApplySite AI, SILValue NewSelf, SILValue Self, CanType ConcreteType, SILValue ConcreteTypeDef, ProtocolConformanceRef Conformance, CanType OpenedArchetype) { // Create a set of arguments. SmallVector<SILValue, 8> Args; for (auto Arg : AI.getArgumentsWithoutSelf()) { Args.push_back(Arg); } Args.push_back(NewSelf); // Form a new set of substitutions where Self is // replaced by a concrete type. SmallVector<Substitution, 8> Substitutions; for (auto Subst : AI.getSubstitutions()) { if (Subst.getReplacement().getCanonicalTypeOrNull() == OpenedArchetype) { auto Conformances = AI.getModule().getASTContext() .AllocateUninitialized<ProtocolConformanceRef>(1); Conformances[0] = Conformance; Substitution NewSubst(ConcreteType, Conformances); Substitutions.push_back(NewSubst); } else Substitutions.push_back(Subst); } SILType SubstCalleeType = AI.getSubstCalleeSILType(); SILType NewSubstCalleeType; auto FnTy = AI.getCallee()->getType().getAs<SILFunctionType>(); if (FnTy && FnTy->isPolymorphic()) { // Handle polymorphic functions by properly substituting // their parameter types. CanSILFunctionType SFT = FnTy->substGenericArgs( AI.getModule(), AI.getModule().getSwiftModule(), Substitutions); NewSubstCalleeType = SILType::getPrimitiveObjectType(SFT); } else { TypeSubstitutionMap TypeSubstitutions; TypeSubstitutions[OpenedArchetype.getPointer()] = ConcreteType; NewSubstCalleeType = SubstCalleeType.subst(AI.getModule(), AI.getModule().getSwiftModule(), TypeSubstitutions); } FullApplySite NewAI; Builder.setCurrentDebugScope(AI.getDebugScope()); if (auto *TAI = dyn_cast<TryApplyInst>(AI)) NewAI = Builder.createTryApply(AI.getLoc(), AI.getCallee(), NewSubstCalleeType, Substitutions, Args, TAI->getNormalBB(), TAI->getErrorBB()); else NewAI = Builder.createApply(AI.getLoc(), AI.getCallee(), NewSubstCalleeType, AI.getType(), Substitutions, Args, cast<ApplyInst>(AI)->isNonThrowing()); if (isa<ApplyInst>(NewAI)) replaceInstUsesWith(*AI.getInstruction(), NewAI.getInstruction()); eraseInstFromFunction(*AI.getInstruction()); return NewAI.getInstruction(); }
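// Illustrative note (assumed SIL, following the comment style used elsewhere in
// this section): the rewrite above replaces the opened archetype in the
// substitution list with the concrete type once it is known, conceptually:
//
//   %r = apply %fn<Self = @opened("...") P>(..., %opened_self)
// becomes
//   %r = apply %fn<Self = ConcreteType>(..., %concrete_self)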
/// Process an apply instruction which uses a partial_apply /// as its callee. /// Returns true on success. bool PartialApplyCombiner::processSingleApply(FullApplySite AI) { Builder.setInsertionPoint(AI.getInstruction()); Builder.setCurrentDebugScope(AI.getDebugScope()); // Prepare the args. SmallVector<SILValue, 8> Args; // First the ApplyInst args. for (auto Op : AI.getArguments()) Args.push_back(Op); SILInstruction *InsertionPoint = &*Builder.getInsertionPoint(); // Next, the partial apply args. // Pre-process partial_apply arguments only once, lazily. if (isFirstTime) { isFirstTime = false; if (!allocateTemporaries()) return false; } // Now, copy over the partial apply args. for (auto Op : PAI->getArguments()) { auto Arg = Op; // If there is new temporary for this argument, use it instead. if (isa<AllocStackInst>(Arg)) { if (ArgToTmp.count(Arg)) { Op = ArgToTmp.lookup(Arg); } } Args.push_back(Op); } Builder.setInsertionPoint(InsertionPoint); Builder.setCurrentDebugScope(AI.getDebugScope()); // The thunk that implements the partial apply calls the closure function // that expects all arguments to be consumed by the function. However, the // captured arguments are not arguments of *this* apply, so they are not // pre-incremented. When we combine the partial_apply and this apply into // a new apply we need to retain all of the closure non-address type // arguments. auto ParamInfo = PAI->getSubstCalleeType()->getParameters(); auto PartialApplyArgs = PAI->getArguments(); // Set of arguments that need to be released after each invocation. SmallVector<SILValue, 8> ToBeReleasedArgs; for (unsigned i = 0, e = PartialApplyArgs.size(); i < e; ++i) { SILValue Arg = PartialApplyArgs[i]; if (!Arg->getType().isAddress()) { // Retain the argument as the callee may consume it. Builder.emitRetainValueOperation(PAI->getLoc(), Arg); // For non consumed parameters (e.g. guaranteed), we also need to // insert releases after each apply instruction that we create. if (!ParamInfo[ParamInfo.size() - PartialApplyArgs.size() + i]. isConsumed()) ToBeReleasedArgs.push_back(Arg); } } auto *F = FRI->getReferencedFunction(); SILType FnType = F->getLoweredType(); SILType ResultTy = F->getLoweredFunctionType()->getSILResult(); ArrayRef<Substitution> Subs = PAI->getSubstitutions(); if (!Subs.empty()) { FnType = FnType.substGenericArgs(PAI->getModule(), Subs); ResultTy = FnType.getAs<SILFunctionType>()->getSILResult(); } FullApplySite NAI; if (auto *TAI = dyn_cast<TryApplyInst>(AI)) NAI = Builder.createTryApply(AI.getLoc(), FRI, FnType, Subs, Args, TAI->getNormalBB(), TAI->getErrorBB()); else NAI = Builder.createApply(AI.getLoc(), FRI, FnType, ResultTy, Subs, Args, cast<ApplyInst>(AI)->isNonThrowing()); // We also need to release the partial_apply instruction itself because it // is consumed by the apply_instruction. if (auto *TAI = dyn_cast<TryApplyInst>(AI)) { Builder.setInsertionPoint(TAI->getNormalBB()->begin()); for (auto Arg : ToBeReleasedArgs) { Builder.emitReleaseValueOperation(PAI->getLoc(), Arg); } Builder.createStrongRelease(AI.getLoc(), PAI, Atomicity::Atomic); Builder.setInsertionPoint(TAI->getErrorBB()->begin()); // Release the non-consumed parameters. for (auto Arg : ToBeReleasedArgs) { Builder.emitReleaseValueOperation(PAI->getLoc(), Arg); } Builder.createStrongRelease(AI.getLoc(), PAI, Atomicity::Atomic); Builder.setInsertionPoint(AI.getInstruction()); } else { // Release the non-consumed parameters. 
for (auto Arg : ToBeReleasedArgs) { Builder.emitReleaseValueOperation(PAI->getLoc(), Arg); } Builder.createStrongRelease(AI.getLoc(), PAI, Atomicity::Atomic); } SilCombiner->replaceInstUsesWith(*AI.getInstruction(), NAI.getInstruction()); SilCombiner->eraseInstFromFunction(*AI.getInstruction()); return true; }
/// \brief Inlines the callee of a given ApplyInst (which must be the value of
/// a FunctionRefInst referencing a function with a known body), into the
/// caller containing the ApplyInst, which must be the same function as
/// provided to the constructor of SILInliner. It only performs one step of
/// inlining: it does not recursively inline functions called by the callee.
///
/// It is the responsibility of the caller of this function to delete
/// the given ApplyInst when inlining is successful.
///
/// \returns true on success or false if it is unable to inline the function
/// (for any reason).
bool SILInliner::inlineFunction(FullApplySite AI, ArrayRef<SILValue> Args) {
  SILFunction *CalleeFunction = &Original;
  this->CalleeFunction = CalleeFunction;

  // Do not attempt to inline an apply into its parent function.
  if (AI.getFunction() == CalleeFunction)
    return false;

  SILFunction &F = getBuilder().getFunction();
  assert(AI.getFunction() && AI.getFunction() == &F &&
         "Inliner called on apply instruction in wrong function?");
  assert(((CalleeFunction->getRepresentation() !=
               SILFunctionTypeRepresentation::ObjCMethod &&
           CalleeFunction->getRepresentation() !=
               SILFunctionTypeRepresentation::CFunctionPointer) ||
          IKind == InlineKind::PerformanceInline) &&
         "Cannot inline Objective-C methods or C functions in mandatory "
         "inlining");

  CalleeEntryBB = &*CalleeFunction->begin();

  // Compute the SILLocation which should be used by all the inlined
  // instructions.
  if (IKind == InlineKind::PerformanceInline) {
    Loc = InlinedLocation::getInlinedLocation(AI.getLoc());
  } else {
    assert(IKind == InlineKind::MandatoryInline && "Unknown InlineKind.");
    Loc = MandatoryInlinedLocation::getMandatoryInlinedLocation(AI.getLoc());
  }

  auto AIScope = AI.getDebugScope();
  // FIXME: Turn this into an assertion instead.
  if (!AIScope)
    AIScope = AI.getFunction()->getDebugScope();

  if (IKind == InlineKind::MandatoryInline) {
    // Mandatory inlining: every instruction inherits scope/location
    // from the call site.
    CallSiteScope = AIScope;
  } else {
    // Performance inlining. Construct a proper inline scope pointing
    // back to the call site.
    CallSiteScope = new (F.getModule()) SILDebugScope(AI.getLoc(), &F, AIScope);
    assert(CallSiteScope->getParentFunction() == &F);
  }
  assert(CallSiteScope && "call site has no scope");

  // Increment the ref count for the inlined function, so it doesn't
  // get deleted before we can emit abstract debug info for it.
  CalleeFunction->setInlined();

  // If the caller's BB is not the last BB in the calling function, then keep
  // track of the next BB so we always insert new BBs before it; otherwise,
  // we just leave the new BBs at the end as they are by default.
  auto IBI = std::next(SILFunction::iterator(AI.getParent()));
  InsertBeforeBB = IBI != F.end() ? &*IBI : nullptr;

  // Clear the argument map and map ApplyInst arguments to the arguments of the
  // callee's entry block.
  ValueMap.clear();
  assert(CalleeEntryBB->bbarg_size() == Args.size() &&
         "Unexpected number of arguments to entry block of function?");
  auto BAI = CalleeEntryBB->bbarg_begin();
  for (auto AI = Args.begin(), AE = Args.end(); AI != AE; ++AI, ++BAI)
    ValueMap.insert(std::make_pair(*BAI, *AI));

  InstructionMap.clear();
  BBMap.clear();
  // Do not allow the entry block to be cloned again.
  SILBasicBlock::iterator InsertPoint =
      SILBasicBlock::iterator(AI.getInstruction());
  BBMap.insert(std::make_pair(CalleeEntryBB, AI.getParent()));
  getBuilder().setInsertionPoint(InsertPoint);
  // Recursively visit the callee's BBs in depth-first preorder, starting with
  // the entry block, cloning all instructions other than terminators.
  visitSILBasicBlock(CalleeEntryBB);

  // If we're inlining into a normal apply and the callee's entry
  // block ends in a return, then we can avoid a split.
  if (auto nonTryAI = dyn_cast<ApplyInst>(AI)) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(CalleeEntryBB->getTerminator())) {
      // Replace all uses of the apply instruction with the operands of the
      // return instruction, appropriately mapped.
      nonTryAI->replaceAllUsesWith(remapValue(RI->getOperand()));
      return true;
    }
  }

  // If we're inlining into a try_apply, we already have a return-to BB.
  SILBasicBlock *ReturnToBB;
  if (auto tryAI = dyn_cast<TryApplyInst>(AI)) {
    ReturnToBB = tryAI->getNormalBB();

  // Otherwise, split the caller's basic block to create a return-to BB.
  } else {
    SILBasicBlock *CallerBB = AI.getParent();
    // Split the BB and do NOT create a branch between the old and new
    // BBs; we will create the appropriate terminator manually later.
    ReturnToBB = CallerBB->splitBasicBlock(InsertPoint);
    // Place the return-to BB after all the other mapped BBs.
    if (InsertBeforeBB)
      F.getBlocks().splice(SILFunction::iterator(InsertBeforeBB),
                           F.getBlocks(), SILFunction::iterator(ReturnToBB));
    else
      F.getBlocks().splice(F.getBlocks().end(), F.getBlocks(),
                           SILFunction::iterator(ReturnToBB));

    // Create an argument on the return-to BB representing the returned value.
    auto *RetArg = new (F.getModule())
        SILArgument(ReturnToBB, AI.getInstruction()->getType());
    // Replace all uses of the ApplyInst with the new argument.
    AI.getInstruction()->replaceAllUsesWith(RetArg);
  }

  // Now iterate over the callee BBs and fix up the terminators.
  for (auto BI = BBMap.begin(), BE = BBMap.end(); BI != BE; ++BI) {
    getBuilder().setInsertionPoint(BI->second);

    // Modify return terminators to branch to the return-to BB, rather than
    // trying to clone the ReturnInst.
    if (ReturnInst *RI = dyn_cast<ReturnInst>(BI->first->getTerminator())) {
      auto returnedValue = remapValue(RI->getOperand());
      getBuilder().createBranch(Loc.getValue(), ReturnToBB, returnedValue);
      continue;
    }

    // Modify throw terminators to branch to the error-return BB, rather than
    // trying to clone the ThrowInst.
    if (ThrowInst *TI = dyn_cast<ThrowInst>(BI->first->getTerminator())) {
      if (auto *A = dyn_cast<ApplyInst>(AI)) {
        (void)A;
        assert(A->isNonThrowing() &&
               "apply of a function with error result must be non-throwing");
        getBuilder().createUnreachable(Loc.getValue());
        continue;
      }
      auto tryAI = cast<TryApplyInst>(AI);
      auto thrownValue = remapValue(TI->getOperand());
      getBuilder().createBranch(Loc.getValue(), tryAI->getErrorBB(),
                                thrownValue);
      continue;
    }

    // Otherwise use the normal visitor, which clones the existing instruction
    // but remaps basic blocks and values.
    visit(BI->first->getTerminator());
  }

  return true;
}
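// Hedged usage sketch (the helper name is hypothetical): run one inlining step
// and, on success, erase the apply, which is the caller's responsibility per
// the doc comment above.
static bool inlineOneApply(SILInliner &Inliner, FullApplySite InnerAI,
                           ArrayRef<SILValue> FullArgs) {
  if (!Inliner.inlineFunction(InnerAI, FullArgs))
    return false;
  // Per the documented contract, the caller deletes the apply after success.
  InnerAI.getInstruction()->eraseFromParent();
  return true;
}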
/// \brief Devirtualize an apply of a class method. /// /// \p AI is the apply to devirtualize. /// \p ClassOrMetatype is a class value or metatype value that is the /// self argument of the apply we will devirtualize. /// return the result value of the new ApplyInst if created one or null. FullApplySite swift::devirtualizeClassMethod(FullApplySite AI, SILValue ClassOrMetatype, OptRemark::Emitter *ORE) { LLVM_DEBUG(llvm::dbgs() << " Trying to devirtualize : " << *AI.getInstruction()); SILModule &Mod = AI.getModule(); auto *MI = cast<MethodInst>(AI.getCallee()); auto ClassOrMetatypeType = ClassOrMetatype->getType(); auto *F = getTargetClassMethod(Mod, ClassOrMetatypeType, MI); CanSILFunctionType GenCalleeType = F->getLoweredFunctionType(); SubstitutionMap Subs = getSubstitutionsForCallee(Mod, GenCalleeType, ClassOrMetatypeType.getASTType(), AI); CanSILFunctionType SubstCalleeType = GenCalleeType; if (GenCalleeType->isPolymorphic()) SubstCalleeType = GenCalleeType->substGenericArgs(Mod, Subs); SILFunctionConventions substConv(SubstCalleeType, Mod); SILBuilderWithScope B(AI.getInstruction()); SILLocation Loc = AI.getLoc(); FunctionRefInst *FRI = B.createFunctionRef(Loc, F); // Create the argument list for the new apply, casting when needed // in order to handle covariant indirect return types and // contravariant argument types. llvm::SmallVector<SILValue, 8> NewArgs; auto IndirectResultArgIter = AI.getIndirectSILResults().begin(); for (auto ResultTy : substConv.getIndirectSILResultTypes()) { NewArgs.push_back( castValueToABICompatibleType(&B, Loc, *IndirectResultArgIter, IndirectResultArgIter->getType(), ResultTy)); ++IndirectResultArgIter; } auto ParamArgIter = AI.getArgumentsWithoutIndirectResults().begin(); // Skip the last parameter, which is `self`. Add it below. for (auto param : substConv.getParameters().drop_back()) { auto paramType = substConv.getSILType(param); NewArgs.push_back( castValueToABICompatibleType(&B, Loc, *ParamArgIter, ParamArgIter->getType(), paramType)); ++ParamArgIter; } // Add the self argument, upcasting if required because we're // calling a base class's method. auto SelfParamTy = substConv.getSILType(SubstCalleeType->getSelfParameter()); NewArgs.push_back(castValueToABICompatibleType(&B, Loc, ClassOrMetatype, ClassOrMetatypeType, SelfParamTy)); ApplySite NewAS = replaceApplySite(B, Loc, AI, FRI, Subs, NewArgs, substConv); FullApplySite NewAI = FullApplySite::isa(NewAS.getInstruction()); assert(NewAI); LLVM_DEBUG(llvm::dbgs() << " SUCCESS: " << F->getName() << "\n"); if (ORE) ORE->emit([&]() { using namespace OptRemark; return RemarkPassed("ClassMethodDevirtualized", *AI.getInstruction()) << "Devirtualized call to class method " << NV("Method", F); }); NumClassDevirt++; return NewAI; }
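// Hedged sketch (the wrapper name is hypothetical): pair the legality check
// defined earlier in this section with the transformation above.
static FullApplySite tryDevirtualizeKnownClassMethod(FullApplySite AI,
                                                     SILValue ClassOrMetatype,
                                                     OptRemark::Emitter *ORE) {
  if (!swift::canDevirtualizeClassMethod(AI, ClassOrMetatype->getType(), ORE,
                                         /*isEffectivelyFinalMethod=*/false))
    return FullApplySite();
  return swift::devirtualizeClassMethod(AI, ClassOrMetatype, ORE);
}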
/// \brief Inlines all mandatory inlined functions into the body of a function,
/// first recursively inlining all mandatory apply instructions in those
/// functions into their bodies if necessary.
///
/// \param F the function to be processed
/// \param AI nullptr if this is being called from the top level; the relevant
///   ApplyInst requiring the recursive call when non-null
/// \param FullyInlinedSet the set of all functions already known to be fully
///   processed, to avoid processing them over again
/// \param SetFactory an instance of ImmutableFunctionSet::Factory
/// \param CurrentInliningSet the set of functions currently being inlined in
///   the current call stack of recursive calls
///
/// \returns true if successful, false if failed due to circular inlining.
static bool
runOnFunctionRecursively(SILFunction *F, FullApplySite AI,
                         SILModule::LinkingMode Mode,
                         DenseFunctionSet &FullyInlinedSet,
                         ImmutableFunctionSet::Factory &SetFactory,
                         ImmutableFunctionSet CurrentInliningSet,
                         ClassHierarchyAnalysis *CHA) {
  // Avoid reprocessing functions needlessly.
  if (FullyInlinedSet.count(F))
    return true;

  // Prevent attempt to circularly inline.
  if (CurrentInliningSet.contains(F)) {
    // This cannot happen on a top-level call, so AI should be non-null.
    assert(AI && "Cannot have circular inline without apply");
    SILLocation L = AI.getLoc();
    assert(L && "Must have location for transparent inline apply");
    diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(),
             diag::circular_transparent);
    return false;
  }

  // Add to the current inlining set (immutably, so we only affect the set
  // during this call and recursive subcalls).
  CurrentInliningSet = SetFactory.add(CurrentInliningSet, F);

  SmallVector<SILValue, 16> CaptureArgs;
  SmallVector<SILValue, 32> FullArgs;

  for (auto FI = F->begin(), FE = F->end(); FI != FE; ++FI) {
    for (auto I = FI->begin(), E = FI->end(); I != E; ++I) {
      FullApplySite InnerAI = FullApplySite::isa(&*I);
      if (!InnerAI)
        continue;

      auto *ApplyBlock = InnerAI.getParent();

      auto NewInstPair = tryDevirtualizeApply(InnerAI, CHA);
      if (auto *NewInst = NewInstPair.first) {
        replaceDeadApply(InnerAI, NewInst);
        if (auto *II = dyn_cast<SILInstruction>(NewInst))
          I = II->getIterator();
        else
          I = NewInst->getParentBB()->begin();

        auto NewAI = FullApplySite::isa(NewInstPair.second.getInstruction());
        if (!NewAI)
          continue;

        InnerAI = NewAI;
      }

      SILLocation Loc = InnerAI.getLoc();
      SILValue CalleeValue = InnerAI.getCallee();
      bool IsThick;
      PartialApplyInst *PAI;
      SILFunction *CalleeFunction = getCalleeFunction(InnerAI, IsThick,
                                                      CaptureArgs, FullArgs,
                                                      PAI, Mode);
      if (!CalleeFunction ||
          CalleeFunction->isTransparent() == IsNotTransparent)
        continue;

      // Then recursively process it first before trying to inline it.
      if (!runOnFunctionRecursively(CalleeFunction, InnerAI, Mode,
                                    FullyInlinedSet, SetFactory,
                                    CurrentInliningSet, CHA)) {
        // If we failed due to circular inlining, then emit some notes to
        // trace back the failure if we have more information.
        // FIXME: possibly it could be worth recovering and attempting other
        // inlines within this same recursive call rather than simply
        // propagating the failure.
        if (AI) {
          SILLocation L = AI.getLoc();
          assert(L && "Must have location for transparent inline apply");
          diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(),
                   diag::note_while_inlining);
        }
        return false;
      }

      // Inline function at I, which also changes I to refer to the first
      // instruction inlined in the case that it succeeds. We purposely
      // process the inlined body after inlining, because the inlining may
      // have exposed new inlining opportunities beyond those present in
      // the inlined function when processed independently.
      DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName()
                         << " into @" << InnerAI.getFunction()->getName()
                         << "\n");

      // Decrement our iterator (carefully, to avoid going off the front) so
      // it is valid after inlining is done. Inlining deletes the apply, and
      // can introduce multiple new basic blocks.
      if (I != ApplyBlock->begin())
        --I;
      else
        I = ApplyBlock->end();

      TypeSubstitutionMap ContextSubs;
      std::vector<Substitution> ApplySubs(InnerAI.getSubstitutions());

      if (PAI) {
        auto PAISubs = PAI->getSubstitutions();
        ApplySubs.insert(ApplySubs.end(), PAISubs.begin(), PAISubs.end());
      }

      ContextSubs.copyFrom(CalleeFunction->getContextGenericParams()
                               ->getSubstitutionMap(ApplySubs));

      SILInliner Inliner(*F, *CalleeFunction,
                         SILInliner::InlineKind::MandatoryInline,
                         ContextSubs, ApplySubs);
      if (!Inliner.inlineFunction(InnerAI, FullArgs)) {
        I = InnerAI.getInstruction()->getIterator();
        continue;
      }

      // Inlining was successful. Remove the apply.
      InnerAI.getInstruction()->eraseFromParent();

      // Reestablish our iterator if it wrapped.
      if (I == ApplyBlock->end())
        I = ApplyBlock->begin();
      else
        ++I;

      // If the inlined apply was a thick function, then we need to balance
      // the reference counts for correctness.
      if (IsThick)
        fixupReferenceCounts(I, Loc, CalleeValue, CaptureArgs);

      // Now that the IR is correct, see if we can remove dead callee
      // computations (e.g. dead partial_apply closures).
      cleanupCalleeValue(CalleeValue, CaptureArgs, FullArgs);

      // Reposition iterators possibly invalidated by mutation.
      FI = SILFunction::iterator(ApplyBlock);
      I = ApplyBlock->begin();
      E = ApplyBlock->end();
      ++NumMandatoryInlines;
    }
  }

  // Keep track of fully inlined functions so we don't waste time recursively
  // reprocessing them.
  FullyInlinedSet.insert(F);
  return true;
}
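// A minimal, standalone sketch of the circular-inlining guard used above,
// with plain STL containers standing in for ImmutableFunctionSet and
// DenseFunctionSet (the `Function` struct and the names here are
// hypothetical): the "current inlining set" is copied per recursion level, so
// it only reflects callers on the active call stack, while the "fully
// inlined" set is shared and prevents reprocessing.
#include <cstdio>
#include <set>
#include <string>
#include <vector>

struct Function {
  std::string name;
  std::vector<Function *> mandatoryCallees; // callees we would inline
};

static bool inlineRecursively(Function *f,
                              std::set<Function *> &fullyInlined,
                              std::set<Function *> currentInliningSet) {
  if (fullyInlined.count(f))
    return true;
  if (currentInliningSet.count(f)) {
    std::printf("circular inline involving '%s'\n", f->name.c_str());
    return false; // corresponds to emitting diag::circular_transparent
  }
  currentInliningSet.insert(f); // copied per call, like SetFactory.add
  for (Function *callee : f->mandatoryCallees)
    if (!inlineRecursively(callee, fullyInlined, currentInliningSet))
      return false;
  fullyInlined.insert(f);
  return true;
}

int main() {
  Function a{"a", {}}, b{"b", {}};
  a.mandatoryCallees = {&b};
  b.mandatoryCallees = {&a}; // a -> b -> a: a cycle
  std::set<Function *> fullyInlined;
  return inlineRecursively(&a, fullyInlined, {}) ? 0 : 1;
}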
/// Insert monomorphic inline caches for a specific class or metatype
/// type \p SubType.
static FullApplySite speculateMonomorphicTarget(FullApplySite AI,
                                                SILType SubType,
                                                CheckedCastBranchInst *&CCBI) {
  CCBI = nullptr;
  // Bail if this class_method cannot be devirtualized.
  if (!canDevirtualizeClassMethod(AI, SubType))
    return FullApplySite();

  // Create a diamond shaped control flow and a checked_cast_branch
  // instruction that checks the exact type of the object.
  // This cast selects between two paths: one that calls the slow dynamic
  // dispatch and one that calls the specific method.
  auto It = AI.getInstruction()->getIterator();
  SILFunction *F = AI.getFunction();
  SILBasicBlock *Entry = AI.getParent();

  // Iden is the basic block containing the direct call.
  SILBasicBlock *Iden = F->createBasicBlock();
  // Virt is the block containing the slow virtual call.
  SILBasicBlock *Virt = F->createBasicBlock();
  Iden->createBBArg(SubType);

  SILBasicBlock *Continue = Entry->splitBasicBlock(It);

  SILBuilderWithScope Builder(Entry, AI.getInstruction());
  // Create the checked_cast_branch instruction that checks at runtime if the
  // class instance is identical to the SILType.
  ClassMethodInst *CMI = cast<ClassMethodInst>(AI.getCallee());

  CCBI = Builder.createCheckedCastBranch(AI.getLoc(), /*exact*/ true,
                                         CMI->getOperand(), SubType, Iden,
                                         Virt);
  It = CCBI->getIterator();

  SILBuilderWithScope VirtBuilder(Virt, AI.getInstruction());
  SILBuilderWithScope IdenBuilder(Iden, AI.getInstruction());
  // This is the class reference downcasted into subclass SubType.
  SILValue DownCastedClassInstance = Iden->getBBArg(0);

  // Copy the two apply instructions into the two blocks.
  FullApplySite IdenAI = CloneApply(AI, IdenBuilder);
  FullApplySite VirtAI = CloneApply(AI, VirtBuilder);

  // See if Continue has a release on self as the instruction right after the
  // apply. If it exists, move it into position in the diamond.
  if (auto *Release =
          dyn_cast<StrongReleaseInst>(std::next(Continue->begin()))) {
    if (Release->getOperand() == CMI->getOperand()) {
      VirtBuilder.createStrongRelease(Release->getLoc(), CMI->getOperand());
      IdenBuilder.createStrongRelease(Release->getLoc(),
                                      DownCastedClassInstance);
      Release->eraseFromParent();
    }
  }

  // Create a PHI node for returning the return value from both apply
  // instructions.
  SILArgument *Arg = Continue->createBBArg(AI.getType());
  if (!isa<TryApplyInst>(AI)) {
    IdenBuilder.createBranch(AI.getLoc(), Continue,
                             ArrayRef<SILValue>(IdenAI.getInstruction()));
    VirtBuilder.createBranch(AI.getLoc(), Continue,
                             ArrayRef<SILValue>(VirtAI.getInstruction()));
  }

  // Remove the old Apply instruction.
  if (!isa<TryApplyInst>(AI))
    AI.getInstruction()->replaceAllUsesWith(Arg);
  auto *OriginalBB = AI.getParent();
  AI.getInstruction()->eraseFromParent();
  if (OriginalBB->empty())
    OriginalBB->removeFromParent();

  // Update the stats.
  NumTargetsPredicted++;

  // Devirtualize the apply instruction on the identical path.
  auto NewInstPair = devirtualizeClassMethod(IdenAI, DownCastedClassInstance);
  assert(NewInstPair.first && "Expected to be able to devirtualize apply!");
  replaceDeadApply(IdenAI, NewInstPair.first);

  // Split critical edges resulting from VirtAI.
  if (auto *TAI = dyn_cast<TryApplyInst>(VirtAI)) {
    auto *ErrorBB = TAI->getFunction()->createBasicBlock();
    ErrorBB->createBBArg(TAI->getErrorBB()->getBBArg(0)->getType());
    Builder.setInsertionPoint(ErrorBB);
    Builder.createBranch(TAI->getLoc(), TAI->getErrorBB(),
                         {ErrorBB->getBBArg(0)});

    auto *NormalBB = TAI->getFunction()->createBasicBlock();
    NormalBB->createBBArg(TAI->getNormalBB()->getBBArg(0)->getType());
    Builder.setInsertionPoint(NormalBB);
    Builder.createBranch(TAI->getLoc(), TAI->getNormalBB(),
                         {NormalBB->getBBArg(0)});

    Builder.setInsertionPoint(VirtAI.getInstruction());
    SmallVector<SILValue, 4> Args;
    for (auto Arg : VirtAI.getArguments()) {
      Args.push_back(Arg);
    }
    FullApplySite NewVirtAI = Builder.createTryApply(
        VirtAI.getLoc(), VirtAI.getCallee(), VirtAI.getSubstCalleeSILType(),
        VirtAI.getSubstitutions(), Args, NormalBB, ErrorBB);

    VirtAI.getInstruction()->eraseFromParent();
    VirtAI = NewVirtAI;
  }

  return VirtAI;
}
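// The control-flow diamond built above, expressed as a standalone C++
// analogy (the classes and the use of typeid here are illustrative; the real
// pass emits a checked_cast_branch at the SIL level): test whether the
// receiver has exactly the speculated subclass type, take the direct
// (devirtualized) call on the "Iden" path, and fall back to ordinary virtual
// dispatch on the "Virt" path. Both paths merge into one result, which plays
// the role of the Continue block's argument.
#include <cstdio>
#include <typeinfo>

struct Widget {
  virtual int draw() const { return 0; }
  virtual ~Widget() = default;
};
struct Button : Widget {
  int draw() const override { return 42; }
};

int speculativeDraw(const Widget &w) {
  if (typeid(w) == typeid(Button)) {
    // "Iden" path: exact type known, call the concrete method directly.
    return static_cast<const Button &>(w).Button::draw();
  }
  // "Virt" path: fall back to the slow dynamic dispatch.
  return w.draw();
}

int main() {
  Button b;
  Widget w;
  std::printf("%d %d\n", speculativeDraw(b), speculativeDraw(w));
  return 0;
}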
/// Inlines all mandatory inlined functions into the body of a function,
/// first recursively inlining all mandatory apply instructions in those
/// functions into their bodies if necessary.
///
/// \param F the function to be processed
/// \param AI nullptr if this is being called from the top level; the relevant
///   ApplyInst requiring the recursive call when non-null
/// \param FullyInlinedSet the set of all functions already known to be fully
///   processed, to avoid processing them over again
/// \param SetFactory an instance of ImmutableFunctionSet::Factory
/// \param CurrentInliningSet the set of functions currently being inlined in
///   the current call stack of recursive calls
///
/// \returns true if successful, false if failed due to circular inlining.
static bool
runOnFunctionRecursively(SILOptFunctionBuilder &FuncBuilder,
                         SILFunction *F, FullApplySite AI,
                         DenseFunctionSet &FullyInlinedSet,
                         ImmutableFunctionSet::Factory &SetFactory,
                         ImmutableFunctionSet CurrentInliningSet,
                         ClassHierarchyAnalysis *CHA) {
  // Avoid reprocessing functions needlessly.
  if (FullyInlinedSet.count(F))
    return true;

  // Prevent attempt to circularly inline.
  if (CurrentInliningSet.contains(F)) {
    // This cannot happen on a top-level call, so AI should be non-null.
    assert(AI && "Cannot have circular inline without apply");
    SILLocation L = AI.getLoc();
    assert(L && "Must have location for transparent inline apply");
    diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(),
             diag::circular_transparent);
    return false;
  }

  // Add to the current inlining set (immutably, so we only affect the set
  // during this call and recursive subcalls).
  CurrentInliningSet = SetFactory.add(CurrentInliningSet, F);

  SmallVector<std::pair<SILValue, ParameterConvention>, 16> CaptureArgs;
  SmallVector<SILValue, 32> FullArgs;

  // Visiting blocks in reverse order avoids revisiting instructions after
  // block splitting, which would be quadratic.
  for (auto BI = F->rbegin(), BE = F->rend(), nextBB = BI; BI != BE;
       BI = nextBB) {
    // After inlining, the block iterator will be adjusted to point to the
    // last block containing inlined instructions. This way, the inlined
    // function body will be reprocessed within the caller's context without
    // revisiting any original instructions.
    nextBB = std::next(BI);

    // While iterating over this block, instructions are inserted and deleted.
    // To avoid quadratic block splitting, instructions must be processed in
    // reverse order (block splitting reassigns the parent pointer of all
    // instructions below the split point).
    for (auto II = BI->rbegin(); II != BI->rend(); ++II) {
      FullApplySite InnerAI = FullApplySite::isa(&*II);
      if (!InnerAI)
        continue;

      // *NOTE* If devirtualization succeeds, devirtInst may not be InnerAI,
      // but a casted result of InnerAI or even a block argument due to
      // abstraction changes when calling the witness or class method.
      auto *devirtInst = tryDevirtualizeApplyHelper(InnerAI, CHA);
      // Restore II to the current apply site.
      II = devirtInst->getReverseIterator();
      // If the devirtualized call result is no longer a valid FullApplySite,
      // devirtualization has succeeded, but the result is not immediately
      // inlinable, so move on to the next instruction.
      InnerAI = FullApplySite::isa(devirtInst);
      if (!InnerAI)
        continue;

      SILValue CalleeValue = InnerAI.getCallee();
      bool IsThick;
      PartialApplyInst *PAI;
      SILFunction *CalleeFunction = getCalleeFunction(
          F, InnerAI, IsThick, CaptureArgs, FullArgs, PAI);
      if (!CalleeFunction)
        continue;

      // Then recursively process it first before trying to inline it.
      if (!runOnFunctionRecursively(FuncBuilder, CalleeFunction, InnerAI,
                                    FullyInlinedSet, SetFactory,
                                    CurrentInliningSet, CHA)) {
        // If we failed due to circular inlining, then emit some notes to
        // trace back the failure if we have more information.
        // FIXME: possibly it could be worth recovering and attempting other
        // inlines within this same recursive call rather than simply
        // propagating the failure.
        if (AI) {
          SILLocation L = AI.getLoc();
          assert(L && "Must have location for transparent inline apply");
          diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(),
                   diag::note_while_inlining);
        }
        return false;
      }

      // Get our list of substitutions.
      auto Subs = (PAI ? PAI->getSubstitutionMap()
                       : InnerAI.getSubstitutionMap());

      SILOpenedArchetypesTracker OpenedArchetypesTracker(F);
      F->getModule().registerDeleteNotificationHandler(
          &OpenedArchetypesTracker);
      // The callee only needs to know about opened archetypes used in
      // the substitution list.
      OpenedArchetypesTracker.registerUsedOpenedArchetypes(
          InnerAI.getInstruction());
      if (PAI) {
        OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI);
      }

      SILInliner Inliner(FuncBuilder, SILInliner::InlineKind::MandatoryInline,
                         Subs, OpenedArchetypesTracker);
      if (!Inliner.canInlineApplySite(InnerAI))
        continue;

      // Inline function at I, which also changes I to refer to the first
      // instruction inlined in the case that it succeeds. We purposely
      // process the inlined body after inlining, because the inlining may
      // have exposed new inlining opportunities beyond those present in
      // the inlined function when processed independently.
      LLVM_DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName()
                              << " into @"
                              << InnerAI.getFunction()->getName() << "\n");

      // If we intend to inline a thick function, then we need to balance the
      // reference counts for correctness.
      if (IsThick) {
        bool IsCalleeGuaranteed =
            PAI &&
            PAI->getType().castTo<SILFunctionType>()->isCalleeGuaranteed();
        fixupReferenceCounts(InnerAI.getInstruction(), CalleeValue,
                             CaptureArgs, IsCalleeGuaranteed);
      }

      // Register a callback to record potentially unused function values
      // after inlining.
      ClosureCleanup closureCleanup;
      Inliner.setDeletionCallback([&closureCleanup](SILInstruction *I) {
        closureCleanup.recordDeadFunction(I);
      });

      // Inlining deletes the apply, and can introduce multiple new basic
      // blocks. After this, CalleeValue and other instructions may be
      // invalid. nextBB will point to the last inlined block.
      auto firstInlinedInstAndLastBB =
          Inliner.inlineFunction(CalleeFunction, InnerAI, FullArgs);
      nextBB = firstInlinedInstAndLastBB.second->getReverseIterator();
      ++NumMandatoryInlines;

      // The IR is now valid, and trivial dead arguments are removed. However,
      // we may be able to remove dead callee computations (e.g. dead
      // partial_apply closures).
      closureCleanup.cleanupDeadClosures(F);

      // Resume inlining within nextBB, which contains only the inlined
      // instructions and possibly instructions in the original call block
      // that have not yet been visited.
      break;
    }
  }

  // Keep track of fully inlined functions so we don't waste time recursively
  // reprocessing them.
  FullyInlinedSet.insert(F);
  return true;
}
/// Update the callsite to pass in the correct arguments.
static void rewriteApplyInst(const CallSiteDescriptor &CSDesc,
                             SILFunction *NewF) {
  FullApplySite AI = CSDesc.getApplyInst();
  SILInstruction *Closure = CSDesc.getClosure();
  SILBuilderWithScope Builder(Closure);
  FunctionRefInst *FRI = Builder.createFunctionRef(AI.getLoc(), NewF);

  // Create the args for the new apply by removing the closure argument...
  llvm::SmallVector<SILValue, 8> NewArgs;
  unsigned Index = 0;
  for (auto Arg : AI.getArguments()) {
    if (Index != CSDesc.getClosureIndex())
      NewArgs.push_back(Arg);
    Index++;
  }

  // ... and appending the captured arguments. We also insert retains here at
  // the location of the original closure. This is needed to balance the
  // implicit release of all captured arguments that occurs when the partial
  // apply is destroyed.
  SILModule &M = NewF->getModule();
  for (auto Arg : CSDesc.getArguments()) {
    NewArgs.push_back(Arg);

    SILType ArgTy = Arg->getType();

    // If our argument is of trivial type, continue...
    if (ArgTy.isTrivial(M))
      continue;

    // TODO: When we support address types, this code path will need to be
    // updated.

    // We need to balance the consumed argument of the new partial_apply in
    // the specialized callee by a retain. If both the original partial_apply
    // and the apply of the callee are in the same basic block we can assume
    // they are executed the same number of times. Therefore it is sufficient
    // to just retain the argument at the site of the original partial_apply.
    //
    //    %closure = partial_apply (%arg)
    //             = apply %callee(%closure)
    //  =>
    //               retain %arg
    //    %closure = partial_apply (%arg)
    //               apply %specialized_callee(..., %arg)
    //
    // However, if they are not in the same basic block the callee might be
    // executed more frequently than the closure (for example, if the closure
    // is created in a loop preheader and the callee taking the closure is
    // executed in the loop). In such a case we must keep the argument live
    // across the call site of the callee and emit a matching retain for
    // every invocation of the callee.
    //
    //    %closure = partial_apply (%arg)
    //    while () {
    //             = %callee(%closure)
    //    }
    //  =>
    //               retain %arg
    //    %closure = partial_apply (%arg)
    //    while () {
    //               retain %arg
    //               apply %specialized_callee(.., %arg)
    //    }
    //    release %arg
    //
    if (AI.getParent() != Closure->getParent()) {
      // Emit the retain and release that keep the argument alive across the
      // callee using the closure.
      CSDesc.extendArgumentLifetime(Arg);

      // Emit the retain that matches the captured argument consumed by the
      // partial_apply in the callee.
      Builder.setInsertionPoint(AI.getInstruction());
      Builder.createRetainValue(Closure->getLoc(), Arg, Atomicity::Atomic);
    } else {
      Builder.createRetainValue(Closure->getLoc(), Arg, Atomicity::Atomic);
    }
  }

  SILType LoweredType = NewF->getLoweredType();
  SILType ResultType = LoweredType.castTo<SILFunctionType>()->getSILResult();
  Builder.setInsertionPoint(AI.getInstruction());
  FullApplySite NewAI;
  if (auto *TAI = dyn_cast<TryApplyInst>(AI)) {
    NewAI = Builder.createTryApply(AI.getLoc(), FRI, LoweredType,
                                   ArrayRef<Substitution>(), NewArgs,
                                   TAI->getNormalBB(), TAI->getErrorBB());
    // If we passed in the original closure as @owned, then insert a release
    // right after NewAI. This is to balance the +1 from being an @owned
    // argument to AI.
    if (CSDesc.isClosureConsumed() && CSDesc.closureHasRefSemanticContext()) {
      Builder.setInsertionPoint(TAI->getNormalBB()->begin());
      Builder.createReleaseValue(Closure->getLoc(), Closure,
                                 Atomicity::Atomic);
      Builder.setInsertionPoint(TAI->getErrorBB()->begin());
      Builder.createReleaseValue(Closure->getLoc(), Closure,
                                 Atomicity::Atomic);
      Builder.setInsertionPoint(AI.getInstruction());
    }
  } else {
    NewAI = Builder.createApply(AI.getLoc(), FRI, LoweredType, ResultType,
                                ArrayRef<Substitution>(), NewArgs,
                                cast<ApplyInst>(AI)->isNonThrowing());
    // If we passed in the original closure as @owned, then insert a release
    // right after NewAI. This is to balance the +1 from being an @owned
    // argument to AI.
    if (CSDesc.isClosureConsumed() && CSDesc.closureHasRefSemanticContext())
      Builder.createReleaseValue(Closure->getLoc(), Closure,
                                 Atomicity::Atomic);
  }

  // Replace all uses of the old apply with the new apply.
  if (isa<ApplyInst>(AI))
    AI.getInstruction()->replaceAllUsesWith(NewAI.getInstruction());
  // Erase the old apply.
  AI.getInstruction()->eraseFromParent();

  // TODO: Maybe include invalidation code for CallSiteDescriptor after we
  // erase AI from parent?
}
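// A standalone C++ analogy of the call-site rewrite performed above (the
// names and the std::function usage are illustrative, not part of the pass):
// instead of building a closure over a captured value and handing it to a
// generic callee, the rewritten call drops the closure argument and passes
// the captured value directly to a specialized callee, eliminating the
// closure allocation and indirection.
#include <cstdio>
#include <functional>

// Original callee: takes an opaque closure.
static int callee(const std::function<int(int)> &f) { return f(10); }

// Specialized callee: the closure parameter is gone; its captured value is
// now an explicit trailing parameter (mirroring NewArgs = original args
// minus the closure, plus the captured arguments).
static int calleeSpecialized(int captured) { return captured + 10; }

int main() {
  int captured = 32;
  // Before: %closure = partial_apply(%captured); apply %callee(%closure)
  int before = callee([captured](int x) { return captured + x; });
  // After: apply %specialized_callee(..., %captured)
  int after = calleeSpecialized(captured);
  std::printf("%d %d\n", before, after);
  return 0;
}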