void SILPerformanceInliner::collectAppliesToInline( SILFunction *Caller, SmallVectorImpl<FullApplySite> &Applies, DominanceAnalysis *DA, SILLoopAnalysis *LA) { DominanceInfo *DT = DA->get(Caller); SILLoopInfo *LI = LA->get(Caller); ConstantTracker constTracker(Caller); DominanceOrder domOrder(&Caller->front(), DT, Caller->size()); // Go through all instructions and find candidates for inlining. // We do this in dominance order for the constTracker. SmallVector<FullApplySite, 8> InitialCandidates; while (SILBasicBlock *block = domOrder.getNext()) { constTracker.beginBlock(); unsigned loopDepth = LI->getLoopDepth(block); for (auto I = block->begin(), E = block->end(); I != E; ++I) { constTracker.trackInst(&*I); if (!FullApplySite::isa(&*I)) continue; FullApplySite AI = FullApplySite(&*I); DEBUG(llvm::dbgs() << " Check:" << *I); auto *Callee = getEligibleFunction(AI); if (Callee) { if (isProfitableToInline(AI, loopDepth, DA, LA, constTracker)) InitialCandidates.push_back(AI); } } domOrder.pushChildrenIf(block, [&] (SILBasicBlock *child) { if (ColdBlockInfo::isSlowPath(block, child)) { // Handle cold blocks separately. visitColdBlocks(InitialCandidates, child, DT); return false; } return true; }); } // Calculate how many times a callee is called from this caller. llvm::DenseMap<SILFunction *, unsigned> CalleeCount; for (auto AI : InitialCandidates) { SILFunction *Callee = AI.getCalleeFunction(); assert(Callee && "apply_inst does not have a direct callee anymore"); CalleeCount[Callee]++; } // Now copy each candidate callee that has a small enough number of // call sites into the final set of call sites. for (auto AI : InitialCandidates) { SILFunction *Callee = AI.getCalleeFunction(); assert(Callee && "apply_inst does not have a direct callee anymore"); const unsigned CallsToCalleeThreshold = 1024; if (CalleeCount[Callee] <= CallsToCalleeThreshold) Applies.push_back(AI); } }
/// Return true if inlining this call site is profitable. bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI, unsigned loopDepthOfAI, DominanceAnalysis *DA, SILLoopAnalysis *LA, ConstantTracker &callerTracker, unsigned &NumCallerBlocks) { SILFunction *Callee = AI.getCalleeFunction(); if (Callee->getInlineStrategy() == AlwaysInline) return true; ConstantTracker constTracker(Callee, &callerTracker, AI); DominanceInfo *DT = DA->get(Callee); SILLoopInfo *LI = LA->get(Callee); DominanceOrder domOrder(&Callee->front(), DT, Callee->size()); // Calculate the inlining cost of the callee. unsigned CalleeCost = 0; unsigned Benefit = InlineCostThreshold > 0 ? InlineCostThreshold : RemovedCallBenefit; Benefit += loopDepthOfAI * LoopBenefitFactor; int testThreshold = TestThreshold; while (SILBasicBlock *block = domOrder.getNext()) { constTracker.beginBlock(); unsigned loopDepth = LI->getLoopDepth(block); for (SILInstruction &I : *block) { constTracker.trackInst(&I); auto ICost = instructionInlineCost(I); if (testThreshold >= 0) { // We are in test-mode: use a simplified cost model. CalleeCost += testCost(&I); } else { // Use the regular cost model. CalleeCost += unsigned(ICost); } if (ApplyInst *AI = dyn_cast<ApplyInst>(&I)) { // Check if the callee is passed as an argument. If so, increase the // threshold, because inlining will (probably) eliminate the closure. SILInstruction *def = constTracker.getDefInCaller(AI->getCallee()); if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def))) { DEBUG(llvm::dbgs() << " Boost: apply const function at" << *AI); Benefit += ConstCalleeBenefit + loopDepth * LoopBenefitFactor; testThreshold *= 2; } } } // Don't count costs in blocks which are dead after inlining. SILBasicBlock *takenBlock = getTakenBlock(block->getTerminator(), constTracker); if (takenBlock) { Benefit += ConstTerminatorBenefit + TestOpt; DEBUG(llvm::dbgs() << " Take bb" << takenBlock->getDebugID() << " of" << *block->getTerminator()); domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) { return child->getSinglePredecessor() != block || child == takenBlock; }); } else { domOrder.pushChildren(block); } } unsigned Threshold = Benefit; // The default. if (testThreshold >= 0) { // We are in testing mode. Threshold = testThreshold; } else if (AI.getFunction()->isThunk()) { // Only inline trivial functions into thunks (which will not increase the // code size). Threshold = TrivialFunctionThreshold; } else { // The default case. // We reduce the benefit if the caller is too large. For this we use a // cubic function on the number of caller blocks. This starts to prevent // inlining at about 800 - 1000 caller blocks. unsigned blockMinus = (NumCallerBlocks * NumCallerBlocks) / BlockLimitDenominator * NumCallerBlocks / BlockLimitDenominator; if (Threshold > blockMinus + TrivialFunctionThreshold) Threshold -= blockMinus; else Threshold = TrivialFunctionThreshold; } if (CalleeCost > Threshold) { DEBUG(llvm::dbgs() << " NO: Function too big to inline, " "cost: " << CalleeCost << ", threshold: " << Threshold << "\n"); return false; } DEBUG(llvm::dbgs() << " YES: ready to inline, " "cost: " << CalleeCost << ", threshold: " << Threshold << "\n"); NumCallerBlocks += Callee->size(); return true; }